parquet 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +51 -44
- data/ext/parquet/Cargo.toml +3 -3
- data/ext/parquet/src/reader/mod.rs +2 -1
- data/ext/parquet/src/reader/parquet_column_reader.rs +15 -127
- data/ext/parquet/src/reader/parquet_row_reader.rs +14 -134
- data/ext/parquet/src/reader/unified/mod.rs +328 -0
- data/ext/parquet/src/types/parquet_value.rs +90 -16
- data/ext/parquet/src/types/record_types.rs +53 -7
- data/ext/parquet/src/types/schema_converter.rs +14 -75
- data/ext/parquet/src/types/type_conversion.rs +13 -11
- data/lib/parquet/version.rb +1 -1
- metadata +3 -2
@@ -121,7 +121,8 @@ pub fn parse_legacy_schema(
|
|
121
121
|
ruby.exception_type_error(),
|
122
122
|
"Schema must be an array of field definitions or nil",
|
123
123
|
)
|
124
|
-
})
|
124
|
+
})?
|
125
|
+
.is_empty())
|
125
126
|
{
|
126
127
|
// If schema is nil or an empty array, we'll handle this in the caller
|
127
128
|
return Ok(Vec::new());
|
@@ -206,101 +207,39 @@ pub fn parse_legacy_schema(
|
|
206
207
|
|
207
208
|
// Handle decimal type with precision and scale
|
208
209
|
let mut type_result = PST::try_convert(type_str)?;
|
209
|
-
|
210
|
+
|
210
211
|
// If it's a decimal type and we have precision and scale, override the type
|
211
212
|
if let PST::Primitive(PrimitiveType::Decimal128(_, _)) = type_result {
|
212
|
-
|
213
|
-
let val: u8 = 18;
|
214
|
-
val.into_value_with(ruby)
|
215
|
-
});
|
216
|
-
let scale_value = scale.unwrap_or_else(|| {
|
217
|
-
let val: i8 = 2;
|
218
|
-
val.into_value_with(ruby)
|
219
|
-
});
|
220
|
-
|
221
|
-
let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
|
222
|
-
MagnusError::new(
|
223
|
-
ruby.exception_type_error(),
|
224
|
-
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
225
|
-
)
|
226
|
-
})?;
|
227
|
-
|
228
|
-
// Validate precision is in a valid range
|
229
|
-
if precision_u8 < 1 {
|
230
|
-
return Err(MagnusError::new(
|
231
|
-
ruby.exception_arg_error(),
|
232
|
-
format!(
|
233
|
-
"Precision for decimal type must be at least 1, got {}",
|
234
|
-
precision_u8
|
235
|
-
),
|
236
|
-
));
|
237
|
-
}
|
238
|
-
|
239
|
-
if precision_u8 > 38 {
|
240
|
-
return Err(MagnusError::new(
|
241
|
-
ruby.exception_arg_error(),
|
242
|
-
format!(
|
243
|
-
"Precision for decimal type cannot exceed 38, got {}",
|
244
|
-
precision_u8
|
245
|
-
),
|
246
|
-
));
|
247
|
-
}
|
248
|
-
|
249
|
-
let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
|
250
|
-
MagnusError::new(
|
251
|
-
ruby.exception_type_error(),
|
252
|
-
"Invalid scale value for decimal type, expected an integer".to_string(),
|
253
|
-
)
|
254
|
-
})?;
|
255
|
-
|
256
|
-
// Validate scale is in a valid range relative to precision
|
257
|
-
if scale_i8 < 0 {
|
258
|
-
return Err(MagnusError::new(
|
259
|
-
ruby.exception_arg_error(),
|
260
|
-
format!(
|
261
|
-
"Scale for decimal type cannot be negative, got {}",
|
262
|
-
scale_i8
|
263
|
-
),
|
264
|
-
));
|
265
|
-
}
|
266
|
-
|
267
|
-
if scale_i8 as u8 > precision_u8 {
|
268
|
-
return Err(MagnusError::new(
|
269
|
-
ruby.exception_arg_error(),
|
270
|
-
format!(
|
271
|
-
"Scale ({}) cannot be larger than precision ({}) for decimal type",
|
272
|
-
scale_i8, precision_u8
|
273
|
-
),
|
274
|
-
));
|
275
|
-
}
|
276
|
-
|
277
|
-
type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
|
213
|
+
// Do nothing
|
278
214
|
} else if let Some(type_name) = parse_string_or_symbol(ruby, type_str)? {
|
279
215
|
if type_name == "decimal" {
|
280
216
|
let precision_value = precision.unwrap_or_else(|| {
|
281
|
-
let val: u8 =
|
217
|
+
let val: u8 = 38;
|
282
218
|
val.into_value_with(ruby)
|
283
219
|
});
|
220
|
+
|
284
221
|
let scale_value = scale.unwrap_or_else(|| {
|
285
|
-
let val: i8 =
|
222
|
+
let val: i8 = 0;
|
286
223
|
val.into_value_with(ruby)
|
287
224
|
});
|
288
|
-
|
225
|
+
|
289
226
|
let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
|
290
227
|
MagnusError::new(
|
291
228
|
ruby.exception_type_error(),
|
292
229
|
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
293
230
|
)
|
294
231
|
})?;
|
295
|
-
|
232
|
+
|
296
233
|
let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
|
297
234
|
MagnusError::new(
|
298
235
|
ruby.exception_type_error(),
|
299
|
-
"Invalid scale value for decimal type, expected an integer"
|
236
|
+
"Invalid scale value for decimal type, expected an integer"
|
237
|
+
.to_string(),
|
300
238
|
)
|
301
239
|
})?;
|
302
|
-
|
303
|
-
type_result =
|
240
|
+
|
241
|
+
type_result =
|
242
|
+
PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
|
304
243
|
}
|
305
244
|
}
|
306
245
|
|
@@ -2,8 +2,8 @@ use std::str::FromStr;
|
|
2
2
|
use std::sync::Arc;
|
3
3
|
|
4
4
|
use super::*;
|
5
|
-
use arrow_array::builder::*;
|
6
5
|
use arrow_array::builder::MapFieldNames;
|
6
|
+
use arrow_array::builder::*;
|
7
7
|
use arrow_schema::{DataType, Field, Fields, TimeUnit};
|
8
8
|
use jiff::tz::{Offset, TimeZone};
|
9
9
|
use magnus::{RArray, RString, TryConvert};
|
@@ -368,15 +368,17 @@ fn create_arrow_builder_for_type(
|
|
368
368
|
ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
|
369
369
|
// Create a Decimal128Builder with specific precision and scale
|
370
370
|
let builder = Decimal128Builder::with_capacity(cap);
|
371
|
-
|
371
|
+
|
372
372
|
// Set precision and scale for the decimal and return the new builder
|
373
|
-
let builder_with_precision = builder
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
373
|
+
let builder_with_precision = builder
|
374
|
+
.with_precision_and_scale(*precision, *scale)
|
375
|
+
.map_err(|e| {
|
376
|
+
MagnusError::new(
|
377
|
+
magnus::exception::runtime_error(),
|
378
|
+
format!("Failed to set precision and scale: {}", e),
|
379
|
+
)
|
380
|
+
})?;
|
381
|
+
|
380
382
|
Ok(Box::new(builder_with_precision))
|
381
383
|
}
|
382
384
|
ParquetSchemaType::Primitive(PrimitiveType::String) => {
|
@@ -857,7 +859,7 @@ fn fill_builder(
|
|
857
859
|
|
858
860
|
for val in values {
|
859
861
|
match val {
|
860
|
-
ParquetValue::Decimal128(d) => typed_builder.append_value(*d),
|
862
|
+
ParquetValue::Decimal128(d, _scale) => typed_builder.append_value(*d),
|
861
863
|
ParquetValue::Float64(f) => {
|
862
864
|
// Scale the float to the desired precision and scale
|
863
865
|
let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
|
@@ -1161,7 +1163,7 @@ fn fill_builder(
|
|
1161
1163
|
)
|
1162
1164
|
})?
|
1163
1165
|
.append_value(bytes),
|
1164
|
-
ParquetValue::Decimal128(x) => typed_builder
|
1166
|
+
ParquetValue::Decimal128(x, _scale) => typed_builder
|
1165
1167
|
.field_builder::<Decimal128Builder>(i)
|
1166
1168
|
.ok_or_else(|| {
|
1167
1169
|
MagnusError::new(
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.5.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- ext/parquet/src/reader/mod.rs
|
67
67
|
- ext/parquet/src/reader/parquet_column_reader.rs
|
68
68
|
- ext/parquet/src/reader/parquet_row_reader.rs
|
69
|
+
- ext/parquet/src/reader/unified/mod.rs
|
69
70
|
- ext/parquet/src/ruby_reader.rs
|
70
71
|
- ext/parquet/src/types/core_types.rs
|
71
72
|
- ext/parquet/src/types/mod.rs
|