parquet 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +9 -1
- data/ext/parquet/Cargo.toml +4 -0
- data/ext/parquet/build.rs +5 -0
- data/ext/parquet/src/lib.rs +1 -0
- data/ext/parquet/src/reader/common.rs +7 -6
- data/ext/parquet/src/reader/mod.rs +204 -0
- data/ext/parquet/src/reader/parquet_column_reader.rs +19 -20
- data/ext/parquet/src/reader/parquet_row_reader.rs +18 -22
- data/ext/parquet/src/ruby_reader.rs +3 -5
- data/ext/parquet/src/types/core_types.rs +1 -0
- data/ext/parquet/src/types/mod.rs +8 -5
- data/ext/parquet/src/types/parquet_value.rs +199 -7
- data/ext/parquet/src/types/record_types.rs +16 -5
- data/ext/parquet/src/types/schema_converter.rs +118 -11
- data/ext/parquet/src/types/schema_node.rs +83 -2
- data/ext/parquet/src/types/timestamp.rs +6 -10
- data/ext/parquet/src/types/type_conversion.rs +84 -11
- data/ext/parquet/src/types/writer_types.rs +40 -11
- data/ext/parquet/src/utils.rs +6 -6
- data/ext/parquet/src/writer/mod.rs +25 -17
- data/ext/parquet/src/writer/write_columns.rs +27 -24
- data/ext/parquet/src/writer/write_rows.rs +14 -15
- data/lib/parquet/schema.rb +77 -4
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +11 -0
- metadata +3 -2
@@ -2,7 +2,7 @@ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestam
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use arrow_array::MapArray;
|
5
|
-
use magnus::RArray;
|
5
|
+
use magnus::{RArray, RString};
|
6
6
|
|
7
7
|
#[derive(Debug, Clone)]
|
8
8
|
pub enum ParquetValue {
|
@@ -22,6 +22,7 @@ pub enum ParquetValue {
|
|
22
22
|
Bytes(Vec<u8>),
|
23
23
|
Date32(i32),
|
24
24
|
Date64(i64),
|
25
|
+
Decimal128(i128),
|
25
26
|
TimestampSecond(i64, Option<Arc<str>>),
|
26
27
|
TimestampMillis(i64, Option<Arc<str>>),
|
27
28
|
TimestampMicros(i64, Option<Arc<str>>),
|
@@ -51,6 +52,7 @@ impl PartialEq for ParquetValue {
|
|
51
52
|
(ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
|
52
53
|
(ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
|
53
54
|
(ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
|
55
|
+
(ParquetValue::Decimal128(a), ParquetValue::Decimal128(b)) => a == b,
|
54
56
|
(ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
|
55
57
|
(ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
|
56
58
|
(ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
|
@@ -83,6 +85,7 @@ impl std::hash::Hash for ParquetValue {
|
|
83
85
|
ParquetValue::Bytes(b) => b.hash(state),
|
84
86
|
ParquetValue::Date32(d) => d.hash(state),
|
85
87
|
ParquetValue::Date64(d) => d.hash(state),
|
88
|
+
ParquetValue::Decimal128(d) => d.hash(state),
|
86
89
|
ParquetValue::TimestampSecond(ts, tz) => {
|
87
90
|
ts.hash(state);
|
88
91
|
tz.hash(state);
|
@@ -128,6 +131,7 @@ impl TryIntoValue for ParquetValue {
|
|
128
131
|
ParquetValue::Boolean(b) => Ok(b.into_value_with(handle)),
|
129
132
|
ParquetValue::String(s) => Ok(s.into_value_with(handle)),
|
130
133
|
ParquetValue::Bytes(b) => Ok(handle.str_from_slice(&b).as_value()),
|
134
|
+
ParquetValue::Decimal128(d) => Ok(d.to_string().into_value_with(handle)),
|
131
135
|
ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
|
132
136
|
ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
|
133
137
|
timestamp @ ParquetValue::TimestampSecond(_, _) => {
|
@@ -233,6 +237,18 @@ impl ParquetValue {
|
|
233
237
|
let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
|
234
238
|
Ok(ParquetValue::Float64(v))
|
235
239
|
}
|
240
|
+
PrimitiveType::Decimal128(_precision, scale) => {
|
241
|
+
if value.is_kind_of(ruby.class_string()) {
|
242
|
+
convert_to_decimal128(value, *scale)
|
243
|
+
} else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
|
244
|
+
convert_to_decimal128(s.as_value(), *scale)
|
245
|
+
} else {
|
246
|
+
Err(MagnusError::new(
|
247
|
+
magnus::exception::type_error(),
|
248
|
+
"Expected a string for a decimal type",
|
249
|
+
))
|
250
|
+
}
|
251
|
+
}
|
236
252
|
PrimitiveType::String => {
|
237
253
|
let v = convert_to_string(value)?;
|
238
254
|
Ok(ParquetValue::String(v))
|
@@ -356,6 +372,184 @@ impl ParquetValue {
|
|
356
372
|
}
|
357
373
|
}
|
358
374
|
}
|
375
|
+
/// Unified helper to parse a decimal string and apply scaling
|
376
|
+
fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
|
377
|
+
let s = input_str.trim();
|
378
|
+
|
379
|
+
// 1. Handle scientific notation case (e.g., "0.12345e3")
|
380
|
+
if let Some(e_pos) = s.to_lowercase().find('e') {
|
381
|
+
let base = &s[0..e_pos];
|
382
|
+
let exp = &s[e_pos + 1..];
|
383
|
+
|
384
|
+
// Parse the exponent with detailed error message
|
385
|
+
let exp_val = exp.parse::<i32>().map_err(|e| {
|
386
|
+
MagnusError::new(
|
387
|
+
magnus::exception::type_error(),
|
388
|
+
format!("Failed to parse exponent '{}' in decimal string '{}': {}", exp, s, e),
|
389
|
+
)
|
390
|
+
})?;
|
391
|
+
|
392
|
+
// Limit exponent to reasonable range to prevent overflow
|
393
|
+
if exp_val.abs() > 38 {
|
394
|
+
return Err(MagnusError::new(
|
395
|
+
magnus::exception::range_error(),
|
396
|
+
format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
|
397
|
+
));
|
398
|
+
}
|
399
|
+
|
400
|
+
// Handle the base part which might contain a decimal point
|
401
|
+
let (base_val, base_scale) = if let Some(decimal_pos) = base.find('.') {
|
402
|
+
let mut base_without_point = base.to_string();
|
403
|
+
base_without_point.remove(decimal_pos);
|
404
|
+
|
405
|
+
let base_scale = base.len() - decimal_pos - 1;
|
406
|
+
|
407
|
+
let base_val = base_without_point.parse::<i128>().map_err(|e| {
|
408
|
+
MagnusError::new(
|
409
|
+
magnus::exception::type_error(),
|
410
|
+
format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
|
411
|
+
)
|
412
|
+
})?;
|
413
|
+
|
414
|
+
(base_val, base_scale as i32)
|
415
|
+
} else {
|
416
|
+
// No decimal point in base
|
417
|
+
let base_val = base.parse::<i128>().map_err(|e| {
|
418
|
+
MagnusError::new(
|
419
|
+
magnus::exception::type_error(),
|
420
|
+
format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
|
421
|
+
)
|
422
|
+
})?;
|
423
|
+
|
424
|
+
(base_val, 0)
|
425
|
+
};
|
426
|
+
|
427
|
+
// Calculate the effective scale: base_scale - exp_val
|
428
|
+
let effective_scale = base_scale - exp_val;
|
429
|
+
|
430
|
+
// Adjust the value based on the difference between effective scale and requested scale
|
431
|
+
match effective_scale.cmp(&(input_scale as i32)) {
|
432
|
+
std::cmp::Ordering::Less => {
|
433
|
+
// Need to multiply to increase scale
|
434
|
+
let scale_diff = (input_scale as i32 - effective_scale) as u32;
|
435
|
+
if scale_diff > 38 {
|
436
|
+
return Err(MagnusError::new(
|
437
|
+
magnus::exception::range_error(),
|
438
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
|
439
|
+
));
|
440
|
+
}
|
441
|
+
Ok(base_val * 10_i128.pow(scale_diff))
|
442
|
+
}
|
443
|
+
std::cmp::Ordering::Greater => {
|
444
|
+
// Need to divide to decrease scale
|
445
|
+
let scale_diff = (effective_scale - input_scale as i32) as u32;
|
446
|
+
if scale_diff > 38 {
|
447
|
+
return Err(MagnusError::new(
|
448
|
+
magnus::exception::range_error(),
|
449
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
|
450
|
+
));
|
451
|
+
}
|
452
|
+
Ok(base_val / 10_i128.pow(scale_diff))
|
453
|
+
}
|
454
|
+
std::cmp::Ordering::Equal => Ok(base_val),
|
455
|
+
}
|
456
|
+
}
|
457
|
+
// 2. Handle decimal point in the string (e.g., "123.456")
|
458
|
+
else if let Some(decimal_pos) = s.find('.') {
|
459
|
+
let mut s_without_point = s.to_string();
|
460
|
+
s_without_point.remove(decimal_pos);
|
461
|
+
|
462
|
+
// Calculate the actual scale from the decimal position
|
463
|
+
let actual_scale = s.len() - decimal_pos - 1;
|
464
|
+
|
465
|
+
// Parse the string without decimal point as i128
|
466
|
+
let v = s_without_point.parse::<i128>().map_err(|e| {
|
467
|
+
MagnusError::new(
|
468
|
+
magnus::exception::type_error(),
|
469
|
+
format!("Failed to parse decimal string '{}' (without decimal point: '{}'): {}", s, s_without_point, e),
|
470
|
+
)
|
471
|
+
})?;
|
472
|
+
|
473
|
+
// Scale the value if needed based on the difference between
|
474
|
+
// the actual scale and the requested scale
|
475
|
+
match actual_scale.cmp(&(input_scale as usize)) {
|
476
|
+
std::cmp::Ordering::Less => {
|
477
|
+
// Need to multiply to increase scale
|
478
|
+
let scale_diff = (input_scale - actual_scale as i8) as u32;
|
479
|
+
if scale_diff > 38 {
|
480
|
+
return Err(MagnusError::new(
|
481
|
+
magnus::exception::range_error(),
|
482
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
|
483
|
+
));
|
484
|
+
}
|
485
|
+
Ok(v * 10_i128.pow(scale_diff))
|
486
|
+
}
|
487
|
+
std::cmp::Ordering::Greater => {
|
488
|
+
// Need to divide to decrease scale
|
489
|
+
let scale_diff = (actual_scale as i8 - input_scale) as u32;
|
490
|
+
if scale_diff > 38 {
|
491
|
+
return Err(MagnusError::new(
|
492
|
+
magnus::exception::range_error(),
|
493
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
|
494
|
+
));
|
495
|
+
}
|
496
|
+
Ok(v / 10_i128.pow(scale_diff))
|
497
|
+
}
|
498
|
+
std::cmp::Ordering::Equal => Ok(v),
|
499
|
+
}
|
500
|
+
}
|
501
|
+
// 3. Plain integer value (e.g., "12345")
|
502
|
+
else {
|
503
|
+
// No decimal point, parse as i128 and scale appropriately
|
504
|
+
let v = s.parse::<i128>().map_err(|e| {
|
505
|
+
MagnusError::new(
|
506
|
+
magnus::exception::type_error(),
|
507
|
+
format!("Failed to parse integer string '{}' as decimal: {}", s, e),
|
508
|
+
)
|
509
|
+
})?;
|
510
|
+
|
511
|
+
// Apply scale - make sure it's reasonable
|
512
|
+
if input_scale > 38 {
|
513
|
+
return Err(MagnusError::new(
|
514
|
+
magnus::exception::range_error(),
|
515
|
+
format!("Scale {} is too large for decimal value '{}'. Must be ≤ 38.", input_scale, s),
|
516
|
+
));
|
517
|
+
} else if input_scale < -38 {
|
518
|
+
return Err(MagnusError::new(
|
519
|
+
magnus::exception::range_error(),
|
520
|
+
format!("Scale {} is too small for decimal value '{}'. Must be ≥ -38.", input_scale, s),
|
521
|
+
));
|
522
|
+
}
|
523
|
+
|
524
|
+
// Apply positive scale (multiply)
|
525
|
+
if input_scale >= 0 {
|
526
|
+
Ok(v * 10_i128.pow(input_scale as u32))
|
527
|
+
} else {
|
528
|
+
// Apply negative scale (divide)
|
529
|
+
Ok(v / 10_i128.pow((-input_scale) as u32))
|
530
|
+
}
|
531
|
+
}
|
532
|
+
}
|
533
|
+
|
534
|
+
fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
|
535
|
+
// Get the decimal string based on the type of value
|
536
|
+
let s = if unsafe { value.classname() } == "BigDecimal" {
|
537
|
+
value
|
538
|
+
.funcall::<_, _, RString>("to_s", ("F",))?
|
539
|
+
.to_string()?
|
540
|
+
} else {
|
541
|
+
value.to_r_string()?.to_string()?
|
542
|
+
};
|
543
|
+
|
544
|
+
// Use our unified parser to convert the string to a decimal value with scaling
|
545
|
+
match parse_decimal_string(&s, scale) {
|
546
|
+
Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value)),
|
547
|
+
Err(e) => Err(MagnusError::new(
|
548
|
+
magnus::exception::type_error(),
|
549
|
+
format!("Failed to convert '{}' to decimal with scale {}: {}", s, scale, e),
|
550
|
+
))
|
551
|
+
}
|
552
|
+
}
|
359
553
|
|
360
554
|
#[derive(Debug)]
|
361
555
|
pub struct ParquetValueVec(Vec<ParquetValue>);
|
@@ -677,12 +871,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
677
871
|
let x = downcast_array::<NullArray>(column.array);
|
678
872
|
Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
|
679
873
|
}
|
680
|
-
_ =>
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
))?;
|
685
|
-
}
|
874
|
+
_ => Err(MagnusError::new(
|
875
|
+
magnus::exception::type_error(),
|
876
|
+
format!("Unsupported data type: {:?}", column.array.data_type()),
|
877
|
+
))?,
|
686
878
|
}
|
687
879
|
}
|
688
880
|
}
|
@@ -1,7 +1,12 @@
|
|
1
|
+
use std::sync::OnceLock;
|
2
|
+
|
1
3
|
use itertools::Itertools;
|
4
|
+
use parquet::data_type::AsBytes;
|
2
5
|
|
3
6
|
use super::*;
|
4
7
|
|
8
|
+
static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
|
9
|
+
|
5
10
|
#[derive(Debug)]
|
6
11
|
pub enum RowRecord<S: BuildHasher + Default> {
|
7
12
|
Vec(Vec<ParquetField>),
|
@@ -145,8 +150,8 @@ impl TryIntoValue for ParquetField {
|
|
145
150
|
Field::Str(s) => {
|
146
151
|
if self.1 {
|
147
152
|
Ok(simdutf8::basic::from_utf8(s.as_bytes())
|
148
|
-
.map_err(
|
149
|
-
.
|
153
|
+
.map_err(ParquetGemError::Utf8Error)
|
154
|
+
.map(|s| s.into_value_with(handle))?)
|
150
155
|
} else {
|
151
156
|
let s = String::from_utf8_lossy(s.as_bytes());
|
152
157
|
Ok(s.into_value_with(handle))
|
@@ -209,12 +214,18 @@ impl TryIntoValue for ParquetField {
|
|
209
214
|
format!("{}e-{}", unscaled, scale)
|
210
215
|
}
|
211
216
|
Decimal::Bytes { value, scale, .. } => {
|
212
|
-
//
|
213
|
-
let
|
217
|
+
// value is a byte array containing the bytes for an i128 value in big endian order
|
218
|
+
let casted = value.as_bytes()[..16].try_into()?;
|
219
|
+
let unscaled = i128::from_be_bytes(casted);
|
214
220
|
format!("{}e-{}", unscaled, scale)
|
215
221
|
}
|
216
222
|
};
|
217
|
-
|
223
|
+
|
224
|
+
// Load the bigdecimal gem if it's not already loaded
|
225
|
+
LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
|
226
|
+
|
227
|
+
let kernel = handle.module_kernel();
|
228
|
+
Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
|
218
229
|
}
|
219
230
|
Field::Group(row) => {
|
220
231
|
let hash = handle.hash_new();
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use magnus::value::ReprValue; // Add ReprValue trait to scope
|
2
|
-
use magnus::{Error as MagnusError, RArray, Ruby, TryConvert, Value};
|
2
|
+
use magnus::{Error as MagnusError, IntoValue, RArray, Ruby, TryConvert, Value};
|
3
3
|
|
4
4
|
use crate::types::{ParquetSchemaType as PST, PrimitiveType, SchemaField, SchemaNode};
|
5
5
|
use crate::utils::parse_string_or_symbol;
|
@@ -22,7 +22,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
|
22
22
|
let item_field = SchemaField {
|
23
23
|
name: "item".to_string(),
|
24
24
|
type_: list_field.item_type.clone(),
|
25
|
-
format: list_field.format.
|
25
|
+
format: list_field.format.map(String::from),
|
26
26
|
nullable: list_field.nullable,
|
27
27
|
};
|
28
28
|
convert_schema_field_to_node(&item_field)
|
@@ -33,7 +33,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
|
33
33
|
let item_field = SchemaField {
|
34
34
|
name: "item".to_string(),
|
35
35
|
type_: list_field.item_type.clone(),
|
36
|
-
format: list_field.format.
|
36
|
+
format: list_field.format.map(String::from),
|
37
37
|
nullable: list_field.nullable,
|
38
38
|
};
|
39
39
|
convert_schema_field_to_node(&item_field)
|
@@ -50,13 +50,13 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
|
50
50
|
let key_field = SchemaField {
|
51
51
|
name: "key".to_string(),
|
52
52
|
type_: map_field.key_type.clone(),
|
53
|
-
format: map_field.key_format.
|
53
|
+
format: map_field.key_format.map(String::from),
|
54
54
|
nullable: false, // Map keys can never be null in Parquet
|
55
55
|
};
|
56
56
|
let value_field = SchemaField {
|
57
57
|
name: "value".to_string(),
|
58
58
|
type_: map_field.value_type.clone(),
|
59
|
-
format: map_field.value_format.
|
59
|
+
format: map_field.value_format.map(String::from),
|
60
60
|
nullable: map_field.value_nullable,
|
61
61
|
};
|
62
62
|
|
@@ -121,9 +121,7 @@ pub fn parse_legacy_schema(
|
|
121
121
|
ruby.exception_type_error(),
|
122
122
|
"Schema must be an array of field definitions or nil",
|
123
123
|
)
|
124
|
-
})
|
125
|
-
.len()
|
126
|
-
== 0)
|
124
|
+
})?.is_empty())
|
127
125
|
{
|
128
126
|
// If schema is nil or an empty array, we'll handle this in the caller
|
129
127
|
return Ok(Vec::new());
|
@@ -155,7 +153,7 @@ pub fn parse_legacy_schema(
|
|
155
153
|
}
|
156
154
|
|
157
155
|
let (name, type_value) = &entries[0];
|
158
|
-
let name_option = parse_string_or_symbol(ruby, name
|
156
|
+
let name_option = parse_string_or_symbol(ruby, *name)?;
|
159
157
|
let name = name_option.ok_or_else(|| {
|
160
158
|
MagnusError::new(ruby.exception_runtime_error(), "Field name cannot be nil")
|
161
159
|
})?;
|
@@ -166,6 +164,9 @@ pub fn parse_legacy_schema(
|
|
166
164
|
let mut format_str = None;
|
167
165
|
let mut nullable = true; // Default to true if not specified
|
168
166
|
|
167
|
+
let mut precision: Option<Value> = None;
|
168
|
+
let mut scale: Option<Value> = None;
|
169
|
+
|
169
170
|
for (key, value) in type_hash {
|
170
171
|
let key_option = parse_string_or_symbol(ruby, key)?;
|
171
172
|
let key = key_option.ok_or_else(|| {
|
@@ -181,6 +182,12 @@ pub fn parse_legacy_schema(
|
|
181
182
|
// Extract nullable if present - convert to boolean
|
182
183
|
nullable = bool::try_convert(value).unwrap_or(true);
|
183
184
|
}
|
185
|
+
"precision" => {
|
186
|
+
precision = Some(value);
|
187
|
+
}
|
188
|
+
"scale" => {
|
189
|
+
scale = Some(value);
|
190
|
+
}
|
184
191
|
_ => {
|
185
192
|
return Err(MagnusError::new(
|
186
193
|
ruby.exception_type_error(),
|
@@ -197,9 +204,109 @@ pub fn parse_legacy_schema(
|
|
197
204
|
)
|
198
205
|
})?;
|
199
206
|
|
200
|
-
|
207
|
+
// Handle decimal type with precision and scale
|
208
|
+
let mut type_result = PST::try_convert(type_str)?;
|
209
|
+
|
210
|
+
// If it's a decimal type and we have precision and scale, override the type
|
211
|
+
if let PST::Primitive(PrimitiveType::Decimal128(_, _)) = type_result {
|
212
|
+
let precision_value = precision.unwrap_or_else(|| {
|
213
|
+
let val: u8 = 18;
|
214
|
+
val.into_value_with(ruby)
|
215
|
+
});
|
216
|
+
let scale_value = scale.unwrap_or_else(|| {
|
217
|
+
let val: i8 = 2;
|
218
|
+
val.into_value_with(ruby)
|
219
|
+
});
|
220
|
+
|
221
|
+
let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
|
222
|
+
MagnusError::new(
|
223
|
+
ruby.exception_type_error(),
|
224
|
+
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
225
|
+
)
|
226
|
+
})?;
|
227
|
+
|
228
|
+
// Validate precision is in a valid range
|
229
|
+
if precision_u8 < 1 {
|
230
|
+
return Err(MagnusError::new(
|
231
|
+
ruby.exception_arg_error(),
|
232
|
+
format!(
|
233
|
+
"Precision for decimal type must be at least 1, got {}",
|
234
|
+
precision_u8
|
235
|
+
),
|
236
|
+
));
|
237
|
+
}
|
238
|
+
|
239
|
+
if precision_u8 > 38 {
|
240
|
+
return Err(MagnusError::new(
|
241
|
+
ruby.exception_arg_error(),
|
242
|
+
format!(
|
243
|
+
"Precision for decimal type cannot exceed 38, got {}",
|
244
|
+
precision_u8
|
245
|
+
),
|
246
|
+
));
|
247
|
+
}
|
248
|
+
|
249
|
+
let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
|
250
|
+
MagnusError::new(
|
251
|
+
ruby.exception_type_error(),
|
252
|
+
"Invalid scale value for decimal type, expected an integer".to_string(),
|
253
|
+
)
|
254
|
+
})?;
|
255
|
+
|
256
|
+
// Validate scale is in a valid range relative to precision
|
257
|
+
if scale_i8 < 0 {
|
258
|
+
return Err(MagnusError::new(
|
259
|
+
ruby.exception_arg_error(),
|
260
|
+
format!(
|
261
|
+
"Scale for decimal type cannot be negative, got {}",
|
262
|
+
scale_i8
|
263
|
+
),
|
264
|
+
));
|
265
|
+
}
|
266
|
+
|
267
|
+
if scale_i8 as u8 > precision_u8 {
|
268
|
+
return Err(MagnusError::new(
|
269
|
+
ruby.exception_arg_error(),
|
270
|
+
format!(
|
271
|
+
"Scale ({}) cannot be larger than precision ({}) for decimal type",
|
272
|
+
scale_i8, precision_u8
|
273
|
+
),
|
274
|
+
));
|
275
|
+
}
|
276
|
+
|
277
|
+
type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
|
278
|
+
} else if let Some(type_name) = parse_string_or_symbol(ruby, type_str)? {
|
279
|
+
if type_name == "decimal" {
|
280
|
+
let precision_value = precision.unwrap_or_else(|| {
|
281
|
+
let val: u8 = 18;
|
282
|
+
val.into_value_with(ruby)
|
283
|
+
});
|
284
|
+
let scale_value = scale.unwrap_or_else(|| {
|
285
|
+
let val: i8 = 2;
|
286
|
+
val.into_value_with(ruby)
|
287
|
+
});
|
288
|
+
|
289
|
+
let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
|
290
|
+
MagnusError::new(
|
291
|
+
ruby.exception_type_error(),
|
292
|
+
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
293
|
+
)
|
294
|
+
})?;
|
295
|
+
|
296
|
+
let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
|
297
|
+
MagnusError::new(
|
298
|
+
ruby.exception_type_error(),
|
299
|
+
"Invalid scale value for decimal type, expected an integer".to_string(),
|
300
|
+
)
|
301
|
+
})?;
|
302
|
+
|
303
|
+
type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
(type_result, format_str, nullable)
|
201
308
|
} else {
|
202
|
-
(PST::try_convert(type_value
|
309
|
+
(PST::try_convert(*type_value)?, None, true)
|
203
310
|
};
|
204
311
|
|
205
312
|
schema.push(SchemaField {
|
@@ -68,7 +68,7 @@ fn parse_struct_node(
|
|
68
68
|
})?;
|
69
69
|
|
70
70
|
// Check for empty struct immediately
|
71
|
-
if fields_arr.
|
71
|
+
if fields_arr.is_empty() {
|
72
72
|
return Err(MagnusError::new(
|
73
73
|
ruby.exception_arg_error(),
|
74
74
|
format!("Cannot create a struct with zero fields. Struct name: '{}'. Parquet doesn't support empty structs", name)
|
@@ -175,6 +175,83 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
|
|
175
175
|
"struct" => parse_struct_node(ruby, &node_hash, name, nullable),
|
176
176
|
"list" => parse_list_node(ruby, &node_hash, name, nullable),
|
177
177
|
"map" => parse_map_node(ruby, &node_hash, name, nullable),
|
178
|
+
"decimal" => {
|
179
|
+
// Check for precision and scale
|
180
|
+
let precision_val = node_hash.get(Symbol::new("precision"));
|
181
|
+
let scale_val = node_hash.get(Symbol::new("scale"));
|
182
|
+
|
183
|
+
// Handle different precision/scale combinations:
|
184
|
+
// 1. When no precision or scale - use max precision (38)
|
185
|
+
// 2. When precision only - use scale 0
|
186
|
+
// 3. When scale only - use max precision (38)
|
187
|
+
let (precision, scale) = match (precision_val, scale_val) {
|
188
|
+
(None, None) => (38, 0), // Maximum accuracy, scale 0
|
189
|
+
(Some(p), None) => {
|
190
|
+
// Precision provided, scale defaults to 0
|
191
|
+
let prec = u8::try_convert(p).map_err(|_| {
|
192
|
+
MagnusError::new(
|
193
|
+
ruby.exception_type_error(),
|
194
|
+
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
195
|
+
)
|
196
|
+
})?;
|
197
|
+
(prec, 0)
|
198
|
+
},
|
199
|
+
(None, Some(s)) => {
|
200
|
+
// Scale provided, precision set to maximum (38)
|
201
|
+
let scl = i8::try_convert(s).map_err(|_| {
|
202
|
+
MagnusError::new(
|
203
|
+
ruby.exception_type_error(),
|
204
|
+
"Invalid scale value for decimal type, expected an integer".to_string(),
|
205
|
+
)
|
206
|
+
})?;
|
207
|
+
(38, scl)
|
208
|
+
},
|
209
|
+
(Some(p), Some(s)) => {
|
210
|
+
// Both provided
|
211
|
+
let prec = u8::try_convert(p).map_err(|_| {
|
212
|
+
MagnusError::new(
|
213
|
+
ruby.exception_type_error(),
|
214
|
+
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
215
|
+
)
|
216
|
+
})?;
|
217
|
+
let scl = i8::try_convert(s).map_err(|_| {
|
218
|
+
MagnusError::new(
|
219
|
+
ruby.exception_type_error(),
|
220
|
+
"Invalid scale value for decimal type, expected an integer".to_string(),
|
221
|
+
)
|
222
|
+
})?;
|
223
|
+
(prec, scl)
|
224
|
+
}
|
225
|
+
};
|
226
|
+
|
227
|
+
// Validate precision is in a valid range
|
228
|
+
if precision < 1 {
|
229
|
+
return Err(MagnusError::new(
|
230
|
+
ruby.exception_arg_error(),
|
231
|
+
format!(
|
232
|
+
"Precision for decimal type must be at least 1, got {}",
|
233
|
+
precision
|
234
|
+
),
|
235
|
+
));
|
236
|
+
}
|
237
|
+
|
238
|
+
if precision > 38 {
|
239
|
+
return Err(MagnusError::new(
|
240
|
+
ruby.exception_arg_error(),
|
241
|
+
format!(
|
242
|
+
"Precision for decimal type cannot exceed 38, got {}",
|
243
|
+
precision
|
244
|
+
),
|
245
|
+
));
|
246
|
+
}
|
247
|
+
|
248
|
+
Ok(SchemaNode::Primitive {
|
249
|
+
name,
|
250
|
+
parquet_type: PrimitiveType::Decimal128(precision, scale),
|
251
|
+
nullable,
|
252
|
+
format,
|
253
|
+
})
|
254
|
+
}
|
178
255
|
// For primitives, provide better error messages when type isn't recognized
|
179
256
|
other => {
|
180
257
|
if let Some(parquet_type) = parse_primitive_type(other) {
|
@@ -188,7 +265,7 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
|
|
188
265
|
Err(MagnusError::new(
|
189
266
|
magnus::exception::arg_error(),
|
190
267
|
format!(
|
191
|
-
"Unknown type: '{}'. Supported types are: struct, list, map, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64, boolean, string, binary, date32, timestamp_millis, timestamp_micros",
|
268
|
+
"Unknown type: '{}'. Supported types are: struct, list, map, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64, boolean, string, binary, date32, timestamp_millis, timestamp_micros, decimal",
|
192
269
|
other
|
193
270
|
)
|
194
271
|
))
|
@@ -216,6 +293,7 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
|
|
216
293
|
"date" | "date32" => Some(PrimitiveType::Date32),
|
217
294
|
"timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
|
218
295
|
"timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
|
296
|
+
"decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
|
219
297
|
_ => None,
|
220
298
|
}
|
221
299
|
}
|
@@ -240,6 +318,9 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
|
|
240
318
|
PrimitiveType::UInt64 => ArrowDataType::UInt64,
|
241
319
|
PrimitiveType::Float32 => ArrowDataType::Float32,
|
242
320
|
PrimitiveType::Float64 => ArrowDataType::Float64,
|
321
|
+
PrimitiveType::Decimal128(precision, scale) => {
|
322
|
+
ArrowDataType::Decimal128(*precision, *scale)
|
323
|
+
}
|
243
324
|
PrimitiveType::Boolean => ArrowDataType::Boolean,
|
244
325
|
PrimitiveType::String => ArrowDataType::Utf8,
|
245
326
|
PrimitiveType::Binary => ArrowDataType::Binary,
|
@@ -2,15 +2,11 @@ use super::*;
|
|
2
2
|
|
3
3
|
pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, ParquetGemError> {
|
4
4
|
let (ts, tz) = match value {
|
5
|
-
ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts)
|
6
|
-
ParquetValue::TimestampMillis(ts, tz) =>
|
7
|
-
|
8
|
-
}
|
9
|
-
ParquetValue::TimestampMicros(ts, tz) => {
|
10
|
-
(jiff::Timestamp::from_microsecond(*ts).unwrap(), tz)
|
11
|
-
}
|
5
|
+
ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts)?, tz),
|
6
|
+
ParquetValue::TimestampMillis(ts, tz) => (jiff::Timestamp::from_millisecond(*ts)?, tz),
|
7
|
+
ParquetValue::TimestampMicros(ts, tz) => (jiff::Timestamp::from_microsecond(*ts)?, tz),
|
12
8
|
ParquetValue::TimestampNanos(ts, tz) => {
|
13
|
-
(jiff::Timestamp::from_nanosecond(*ts as i128)
|
9
|
+
(jiff::Timestamp::from_nanosecond(*ts as i128)?, tz)
|
14
10
|
}
|
15
11
|
_ => {
|
16
12
|
return Err(MagnusError::new(
|
@@ -50,7 +46,7 @@ pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, Pa
|
|
50
46
|
Ok(ts.to_zoned(tz).timestamp())
|
51
47
|
} else {
|
52
48
|
// Try IANA timezone
|
53
|
-
match ts.in_tz(
|
49
|
+
match ts.in_tz(tz) {
|
54
50
|
Ok(zoned) => Ok(zoned.timestamp()),
|
55
51
|
Err(_) => Ok(ts), // Fall back to UTC if timezone is invalid
|
56
52
|
}
|
@@ -85,7 +81,7 @@ macro_rules! impl_timestamp_conversion {
|
|
85
81
|
#[macro_export]
|
86
82
|
macro_rules! impl_date_conversion {
|
87
83
|
($value:expr, $handle:expr) => {{
|
88
|
-
let ts = jiff::Timestamp::from_second(($value as i64) * 86400)
|
84
|
+
let ts = jiff::Timestamp::from_second(($value as i64) * 86400)?;
|
89
85
|
let formatted = ts.strftime("%Y-%m-%d").to_string();
|
90
86
|
Ok(formatted.into_value_with($handle))
|
91
87
|
}};
|