parquet 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +9 -1
- data/ext/parquet/Cargo.toml +4 -0
- data/ext/parquet/build.rs +5 -0
- data/ext/parquet/src/lib.rs +1 -0
- data/ext/parquet/src/reader/common.rs +7 -6
- data/ext/parquet/src/reader/mod.rs +204 -0
- data/ext/parquet/src/reader/parquet_column_reader.rs +19 -20
- data/ext/parquet/src/reader/parquet_row_reader.rs +18 -22
- data/ext/parquet/src/ruby_reader.rs +11 -24
- data/ext/parquet/src/types/core_types.rs +1 -0
- data/ext/parquet/src/types/mod.rs +8 -5
- data/ext/parquet/src/types/parquet_value.rs +204 -7
- data/ext/parquet/src/types/record_types.rs +31 -8
- data/ext/parquet/src/types/schema_converter.rs +118 -11
- data/ext/parquet/src/types/schema_node.rs +83 -2
- data/ext/parquet/src/types/timestamp.rs +6 -10
- data/ext/parquet/src/types/type_conversion.rs +84 -11
- data/ext/parquet/src/types/writer_types.rs +40 -11
- data/ext/parquet/src/utils.rs +6 -6
- data/ext/parquet/src/writer/mod.rs +25 -18
- data/ext/parquet/src/writer/write_columns.rs +27 -24
- data/ext/parquet/src/writer/write_rows.rs +17 -16
- data/lib/parquet/schema.rb +77 -4
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +11 -0
- metadata +3 -2
@@ -32,6 +32,7 @@ use arrow_schema::{DataType, TimeUnit};
|
|
32
32
|
use magnus::{value::ReprValue, Error as MagnusError, IntoValue, Ruby, Value};
|
33
33
|
use parquet::data_type::Decimal;
|
34
34
|
use parquet::record::Field;
|
35
|
+
use std::array::TryFromSliceError;
|
35
36
|
use std::{collections::HashMap, hash::BuildHasher, sync::Arc};
|
36
37
|
|
37
38
|
use crate::header_cache::StringCacheKey;
|
@@ -58,6 +59,8 @@ pub enum ParquetGemError {
|
|
58
59
|
Utf8Error(#[from] simdutf8::basic::Utf8Error),
|
59
60
|
#[error("Jiff error: {0}")]
|
60
61
|
Jiff(#[from] jiff::Error),
|
62
|
+
#[error("Failed to cast slice to array: {0}")]
|
63
|
+
InvalidDecimal(#[from] TryFromSliceError),
|
61
64
|
}
|
62
65
|
|
63
66
|
#[derive(Debug)]
|
@@ -83,11 +86,11 @@ impl From<MagnusError> for ParquetGemError {
|
|
83
86
|
}
|
84
87
|
}
|
85
88
|
|
86
|
-
impl
|
87
|
-
fn
|
88
|
-
match
|
89
|
-
|
90
|
-
_ => MagnusError::new(magnus::exception::runtime_error(),
|
89
|
+
impl From<ParquetGemError> for MagnusError {
|
90
|
+
fn from(val: ParquetGemError) -> Self {
|
91
|
+
match val {
|
92
|
+
ParquetGemError::Ruby(MagnusErrorWrapper(err)) => err,
|
93
|
+
_ => MagnusError::new(magnus::exception::runtime_error(), val.to_string()),
|
91
94
|
}
|
92
95
|
}
|
93
96
|
}
|
@@ -2,7 +2,7 @@ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestam
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use arrow_array::MapArray;
|
5
|
-
use magnus::RArray;
|
5
|
+
use magnus::{RArray, RString};
|
6
6
|
|
7
7
|
#[derive(Debug, Clone)]
|
8
8
|
pub enum ParquetValue {
|
@@ -22,6 +22,7 @@ pub enum ParquetValue {
|
|
22
22
|
Bytes(Vec<u8>),
|
23
23
|
Date32(i32),
|
24
24
|
Date64(i64),
|
25
|
+
Decimal128(i128),
|
25
26
|
TimestampSecond(i64, Option<Arc<str>>),
|
26
27
|
TimestampMillis(i64, Option<Arc<str>>),
|
27
28
|
TimestampMicros(i64, Option<Arc<str>>),
|
@@ -51,6 +52,7 @@ impl PartialEq for ParquetValue {
|
|
51
52
|
(ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
|
52
53
|
(ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
|
53
54
|
(ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
|
55
|
+
(ParquetValue::Decimal128(a), ParquetValue::Decimal128(b)) => a == b,
|
54
56
|
(ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
|
55
57
|
(ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
|
56
58
|
(ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
|
@@ -83,6 +85,7 @@ impl std::hash::Hash for ParquetValue {
|
|
83
85
|
ParquetValue::Bytes(b) => b.hash(state),
|
84
86
|
ParquetValue::Date32(d) => d.hash(state),
|
85
87
|
ParquetValue::Date64(d) => d.hash(state),
|
88
|
+
ParquetValue::Decimal128(d) => d.hash(state),
|
86
89
|
ParquetValue::TimestampSecond(ts, tz) => {
|
87
90
|
ts.hash(state);
|
88
91
|
tz.hash(state);
|
@@ -128,6 +131,7 @@ impl TryIntoValue for ParquetValue {
|
|
128
131
|
ParquetValue::Boolean(b) => Ok(b.into_value_with(handle)),
|
129
132
|
ParquetValue::String(s) => Ok(s.into_value_with(handle)),
|
130
133
|
ParquetValue::Bytes(b) => Ok(handle.str_from_slice(&b).as_value()),
|
134
|
+
ParquetValue::Decimal128(d) => Ok(d.to_string().into_value_with(handle)),
|
131
135
|
ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
|
132
136
|
ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
|
133
137
|
timestamp @ ParquetValue::TimestampSecond(_, _) => {
|
@@ -160,7 +164,12 @@ impl TryIntoValue for ParquetValue {
|
|
160
164
|
Ok(ary.into_value_with(handle))
|
161
165
|
}
|
162
166
|
ParquetValue::Map(m) => {
|
167
|
+
#[cfg(ruby_lt_3_2)]
|
163
168
|
let hash = handle.hash_new_capa(m.len());
|
169
|
+
|
170
|
+
#[cfg(not(ruby_lt_3_2))]
|
171
|
+
let hash = handle.hash_new();
|
172
|
+
|
164
173
|
m.into_iter().try_for_each(|(k, v)| {
|
165
174
|
hash.aset(
|
166
175
|
k.try_into_value_with(handle)?,
|
@@ -228,6 +237,18 @@ impl ParquetValue {
|
|
228
237
|
let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
|
229
238
|
Ok(ParquetValue::Float64(v))
|
230
239
|
}
|
240
|
+
PrimitiveType::Decimal128(_precision, scale) => {
|
241
|
+
if value.is_kind_of(ruby.class_string()) {
|
242
|
+
convert_to_decimal128(value, *scale)
|
243
|
+
} else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
|
244
|
+
convert_to_decimal128(s.as_value(), *scale)
|
245
|
+
} else {
|
246
|
+
Err(MagnusError::new(
|
247
|
+
magnus::exception::type_error(),
|
248
|
+
"Expected a string for a decimal type",
|
249
|
+
))
|
250
|
+
}
|
251
|
+
}
|
231
252
|
PrimitiveType::String => {
|
232
253
|
let v = convert_to_string(value)?;
|
233
254
|
Ok(ParquetValue::String(v))
|
@@ -351,6 +372,184 @@ impl ParquetValue {
|
|
351
372
|
}
|
352
373
|
}
|
353
374
|
}
|
375
|
+
/// Unified helper to parse a decimal string and apply scaling
|
376
|
+
fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
|
377
|
+
let s = input_str.trim();
|
378
|
+
|
379
|
+
// 1. Handle scientific notation case (e.g., "0.12345e3")
|
380
|
+
if let Some(e_pos) = s.to_lowercase().find('e') {
|
381
|
+
let base = &s[0..e_pos];
|
382
|
+
let exp = &s[e_pos + 1..];
|
383
|
+
|
384
|
+
// Parse the exponent with detailed error message
|
385
|
+
let exp_val = exp.parse::<i32>().map_err(|e| {
|
386
|
+
MagnusError::new(
|
387
|
+
magnus::exception::type_error(),
|
388
|
+
format!("Failed to parse exponent '{}' in decimal string '{}': {}", exp, s, e),
|
389
|
+
)
|
390
|
+
})?;
|
391
|
+
|
392
|
+
// Limit exponent to reasonable range to prevent overflow
|
393
|
+
if exp_val.abs() > 38 {
|
394
|
+
return Err(MagnusError::new(
|
395
|
+
magnus::exception::range_error(),
|
396
|
+
format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
|
397
|
+
));
|
398
|
+
}
|
399
|
+
|
400
|
+
// Handle the base part which might contain a decimal point
|
401
|
+
let (base_val, base_scale) = if let Some(decimal_pos) = base.find('.') {
|
402
|
+
let mut base_without_point = base.to_string();
|
403
|
+
base_without_point.remove(decimal_pos);
|
404
|
+
|
405
|
+
let base_scale = base.len() - decimal_pos - 1;
|
406
|
+
|
407
|
+
let base_val = base_without_point.parse::<i128>().map_err(|e| {
|
408
|
+
MagnusError::new(
|
409
|
+
magnus::exception::type_error(),
|
410
|
+
format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
|
411
|
+
)
|
412
|
+
})?;
|
413
|
+
|
414
|
+
(base_val, base_scale as i32)
|
415
|
+
} else {
|
416
|
+
// No decimal point in base
|
417
|
+
let base_val = base.parse::<i128>().map_err(|e| {
|
418
|
+
MagnusError::new(
|
419
|
+
magnus::exception::type_error(),
|
420
|
+
format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
|
421
|
+
)
|
422
|
+
})?;
|
423
|
+
|
424
|
+
(base_val, 0)
|
425
|
+
};
|
426
|
+
|
427
|
+
// Calculate the effective scale: base_scale - exp_val
|
428
|
+
let effective_scale = base_scale - exp_val;
|
429
|
+
|
430
|
+
// Adjust the value based on the difference between effective scale and requested scale
|
431
|
+
match effective_scale.cmp(&(input_scale as i32)) {
|
432
|
+
std::cmp::Ordering::Less => {
|
433
|
+
// Need to multiply to increase scale
|
434
|
+
let scale_diff = (input_scale as i32 - effective_scale) as u32;
|
435
|
+
if scale_diff > 38 {
|
436
|
+
return Err(MagnusError::new(
|
437
|
+
magnus::exception::range_error(),
|
438
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
|
439
|
+
));
|
440
|
+
}
|
441
|
+
Ok(base_val * 10_i128.pow(scale_diff))
|
442
|
+
}
|
443
|
+
std::cmp::Ordering::Greater => {
|
444
|
+
// Need to divide to decrease scale
|
445
|
+
let scale_diff = (effective_scale - input_scale as i32) as u32;
|
446
|
+
if scale_diff > 38 {
|
447
|
+
return Err(MagnusError::new(
|
448
|
+
magnus::exception::range_error(),
|
449
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
|
450
|
+
));
|
451
|
+
}
|
452
|
+
Ok(base_val / 10_i128.pow(scale_diff))
|
453
|
+
}
|
454
|
+
std::cmp::Ordering::Equal => Ok(base_val),
|
455
|
+
}
|
456
|
+
}
|
457
|
+
// 2. Handle decimal point in the string (e.g., "123.456")
|
458
|
+
else if let Some(decimal_pos) = s.find('.') {
|
459
|
+
let mut s_without_point = s.to_string();
|
460
|
+
s_without_point.remove(decimal_pos);
|
461
|
+
|
462
|
+
// Calculate the actual scale from the decimal position
|
463
|
+
let actual_scale = s.len() - decimal_pos - 1;
|
464
|
+
|
465
|
+
// Parse the string without decimal point as i128
|
466
|
+
let v = s_without_point.parse::<i128>().map_err(|e| {
|
467
|
+
MagnusError::new(
|
468
|
+
magnus::exception::type_error(),
|
469
|
+
format!("Failed to parse decimal string '{}' (without decimal point: '{}'): {}", s, s_without_point, e),
|
470
|
+
)
|
471
|
+
})?;
|
472
|
+
|
473
|
+
// Scale the value if needed based on the difference between
|
474
|
+
// the actual scale and the requested scale
|
475
|
+
match actual_scale.cmp(&(input_scale as usize)) {
|
476
|
+
std::cmp::Ordering::Less => {
|
477
|
+
// Need to multiply to increase scale
|
478
|
+
let scale_diff = (input_scale - actual_scale as i8) as u32;
|
479
|
+
if scale_diff > 38 {
|
480
|
+
return Err(MagnusError::new(
|
481
|
+
magnus::exception::range_error(),
|
482
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
|
483
|
+
));
|
484
|
+
}
|
485
|
+
Ok(v * 10_i128.pow(scale_diff))
|
486
|
+
}
|
487
|
+
std::cmp::Ordering::Greater => {
|
488
|
+
// Need to divide to decrease scale
|
489
|
+
let scale_diff = (actual_scale as i8 - input_scale) as u32;
|
490
|
+
if scale_diff > 38 {
|
491
|
+
return Err(MagnusError::new(
|
492
|
+
magnus::exception::range_error(),
|
493
|
+
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
|
494
|
+
));
|
495
|
+
}
|
496
|
+
Ok(v / 10_i128.pow(scale_diff))
|
497
|
+
}
|
498
|
+
std::cmp::Ordering::Equal => Ok(v),
|
499
|
+
}
|
500
|
+
}
|
501
|
+
// 3. Plain integer value (e.g., "12345")
|
502
|
+
else {
|
503
|
+
// No decimal point, parse as i128 and scale appropriately
|
504
|
+
let v = s.parse::<i128>().map_err(|e| {
|
505
|
+
MagnusError::new(
|
506
|
+
magnus::exception::type_error(),
|
507
|
+
format!("Failed to parse integer string '{}' as decimal: {}", s, e),
|
508
|
+
)
|
509
|
+
})?;
|
510
|
+
|
511
|
+
// Apply scale - make sure it's reasonable
|
512
|
+
if input_scale > 38 {
|
513
|
+
return Err(MagnusError::new(
|
514
|
+
magnus::exception::range_error(),
|
515
|
+
format!("Scale {} is too large for decimal value '{}'. Must be ≤ 38.", input_scale, s),
|
516
|
+
));
|
517
|
+
} else if input_scale < -38 {
|
518
|
+
return Err(MagnusError::new(
|
519
|
+
magnus::exception::range_error(),
|
520
|
+
format!("Scale {} is too small for decimal value '{}'. Must be ≥ -38.", input_scale, s),
|
521
|
+
));
|
522
|
+
}
|
523
|
+
|
524
|
+
// Apply positive scale (multiply)
|
525
|
+
if input_scale >= 0 {
|
526
|
+
Ok(v * 10_i128.pow(input_scale as u32))
|
527
|
+
} else {
|
528
|
+
// Apply negative scale (divide)
|
529
|
+
Ok(v / 10_i128.pow((-input_scale) as u32))
|
530
|
+
}
|
531
|
+
}
|
532
|
+
}
|
533
|
+
|
534
|
+
fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
|
535
|
+
// Get the decimal string based on the type of value
|
536
|
+
let s = if unsafe { value.classname() } == "BigDecimal" {
|
537
|
+
value
|
538
|
+
.funcall::<_, _, RString>("to_s", ("F",))?
|
539
|
+
.to_string()?
|
540
|
+
} else {
|
541
|
+
value.to_r_string()?.to_string()?
|
542
|
+
};
|
543
|
+
|
544
|
+
// Use our unified parser to convert the string to a decimal value with scaling
|
545
|
+
match parse_decimal_string(&s, scale) {
|
546
|
+
Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value)),
|
547
|
+
Err(e) => Err(MagnusError::new(
|
548
|
+
magnus::exception::type_error(),
|
549
|
+
format!("Failed to convert '{}' to decimal with scale {}: {}", s, scale, e),
|
550
|
+
))
|
551
|
+
}
|
552
|
+
}
|
354
553
|
|
355
554
|
#[derive(Debug)]
|
356
555
|
pub struct ParquetValueVec(Vec<ParquetValue>);
|
@@ -672,12 +871,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
672
871
|
let x = downcast_array::<NullArray>(column.array);
|
673
872
|
Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
|
674
873
|
}
|
675
|
-
_ =>
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
))?;
|
680
|
-
}
|
874
|
+
_ => Err(MagnusError::new(
|
875
|
+
magnus::exception::type_error(),
|
876
|
+
format!("Unsupported data type: {:?}", column.array.data_type()),
|
877
|
+
))?,
|
681
878
|
}
|
682
879
|
}
|
683
880
|
}
|
@@ -1,7 +1,12 @@
|
|
1
|
+
use std::sync::OnceLock;
|
2
|
+
|
1
3
|
use itertools::Itertools;
|
4
|
+
use parquet::data_type::AsBytes;
|
2
5
|
|
3
6
|
use super::*;
|
4
7
|
|
8
|
+
static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
|
9
|
+
|
5
10
|
#[derive(Debug)]
|
6
11
|
pub enum RowRecord<S: BuildHasher + Default> {
|
7
12
|
Vec(Vec<ParquetField>),
|
@@ -29,8 +34,12 @@ impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
|
29
34
|
Ok(handle.into_value(ary))
|
30
35
|
}
|
31
36
|
RowRecord::Map(map) => {
|
37
|
+
#[cfg(ruby_lt_3_2)]
|
32
38
|
let hash = handle.hash_new_capa(map.len());
|
33
39
|
|
40
|
+
#[cfg(not(ruby_lt_3_2))]
|
41
|
+
let hash = handle.hash_new();
|
42
|
+
|
34
43
|
let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
|
35
44
|
let mut i = 0;
|
36
45
|
|
@@ -78,8 +87,12 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
78
87
|
Ok(ary.into_value_with(handle))
|
79
88
|
}
|
80
89
|
ColumnRecord::Map(map) => {
|
90
|
+
#[cfg(ruby_lt_3_2)]
|
81
91
|
let hash = handle.hash_new_capa(map.len());
|
82
92
|
|
93
|
+
#[cfg(not(ruby_lt_3_2))]
|
94
|
+
let hash = handle.hash_new();
|
95
|
+
|
83
96
|
let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
|
84
97
|
let mut i = 0;
|
85
98
|
|
@@ -137,8 +150,8 @@ impl TryIntoValue for ParquetField {
|
|
137
150
|
Field::Str(s) => {
|
138
151
|
if self.1 {
|
139
152
|
Ok(simdutf8::basic::from_utf8(s.as_bytes())
|
140
|
-
.map_err(
|
141
|
-
.
|
153
|
+
.map_err(ParquetGemError::Utf8Error)
|
154
|
+
.map(|s| s.into_value_with(handle))?)
|
142
155
|
} else {
|
143
156
|
let s = String::from_utf8_lossy(s.as_bytes());
|
144
157
|
Ok(s.into_value_with(handle))
|
@@ -175,9 +188,13 @@ impl TryIntoValue for ParquetField {
|
|
175
188
|
Ok(ary.into_value_with(handle))
|
176
189
|
}
|
177
190
|
Field::MapInternal(map) => {
|
178
|
-
|
179
|
-
let hash = handle.hash_new_capa(
|
180
|
-
|
191
|
+
#[cfg(ruby_lt_3_2)]
|
192
|
+
let hash = handle.hash_new_capa(map.len());
|
193
|
+
|
194
|
+
#[cfg(not(ruby_lt_3_2))]
|
195
|
+
let hash = handle.hash_new();
|
196
|
+
|
197
|
+
map.entries().iter().try_for_each(|(k, v)| {
|
181
198
|
hash.aset(
|
182
199
|
ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
|
183
200
|
ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
|
@@ -197,12 +214,18 @@ impl TryIntoValue for ParquetField {
|
|
197
214
|
format!("{}e-{}", unscaled, scale)
|
198
215
|
}
|
199
216
|
Decimal::Bytes { value, scale, .. } => {
|
200
|
-
//
|
201
|
-
let
|
217
|
+
// value is a byte array containing the bytes for an i128 value in big endian order
|
218
|
+
let casted = value.as_bytes()[..16].try_into()?;
|
219
|
+
let unscaled = i128::from_be_bytes(casted);
|
202
220
|
format!("{}e-{}", unscaled, scale)
|
203
221
|
}
|
204
222
|
};
|
205
|
-
|
223
|
+
|
224
|
+
// Load the bigdecimal gem if it's not already loaded
|
225
|
+
LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
|
226
|
+
|
227
|
+
let kernel = handle.module_kernel();
|
228
|
+
Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
|
206
229
|
}
|
207
230
|
Field::Group(row) => {
|
208
231
|
let hash = handle.hash_new();
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use magnus::value::ReprValue; // Add ReprValue trait to scope
|
2
|
-
use magnus::{Error as MagnusError, RArray, Ruby, TryConvert, Value};
|
2
|
+
use magnus::{Error as MagnusError, IntoValue, RArray, Ruby, TryConvert, Value};
|
3
3
|
|
4
4
|
use crate::types::{ParquetSchemaType as PST, PrimitiveType, SchemaField, SchemaNode};
|
5
5
|
use crate::utils::parse_string_or_symbol;
|
@@ -22,7 +22,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
|
22
22
|
let item_field = SchemaField {
|
23
23
|
name: "item".to_string(),
|
24
24
|
type_: list_field.item_type.clone(),
|
25
|
-
format: list_field.format.
|
25
|
+
format: list_field.format.map(String::from),
|
26
26
|
nullable: list_field.nullable,
|
27
27
|
};
|
28
28
|
convert_schema_field_to_node(&item_field)
|
@@ -33,7 +33,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
|
33
33
|
let item_field = SchemaField {
|
34
34
|
name: "item".to_string(),
|
35
35
|
type_: list_field.item_type.clone(),
|
36
|
-
format: list_field.format.
|
36
|
+
format: list_field.format.map(String::from),
|
37
37
|
nullable: list_field.nullable,
|
38
38
|
};
|
39
39
|
convert_schema_field_to_node(&item_field)
|
@@ -50,13 +50,13 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
|
50
50
|
let key_field = SchemaField {
|
51
51
|
name: "key".to_string(),
|
52
52
|
type_: map_field.key_type.clone(),
|
53
|
-
format: map_field.key_format.
|
53
|
+
format: map_field.key_format.map(String::from),
|
54
54
|
nullable: false, // Map keys can never be null in Parquet
|
55
55
|
};
|
56
56
|
let value_field = SchemaField {
|
57
57
|
name: "value".to_string(),
|
58
58
|
type_: map_field.value_type.clone(),
|
59
|
-
format: map_field.value_format.
|
59
|
+
format: map_field.value_format.map(String::from),
|
60
60
|
nullable: map_field.value_nullable,
|
61
61
|
};
|
62
62
|
|
@@ -121,9 +121,7 @@ pub fn parse_legacy_schema(
|
|
121
121
|
ruby.exception_type_error(),
|
122
122
|
"Schema must be an array of field definitions or nil",
|
123
123
|
)
|
124
|
-
})
|
125
|
-
.len()
|
126
|
-
== 0)
|
124
|
+
})?.is_empty())
|
127
125
|
{
|
128
126
|
// If schema is nil or an empty array, we'll handle this in the caller
|
129
127
|
return Ok(Vec::new());
|
@@ -155,7 +153,7 @@ pub fn parse_legacy_schema(
|
|
155
153
|
}
|
156
154
|
|
157
155
|
let (name, type_value) = &entries[0];
|
158
|
-
let name_option = parse_string_or_symbol(ruby, name
|
156
|
+
let name_option = parse_string_or_symbol(ruby, *name)?;
|
159
157
|
let name = name_option.ok_or_else(|| {
|
160
158
|
MagnusError::new(ruby.exception_runtime_error(), "Field name cannot be nil")
|
161
159
|
})?;
|
@@ -166,6 +164,9 @@ pub fn parse_legacy_schema(
|
|
166
164
|
let mut format_str = None;
|
167
165
|
let mut nullable = true; // Default to true if not specified
|
168
166
|
|
167
|
+
let mut precision: Option<Value> = None;
|
168
|
+
let mut scale: Option<Value> = None;
|
169
|
+
|
169
170
|
for (key, value) in type_hash {
|
170
171
|
let key_option = parse_string_or_symbol(ruby, key)?;
|
171
172
|
let key = key_option.ok_or_else(|| {
|
@@ -181,6 +182,12 @@ pub fn parse_legacy_schema(
|
|
181
182
|
// Extract nullable if present - convert to boolean
|
182
183
|
nullable = bool::try_convert(value).unwrap_or(true);
|
183
184
|
}
|
185
|
+
"precision" => {
|
186
|
+
precision = Some(value);
|
187
|
+
}
|
188
|
+
"scale" => {
|
189
|
+
scale = Some(value);
|
190
|
+
}
|
184
191
|
_ => {
|
185
192
|
return Err(MagnusError::new(
|
186
193
|
ruby.exception_type_error(),
|
@@ -197,9 +204,109 @@ pub fn parse_legacy_schema(
|
|
197
204
|
)
|
198
205
|
})?;
|
199
206
|
|
200
|
-
|
207
|
+
// Handle decimal type with precision and scale
|
208
|
+
let mut type_result = PST::try_convert(type_str)?;
|
209
|
+
|
210
|
+
// If it's a decimal type and we have precision and scale, override the type
|
211
|
+
if let PST::Primitive(PrimitiveType::Decimal128(_, _)) = type_result {
|
212
|
+
let precision_value = precision.unwrap_or_else(|| {
|
213
|
+
let val: u8 = 18;
|
214
|
+
val.into_value_with(ruby)
|
215
|
+
});
|
216
|
+
let scale_value = scale.unwrap_or_else(|| {
|
217
|
+
let val: i8 = 2;
|
218
|
+
val.into_value_with(ruby)
|
219
|
+
});
|
220
|
+
|
221
|
+
let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
|
222
|
+
MagnusError::new(
|
223
|
+
ruby.exception_type_error(),
|
224
|
+
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
225
|
+
)
|
226
|
+
})?;
|
227
|
+
|
228
|
+
// Validate precision is in a valid range
|
229
|
+
if precision_u8 < 1 {
|
230
|
+
return Err(MagnusError::new(
|
231
|
+
ruby.exception_arg_error(),
|
232
|
+
format!(
|
233
|
+
"Precision for decimal type must be at least 1, got {}",
|
234
|
+
precision_u8
|
235
|
+
),
|
236
|
+
));
|
237
|
+
}
|
238
|
+
|
239
|
+
if precision_u8 > 38 {
|
240
|
+
return Err(MagnusError::new(
|
241
|
+
ruby.exception_arg_error(),
|
242
|
+
format!(
|
243
|
+
"Precision for decimal type cannot exceed 38, got {}",
|
244
|
+
precision_u8
|
245
|
+
),
|
246
|
+
));
|
247
|
+
}
|
248
|
+
|
249
|
+
let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
|
250
|
+
MagnusError::new(
|
251
|
+
ruby.exception_type_error(),
|
252
|
+
"Invalid scale value for decimal type, expected an integer".to_string(),
|
253
|
+
)
|
254
|
+
})?;
|
255
|
+
|
256
|
+
// Validate scale is in a valid range relative to precision
|
257
|
+
if scale_i8 < 0 {
|
258
|
+
return Err(MagnusError::new(
|
259
|
+
ruby.exception_arg_error(),
|
260
|
+
format!(
|
261
|
+
"Scale for decimal type cannot be negative, got {}",
|
262
|
+
scale_i8
|
263
|
+
),
|
264
|
+
));
|
265
|
+
}
|
266
|
+
|
267
|
+
if scale_i8 as u8 > precision_u8 {
|
268
|
+
return Err(MagnusError::new(
|
269
|
+
ruby.exception_arg_error(),
|
270
|
+
format!(
|
271
|
+
"Scale ({}) cannot be larger than precision ({}) for decimal type",
|
272
|
+
scale_i8, precision_u8
|
273
|
+
),
|
274
|
+
));
|
275
|
+
}
|
276
|
+
|
277
|
+
type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
|
278
|
+
} else if let Some(type_name) = parse_string_or_symbol(ruby, type_str)? {
|
279
|
+
if type_name == "decimal" {
|
280
|
+
let precision_value = precision.unwrap_or_else(|| {
|
281
|
+
let val: u8 = 18;
|
282
|
+
val.into_value_with(ruby)
|
283
|
+
});
|
284
|
+
let scale_value = scale.unwrap_or_else(|| {
|
285
|
+
let val: i8 = 2;
|
286
|
+
val.into_value_with(ruby)
|
287
|
+
});
|
288
|
+
|
289
|
+
let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
|
290
|
+
MagnusError::new(
|
291
|
+
ruby.exception_type_error(),
|
292
|
+
"Invalid precision value for decimal type, expected a positive integer".to_string(),
|
293
|
+
)
|
294
|
+
})?;
|
295
|
+
|
296
|
+
let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
|
297
|
+
MagnusError::new(
|
298
|
+
ruby.exception_type_error(),
|
299
|
+
"Invalid scale value for decimal type, expected an integer".to_string(),
|
300
|
+
)
|
301
|
+
})?;
|
302
|
+
|
303
|
+
type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
(type_result, format_str, nullable)
|
201
308
|
} else {
|
202
|
-
(PST::try_convert(type_value
|
309
|
+
(PST::try_convert(*type_value)?, None, true)
|
203
310
|
};
|
204
311
|
|
205
312
|
schema.push(SchemaField {
|