parquet 0.5.9 → 0.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +3 -0
- data/ext/parquet/Cargo.toml +2 -0
- data/ext/parquet/build.rs +1 -1
- data/ext/parquet/src/lib.rs +3 -0
- data/ext/parquet/src/reader/arrow_reader.rs +579 -0
- data/ext/parquet/src/reader/common.rs +65 -11
- data/ext/parquet/src/reader/format_detector.rs +69 -0
- data/ext/parquet/src/reader/mod.rs +7 -2
- data/ext/parquet/src/reader/unified/mod.rs +82 -14
- data/ext/parquet/src/types/core_types.rs +1 -0
- data/ext/parquet/src/types/mod.rs +11 -4
- data/ext/parquet/src/types/parquet_value.rs +290 -73
- data/ext/parquet/src/types/record_types.rs +92 -8
- data/ext/parquet/src/types/schema_node.rs +11 -5
- data/ext/parquet/src/types/type_conversion.rs +216 -0
- data/ext/parquet/src/types/writer_types.rs +50 -0
- data/ext/parquet/src/writer/mod.rs +3 -0
- data/ext/parquet/src/writer/write_columns.rs +3 -0
- data/ext/parquet/src/writer/write_rows.rs +1 -0
- data/lib/parquet/version.rb +1 -1
- metadata +4 -2
@@ -1,7 +1,7 @@
|
|
1
1
|
use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
|
2
2
|
|
3
|
+
use super::record_types::{format_decimal_with_i8_scale, format_i256_decimal_with_scale};
|
3
4
|
use super::*;
|
4
|
-
use super::record_types::format_decimal_with_i8_scale;
|
5
5
|
use arrow_array::MapArray;
|
6
6
|
use magnus::{RArray, RString};
|
7
7
|
|
@@ -24,6 +24,7 @@ pub enum ParquetValue {
|
|
24
24
|
Date32(i32),
|
25
25
|
Date64(i64),
|
26
26
|
Decimal128(i128, i8),
|
27
|
+
Decimal256(arrow_buffer::i256, i8),
|
27
28
|
TimestampSecond(i64, Option<Arc<str>>),
|
28
29
|
TimestampMillis(i64, Option<Arc<str>>),
|
29
30
|
TimestampMicros(i64, Option<Arc<str>>),
|
@@ -94,6 +95,15 @@ impl PartialEq for ParquetValue {
|
|
94
95
|
a_val == b_val
|
95
96
|
}
|
96
97
|
}
|
98
|
+
(ParquetValue::Decimal256(a, scale_a), ParquetValue::Decimal256(b, scale_b)) => {
|
99
|
+
if scale_a == scale_b {
|
100
|
+
// Same scale, compare directly
|
101
|
+
a == b
|
102
|
+
} else {
|
103
|
+
// TODO: Implement decimal256 comparison
|
104
|
+
todo!("decimal256 comparison");
|
105
|
+
}
|
106
|
+
}
|
97
107
|
(ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
|
98
108
|
(ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
|
99
109
|
(ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
|
@@ -130,6 +140,10 @@ impl std::hash::Hash for ParquetValue {
|
|
130
140
|
d.hash(state);
|
131
141
|
scale.hash(state);
|
132
142
|
}
|
143
|
+
ParquetValue::Decimal256(d, scale) => {
|
144
|
+
d.hash(state);
|
145
|
+
scale.hash(state);
|
146
|
+
}
|
133
147
|
ParquetValue::TimestampSecond(ts, tz) => {
|
134
148
|
ts.hash(state);
|
135
149
|
tz.hash(state);
|
@@ -185,6 +199,17 @@ impl TryIntoValue for ParquetValue {
|
|
185
199
|
let kernel = handle.module_kernel();
|
186
200
|
Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
|
187
201
|
}
|
202
|
+
ParquetValue::Decimal256(d, scale) => {
|
203
|
+
// Load the bigdecimal gem if it's not already loaded
|
204
|
+
LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
|
205
|
+
|
206
|
+
// Format with proper scaling based on the sign of scale
|
207
|
+
// Use specialized function to preserve full precision
|
208
|
+
let value = format_i256_decimal_with_scale(d, scale)?;
|
209
|
+
|
210
|
+
let kernel = handle.module_kernel();
|
211
|
+
Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
|
212
|
+
}
|
188
213
|
ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
|
189
214
|
ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
|
190
215
|
timestamp @ ParquetValue::TimestampSecond(_, _) => {
|
@@ -292,9 +317,21 @@ impl ParquetValue {
|
|
292
317
|
}
|
293
318
|
PrimitiveType::Decimal128(_precision, scale) => {
|
294
319
|
if value.is_kind_of(ruby.class_string()) {
|
295
|
-
convert_to_decimal128(value, *scale)
320
|
+
convert_to_decimal(value, *scale)
|
296
321
|
} else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
|
297
|
-
convert_to_decimal128(s.as_value(), *scale)
322
|
+
convert_to_decimal(s.as_value(), *scale)
|
323
|
+
} else {
|
324
|
+
Err(MagnusError::new(
|
325
|
+
magnus::exception::type_error(),
|
326
|
+
"Expected a string for a decimal type",
|
327
|
+
))
|
328
|
+
}
|
329
|
+
}
|
330
|
+
PrimitiveType::Decimal256(_precision, scale) => {
|
331
|
+
if value.is_kind_of(ruby.class_string()) {
|
332
|
+
convert_to_decimal(value, *scale)
|
333
|
+
} else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
|
334
|
+
convert_to_decimal(s.as_value(), *scale)
|
298
335
|
} else {
|
299
336
|
Err(MagnusError::new(
|
300
337
|
magnus::exception::type_error(),
|
@@ -425,8 +462,14 @@ impl ParquetValue {
|
|
425
462
|
}
|
426
463
|
}
|
427
464
|
}
|
465
|
+
|
466
|
+
enum ParsedDecimal {
|
467
|
+
Int128(i128),
|
468
|
+
Int256(arrow_buffer::i256),
|
469
|
+
}
|
470
|
+
|
428
471
|
/// Unified helper to parse a decimal string and apply scaling
|
429
|
-
fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
|
472
|
+
fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<ParsedDecimal, MagnusError> {
|
430
473
|
let s = input_str.trim();
|
431
474
|
|
432
475
|
// 1. Handle scientific notation case (e.g., "0.12345e3")
|
@@ -445,12 +488,9 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
445
488
|
)
|
446
489
|
})?;
|
447
490
|
|
448
|
-
//
|
491
|
+
// For very large exponents, we'll need to use BigInt
|
449
492
|
if exp_val.abs() > 38 {
|
450
|
-
return Err(MagnusError::new(
|
451
|
-
magnus::exception::range_error(),
|
452
|
-
format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
|
453
|
-
));
|
493
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
454
494
|
}
|
455
495
|
|
456
496
|
// Handle the base part which might contain a decimal point
|
@@ -460,30 +500,23 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
460
500
|
|
461
501
|
let base_scale = base.len() - decimal_pos - 1;
|
462
502
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
})?;
|
472
|
-
|
473
|
-
(base_val, base_scale as i32)
|
503
|
+
// Try to parse as i128 first
|
504
|
+
match base_without_point.parse::<i128>() {
|
505
|
+
Ok(v) => (v, base_scale as i32),
|
506
|
+
Err(_) => {
|
507
|
+
// Value too large for i128, use BigInt
|
508
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
509
|
+
}
|
510
|
+
}
|
474
511
|
} else {
|
475
512
|
// No decimal point in base
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
)
|
484
|
-
})?;
|
485
|
-
|
486
|
-
(base_val, 0)
|
513
|
+
match base.parse::<i128>() {
|
514
|
+
Ok(v) => (v, 0),
|
515
|
+
Err(_) => {
|
516
|
+
// Value too large for i128, use BigInt
|
517
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
518
|
+
}
|
519
|
+
}
|
487
520
|
};
|
488
521
|
|
489
522
|
// Calculate the effective scale: base_scale - exp_val
|
@@ -495,12 +528,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
495
528
|
// Need to multiply to increase scale
|
496
529
|
let scale_diff = (input_scale as i32 - effective_scale) as u32;
|
497
530
|
if scale_diff > 38 {
|
498
|
-
return
|
499
|
-
|
500
|
-
|
501
|
-
|
531
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
532
|
+
}
|
533
|
+
|
534
|
+
// Check for overflow
|
535
|
+
match base_val.checked_mul(10_i128.pow(scale_diff)) {
|
536
|
+
Some(v) => Ok(ParsedDecimal::Int128(v)),
|
537
|
+
None => parse_large_decimal_with_bigint(s, input_scale),
|
502
538
|
}
|
503
|
-
Ok(base_val * 10_i128.pow(scale_diff))
|
504
539
|
}
|
505
540
|
std::cmp::Ordering::Greater => {
|
506
541
|
// Need to divide to decrease scale
|
@@ -511,9 +546,9 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
511
546
|
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
|
512
547
|
));
|
513
548
|
}
|
514
|
-
Ok(base_val / 10_i128.pow(scale_diff))
|
549
|
+
Ok(ParsedDecimal::Int128(base_val / 10_i128.pow(scale_diff)))
|
515
550
|
}
|
516
|
-
std::cmp::Ordering::Equal => Ok(base_val),
|
551
|
+
std::cmp::Ordering::Equal => Ok(ParsedDecimal::Int128(base_val)),
|
517
552
|
}
|
518
553
|
}
|
519
554
|
// 2. Handle decimal point in the string (e.g., "123.456")
|
@@ -524,16 +559,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
524
559
|
// Calculate the actual scale from the decimal position
|
525
560
|
let actual_scale = s.len() - decimal_pos - 1;
|
526
561
|
|
527
|
-
//
|
528
|
-
let v = s_without_point.parse::<i128>()
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
)
|
536
|
-
})?;
|
562
|
+
// Try to parse as i128 first
|
563
|
+
let v = match s_without_point.parse::<i128>() {
|
564
|
+
Ok(v) => v,
|
565
|
+
Err(_) => {
|
566
|
+
// Value too large for i128, use BigInt
|
567
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
568
|
+
}
|
569
|
+
};
|
537
570
|
|
538
571
|
// Scale the value if needed based on the difference between
|
539
572
|
// the actual scale and the requested scale
|
@@ -542,12 +575,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
542
575
|
// Need to multiply to increase scale
|
543
576
|
let scale_diff = (input_scale - actual_scale as i8) as u32;
|
544
577
|
if scale_diff > 38 {
|
545
|
-
return
|
546
|
-
|
547
|
-
|
548
|
-
|
578
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
579
|
+
}
|
580
|
+
|
581
|
+
// Check for overflow
|
582
|
+
match v.checked_mul(10_i128.pow(scale_diff)) {
|
583
|
+
Some(v) => Ok(ParsedDecimal::Int128(v)),
|
584
|
+
None => parse_large_decimal_with_bigint(s, input_scale),
|
549
585
|
}
|
550
|
-
Ok(v * 10_i128.pow(scale_diff))
|
551
586
|
}
|
552
587
|
std::cmp::Ordering::Greater => {
|
553
588
|
// Need to divide to decrease scale
|
@@ -558,30 +593,25 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
558
593
|
format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
|
559
594
|
));
|
560
595
|
}
|
561
|
-
Ok(v / 10_i128.pow(scale_diff))
|
596
|
+
Ok(ParsedDecimal::Int128(v / 10_i128.pow(scale_diff)))
|
562
597
|
}
|
563
|
-
std::cmp::Ordering::Equal => Ok(v),
|
598
|
+
std::cmp::Ordering::Equal => Ok(ParsedDecimal::Int128(v)),
|
564
599
|
}
|
565
600
|
}
|
566
601
|
// 3. Plain integer value (e.g., "12345")
|
567
602
|
else {
|
568
|
-
// No decimal point, parse as i128
|
569
|
-
let v = s.parse::<i128>()
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
603
|
+
// No decimal point, try to parse as i128 first
|
604
|
+
let v = match s.parse::<i128>() {
|
605
|
+
Ok(v) => v,
|
606
|
+
Err(_) => {
|
607
|
+
// Value too large for i128, use BigInt
|
608
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
609
|
+
}
|
610
|
+
};
|
575
611
|
|
576
612
|
// Apply scale - make sure it's reasonable
|
577
613
|
if input_scale > 38 {
|
578
|
-
return Err(MagnusError::new(
|
579
|
-
magnus::exception::range_error(),
|
580
|
-
format!(
|
581
|
-
"Scale {} is too large for decimal value '{}'. Must be ≤ 38.",
|
582
|
-
input_scale, s
|
583
|
-
),
|
584
|
-
));
|
614
|
+
return parse_large_decimal_with_bigint(s, input_scale);
|
585
615
|
} else if input_scale < -38 {
|
586
616
|
return Err(MagnusError::new(
|
587
617
|
magnus::exception::range_error(),
|
@@ -594,15 +624,153 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
|
|
594
624
|
|
595
625
|
// Apply positive scale (multiply)
|
596
626
|
if input_scale >= 0 {
|
597
|
-
|
627
|
+
match v.checked_mul(10_i128.pow(input_scale as u32)) {
|
628
|
+
Some(v) => Ok(ParsedDecimal::Int128(v)),
|
629
|
+
None => parse_large_decimal_with_bigint(s, input_scale),
|
630
|
+
}
|
598
631
|
} else {
|
599
632
|
// Apply negative scale (divide)
|
600
|
-
Ok(
|
633
|
+
Ok(ParsedDecimal::Int128(
|
634
|
+
v / 10_i128.pow((-input_scale) as u32),
|
635
|
+
))
|
636
|
+
}
|
637
|
+
}
|
638
|
+
}
|
639
|
+
|
640
|
+
/// Parse large decimal values using BigInt when they would overflow i128
|
641
|
+
fn parse_large_decimal_with_bigint(s: &str, input_scale: i8) -> Result<ParsedDecimal, MagnusError> {
|
642
|
+
use num::BigInt;
|
643
|
+
use std::str::FromStr;
|
644
|
+
|
645
|
+
// Parse the input string as a BigInt
|
646
|
+
let bigint = if let Some(e_pos) = s.to_lowercase().find('e') {
|
647
|
+
// Handle scientific notation
|
648
|
+
let base = &s[0..e_pos];
|
649
|
+
let exp = &s[e_pos + 1..];
|
650
|
+
|
651
|
+
let exp_val = exp.parse::<i32>().map_err(|e| {
|
652
|
+
MagnusError::new(
|
653
|
+
magnus::exception::type_error(),
|
654
|
+
format!("Failed to parse exponent '{}': {}", exp, e),
|
655
|
+
)
|
656
|
+
})?;
|
657
|
+
|
658
|
+
// Parse base as BigInt
|
659
|
+
let base_bigint = if let Some(decimal_pos) = base.find('.') {
|
660
|
+
let mut base_without_point = base.to_string();
|
661
|
+
base_without_point.remove(decimal_pos);
|
662
|
+
let base_scale = base.len() - decimal_pos - 1;
|
663
|
+
|
664
|
+
let bigint = BigInt::from_str(&base_without_point).map_err(|e| {
|
665
|
+
MagnusError::new(
|
666
|
+
magnus::exception::type_error(),
|
667
|
+
format!("Failed to parse decimal base '{}': {}", base, e),
|
668
|
+
)
|
669
|
+
})?;
|
670
|
+
|
671
|
+
// Adjust for the decimal point
|
672
|
+
let effective_exp = exp_val - base_scale as i32;
|
673
|
+
|
674
|
+
if effective_exp > 0 {
|
675
|
+
bigint * BigInt::from(10).pow(effective_exp as u32)
|
676
|
+
} else if effective_exp < 0 {
|
677
|
+
bigint / BigInt::from(10).pow((-effective_exp) as u32)
|
678
|
+
} else {
|
679
|
+
bigint
|
680
|
+
}
|
681
|
+
} else {
|
682
|
+
let bigint = BigInt::from_str(base).map_err(|e| {
|
683
|
+
MagnusError::new(
|
684
|
+
magnus::exception::type_error(),
|
685
|
+
format!("Failed to parse decimal base '{}': {}", base, e),
|
686
|
+
)
|
687
|
+
})?;
|
688
|
+
|
689
|
+
if exp_val > 0 {
|
690
|
+
bigint * BigInt::from(10).pow(exp_val as u32)
|
691
|
+
} else if exp_val < 0 {
|
692
|
+
bigint / BigInt::from(10).pow((-exp_val) as u32)
|
693
|
+
} else {
|
694
|
+
bigint
|
695
|
+
}
|
696
|
+
};
|
697
|
+
|
698
|
+
base_bigint
|
699
|
+
} else if let Some(decimal_pos) = s.find('.') {
|
700
|
+
// Handle decimal point
|
701
|
+
let mut s_without_point = s.to_string();
|
702
|
+
s_without_point.remove(decimal_pos);
|
703
|
+
|
704
|
+
let actual_scale = s.len() - decimal_pos - 1;
|
705
|
+
let bigint = BigInt::from_str(&s_without_point).map_err(|e| {
|
706
|
+
MagnusError::new(
|
707
|
+
magnus::exception::type_error(),
|
708
|
+
format!("Failed to parse decimal string '{}': {}", s, e),
|
709
|
+
)
|
710
|
+
})?;
|
711
|
+
|
712
|
+
// Adjust for scale difference
|
713
|
+
let scale_diff = actual_scale as i8 - input_scale;
|
714
|
+
|
715
|
+
if scale_diff > 0 {
|
716
|
+
bigint / BigInt::from(10).pow(scale_diff as u32)
|
717
|
+
} else if scale_diff < 0 {
|
718
|
+
bigint * BigInt::from(10).pow((-scale_diff) as u32)
|
719
|
+
} else {
|
720
|
+
bigint
|
721
|
+
}
|
722
|
+
} else {
|
723
|
+
// Plain integer
|
724
|
+
let bigint = BigInt::from_str(s).map_err(|e| {
|
725
|
+
MagnusError::new(
|
726
|
+
magnus::exception::type_error(),
|
727
|
+
format!("Failed to parse integer string '{}': {}", s, e),
|
728
|
+
)
|
729
|
+
})?;
|
730
|
+
|
731
|
+
if input_scale > 0 {
|
732
|
+
bigint * BigInt::from(10).pow(input_scale as u32)
|
733
|
+
} else if input_scale < 0 {
|
734
|
+
bigint / BigInt::from(10).pow((-input_scale) as u32)
|
735
|
+
} else {
|
736
|
+
bigint
|
601
737
|
}
|
738
|
+
};
|
739
|
+
|
740
|
+
// Convert BigInt to bytes and then to i256
|
741
|
+
let bytes = bigint.to_signed_bytes_le();
|
742
|
+
|
743
|
+
if bytes.len() <= 16 {
|
744
|
+
// Fits in i128
|
745
|
+
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
746
|
+
[0xff; 16]
|
747
|
+
} else {
|
748
|
+
[0; 16]
|
749
|
+
};
|
750
|
+
buf[..bytes.len()].copy_from_slice(&bytes);
|
751
|
+
|
752
|
+
Ok(ParsedDecimal::Int128(i128::from_le_bytes(buf)))
|
753
|
+
} else if bytes.len() <= 32 {
|
754
|
+
// Fits in i256
|
755
|
+
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
756
|
+
[0xff; 32]
|
757
|
+
} else {
|
758
|
+
[0; 32]
|
759
|
+
};
|
760
|
+
buf[..bytes.len()].copy_from_slice(&bytes);
|
761
|
+
|
762
|
+
Ok(ParsedDecimal::Int256(arrow_buffer::i256::from_le_bytes(
|
763
|
+
buf,
|
764
|
+
)))
|
765
|
+
} else {
|
766
|
+
Err(MagnusError::new(
|
767
|
+
magnus::exception::range_error(),
|
768
|
+
format!("Decimal value '{}' is too large to fit in 256 bits", s),
|
769
|
+
))
|
602
770
|
}
|
603
771
|
}
|
604
772
|
|
605
|
-
fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
|
773
|
+
fn convert_to_decimal(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
|
606
774
|
// Get the decimal string based on the type of value
|
607
775
|
let s = if unsafe { value.classname() } == "BigDecimal" {
|
608
776
|
value
|
@@ -614,7 +782,10 @@ fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, Magnus
|
|
614
782
|
|
615
783
|
// Use our unified parser to convert the string to a decimal value with scaling
|
616
784
|
match parse_decimal_string(&s, scale) {
|
617
|
-
Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value, scale)),
|
785
|
+
Ok(decimal_value) => match decimal_value {
|
786
|
+
ParsedDecimal::Int128(v) => Ok(ParquetValue::Decimal128(v, scale)),
|
787
|
+
ParsedDecimal::Int256(v) => Ok(ParquetValue::Decimal256(v, scale)),
|
788
|
+
},
|
618
789
|
Err(e) => Err(MagnusError::new(
|
619
790
|
magnus::exception::type_error(),
|
620
791
|
format!(
|
@@ -731,6 +902,52 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
731
902
|
}
|
732
903
|
DataType::Date32 => impl_numeric_array_conversion!(column.array, Date32Array, Date32),
|
733
904
|
DataType::Date64 => impl_numeric_array_conversion!(column.array, Date64Array, Date64),
|
905
|
+
DataType::Decimal128(_precision, scale) => {
|
906
|
+
let array = downcast_array::<Decimal128Array>(column.array);
|
907
|
+
Ok(ParquetValueVec(if array.is_nullable() {
|
908
|
+
array
|
909
|
+
.values()
|
910
|
+
.iter()
|
911
|
+
.enumerate()
|
912
|
+
.map(|(i, x)| {
|
913
|
+
if array.is_null(i) {
|
914
|
+
ParquetValue::Null
|
915
|
+
} else {
|
916
|
+
ParquetValue::Decimal128(*x, *scale)
|
917
|
+
}
|
918
|
+
})
|
919
|
+
.collect()
|
920
|
+
} else {
|
921
|
+
array
|
922
|
+
.values()
|
923
|
+
.iter()
|
924
|
+
.map(|x| ParquetValue::Decimal128(*x, *scale))
|
925
|
+
.collect()
|
926
|
+
}))
|
927
|
+
}
|
928
|
+
DataType::Decimal256(_precision, scale) => {
|
929
|
+
let array = downcast_array::<Decimal256Array>(column.array);
|
930
|
+
Ok(ParquetValueVec(if array.is_nullable() {
|
931
|
+
array
|
932
|
+
.values()
|
933
|
+
.iter()
|
934
|
+
.enumerate()
|
935
|
+
.map(|(i, x)| {
|
936
|
+
if array.is_null(i) {
|
937
|
+
ParquetValue::Null
|
938
|
+
} else {
|
939
|
+
ParquetValue::Decimal256(*x, *scale)
|
940
|
+
}
|
941
|
+
})
|
942
|
+
.collect()
|
943
|
+
} else {
|
944
|
+
array
|
945
|
+
.values()
|
946
|
+
.iter()
|
947
|
+
.map(|x| ParquetValue::Decimal256(*x, *scale))
|
948
|
+
.collect()
|
949
|
+
}))
|
950
|
+
}
|
734
951
|
DataType::Timestamp(TimeUnit::Second, tz) => {
|
735
952
|
impl_timestamp_array_conversion!(
|
736
953
|
column.array,
|
@@ -22,6 +22,19 @@ pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -
|
|
22
22
|
}
|
23
23
|
}
|
24
24
|
|
25
|
+
/// Format i256 decimal value with appropriate scale for BigDecimal conversion
|
26
|
+
/// Uses bytes conversion to preserve full precision
|
27
|
+
pub fn format_i256_decimal_with_scale(
|
28
|
+
value: arrow_buffer::i256,
|
29
|
+
scale: i8,
|
30
|
+
) -> Result<String, ParquetGemError> {
|
31
|
+
// Convert i256 to big-endian bytes
|
32
|
+
let bytes = value.to_be_bytes();
|
33
|
+
|
34
|
+
// Use the existing bytes_to_decimal function which handles full precision
|
35
|
+
bytes_to_decimal(&bytes, scale as i32)
|
36
|
+
}
|
37
|
+
|
25
38
|
/// Format decimal value with appropriate scale for BigDecimal conversion
|
26
39
|
/// Handles positive and negative scales correctly for i32 scale
|
27
40
|
pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
|
@@ -35,7 +48,7 @@ pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32)
|
|
35
48
|
}
|
36
49
|
|
37
50
|
/// Convert arbitrary-length big-endian byte array to decimal string
|
38
|
-
/// Supports byte arrays from 1 to 16 bytes in length
|
51
|
+
/// Supports byte arrays from 1 to 32 bytes in length
|
39
52
|
fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
|
40
53
|
match bytes.len() {
|
41
54
|
0 => Err(ParquetGemError::InvalidDecimal(
|
@@ -50,34 +63,34 @@ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError>
|
|
50
63
|
// For 2 bytes, use i16
|
51
64
|
let mut value: i16 = 0;
|
52
65
|
let is_negative = bytes[0] & 0x80 != 0;
|
53
|
-
|
66
|
+
|
54
67
|
for &byte in bytes {
|
55
68
|
value = (value << 8) | (byte as i16);
|
56
69
|
}
|
57
|
-
|
70
|
+
|
58
71
|
// Sign extend if negative
|
59
72
|
if is_negative {
|
60
73
|
let shift = 16 - (bytes.len() * 8);
|
61
74
|
value = (value << shift) >> shift;
|
62
75
|
}
|
63
|
-
|
76
|
+
|
64
77
|
Ok(format_decimal_with_i32_scale(value, scale))
|
65
78
|
}
|
66
79
|
3..=4 => {
|
67
80
|
// For 3-4 bytes, use i32
|
68
81
|
let mut value: i32 = 0;
|
69
82
|
let is_negative = bytes[0] & 0x80 != 0;
|
70
|
-
|
83
|
+
|
71
84
|
for &byte in bytes {
|
72
85
|
value = (value << 8) | (byte as i32);
|
73
86
|
}
|
74
|
-
|
87
|
+
|
75
88
|
// Sign extend if negative
|
76
89
|
if is_negative {
|
77
90
|
let shift = 32 - (bytes.len() * 8);
|
78
91
|
value = (value << shift) >> shift;
|
79
92
|
}
|
80
|
-
|
93
|
+
|
81
94
|
Ok(format_decimal_with_i32_scale(value, scale))
|
82
95
|
}
|
83
96
|
5..=8 => {
|
@@ -114,8 +127,79 @@ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError>
|
|
114
127
|
|
115
128
|
Ok(format_decimal_with_i32_scale(value, scale))
|
116
129
|
}
|
130
|
+
17..=32 => {
|
131
|
+
// For 17-32 bytes, we need arbitrary precision handling
|
132
|
+
// Check if the number is negative (MSB of first byte)
|
133
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
134
|
+
|
135
|
+
if is_negative {
|
136
|
+
// For negative numbers, we need to compute two's complement
|
137
|
+
// First, invert all bits
|
138
|
+
let mut inverted = Vec::with_capacity(bytes.len());
|
139
|
+
for &byte in bytes {
|
140
|
+
inverted.push(!byte);
|
141
|
+
}
|
142
|
+
|
143
|
+
// Then add 1
|
144
|
+
let mut carry = 1u8;
|
145
|
+
for i in (0..inverted.len()).rev() {
|
146
|
+
let (sum, new_carry) = inverted[i].overflowing_add(carry);
|
147
|
+
inverted[i] = sum;
|
148
|
+
carry = if new_carry { 1 } else { 0 };
|
149
|
+
}
|
150
|
+
|
151
|
+
// Convert to decimal string
|
152
|
+
let mut result = String::new();
|
153
|
+
let mut remainder = inverted;
|
154
|
+
|
155
|
+
// Repeatedly divide by 10 to get decimal digits
|
156
|
+
while !remainder.iter().all(|&b| b == 0) {
|
157
|
+
let mut carry = 0u16;
|
158
|
+
for i in 0..remainder.len() {
|
159
|
+
let temp = (carry << 8) | (remainder[i] as u16);
|
160
|
+
remainder[i] = (temp / 10) as u8;
|
161
|
+
carry = temp % 10;
|
162
|
+
}
|
163
|
+
result.push_str(&carry.to_string());
|
164
|
+
}
|
165
|
+
|
166
|
+
// The digits are in reverse order
|
167
|
+
if result.is_empty() {
|
168
|
+
result = "0".to_string();
|
169
|
+
} else {
|
170
|
+
result = result.chars().rev().collect();
|
171
|
+
}
|
172
|
+
|
173
|
+
// Add negative sign and format with scale
|
174
|
+
Ok(format_decimal_with_i32_scale(format!("-{}", result), scale))
|
175
|
+
} else {
|
176
|
+
// For positive numbers, direct conversion
|
177
|
+
let mut result = String::new();
|
178
|
+
let mut remainder = bytes.to_vec();
|
179
|
+
|
180
|
+
// Repeatedly divide by 10 to get decimal digits
|
181
|
+
while !remainder.iter().all(|&b| b == 0) {
|
182
|
+
let mut carry = 0u16;
|
183
|
+
for i in 0..remainder.len() {
|
184
|
+
let temp = (carry << 8) | (remainder[i] as u16);
|
185
|
+
remainder[i] = (temp / 10) as u8;
|
186
|
+
carry = temp % 10;
|
187
|
+
}
|
188
|
+
result.push_str(&carry.to_string());
|
189
|
+
}
|
190
|
+
|
191
|
+
// The digits are in reverse order
|
192
|
+
if result.is_empty() {
|
193
|
+
result = "0".to_string();
|
194
|
+
} else {
|
195
|
+
result = result.chars().rev().collect();
|
196
|
+
}
|
197
|
+
|
198
|
+
Ok(format_decimal_with_i32_scale(result, scale))
|
199
|
+
}
|
200
|
+
}
|
117
201
|
_ => Err(ParquetGemError::InvalidDecimal(format!(
|
118
|
-
"Unsupported decimal byte array size: {}",
|
202
|
+
"Unsupported decimal byte array size: {} (maximum 32 bytes)",
|
119
203
|
bytes.len()
|
120
204
|
))),
|
121
205
|
}
|