parquet 0.5.9 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
2
2
 
3
+ use super::record_types::{format_decimal_with_i8_scale, format_i256_decimal_with_scale};
3
4
  use super::*;
4
- use super::record_types::format_decimal_with_i8_scale;
5
5
  use arrow_array::MapArray;
6
6
  use magnus::{RArray, RString};
7
7
 
@@ -24,6 +24,7 @@ pub enum ParquetValue {
24
24
  Date32(i32),
25
25
  Date64(i64),
26
26
  Decimal128(i128, i8),
27
+ Decimal256(arrow_buffer::i256, i8),
27
28
  TimestampSecond(i64, Option<Arc<str>>),
28
29
  TimestampMillis(i64, Option<Arc<str>>),
29
30
  TimestampMicros(i64, Option<Arc<str>>),
@@ -94,6 +95,15 @@ impl PartialEq for ParquetValue {
94
95
  a_val == b_val
95
96
  }
96
97
  }
98
+ (ParquetValue::Decimal256(a, scale_a), ParquetValue::Decimal256(b, scale_b)) => {
99
+ if scale_a == scale_b {
100
+ // Same scale, compare directly
101
+ a == b
102
+ } else {
103
+ // TODO: Implement decimal256 comparison
104
+ todo!("decimal256 comparison");
105
+ }
106
+ }
97
107
  (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
98
108
  (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
99
109
  (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
@@ -130,6 +140,10 @@ impl std::hash::Hash for ParquetValue {
130
140
  d.hash(state);
131
141
  scale.hash(state);
132
142
  }
143
+ ParquetValue::Decimal256(d, scale) => {
144
+ d.hash(state);
145
+ scale.hash(state);
146
+ }
133
147
  ParquetValue::TimestampSecond(ts, tz) => {
134
148
  ts.hash(state);
135
149
  tz.hash(state);
@@ -185,6 +199,17 @@ impl TryIntoValue for ParquetValue {
185
199
  let kernel = handle.module_kernel();
186
200
  Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
187
201
  }
202
+ ParquetValue::Decimal256(d, scale) => {
203
+ // Load the bigdecimal gem if it's not already loaded
204
+ LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
205
+
206
+ // Format with proper scaling based on the sign of scale
207
+ // Use specialized function to preserve full precision
208
+ let value = format_i256_decimal_with_scale(d, scale)?;
209
+
210
+ let kernel = handle.module_kernel();
211
+ Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
212
+ }
188
213
  ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
189
214
  ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
190
215
  timestamp @ ParquetValue::TimestampSecond(_, _) => {
@@ -292,9 +317,21 @@ impl ParquetValue {
292
317
  }
293
318
  PrimitiveType::Decimal128(_precision, scale) => {
294
319
  if value.is_kind_of(ruby.class_string()) {
295
- convert_to_decimal128(value, *scale)
320
+ convert_to_decimal(value, *scale)
296
321
  } else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
297
- convert_to_decimal128(s.as_value(), *scale)
322
+ convert_to_decimal(s.as_value(), *scale)
323
+ } else {
324
+ Err(MagnusError::new(
325
+ magnus::exception::type_error(),
326
+ "Expected a string for a decimal type",
327
+ ))
328
+ }
329
+ }
330
+ PrimitiveType::Decimal256(_precision, scale) => {
331
+ if value.is_kind_of(ruby.class_string()) {
332
+ convert_to_decimal(value, *scale)
333
+ } else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
334
+ convert_to_decimal(s.as_value(), *scale)
298
335
  } else {
299
336
  Err(MagnusError::new(
300
337
  magnus::exception::type_error(),
@@ -425,8 +462,14 @@ impl ParquetValue {
425
462
  }
426
463
  }
427
464
  }
465
+
466
+ enum ParsedDecimal {
467
+ Int128(i128),
468
+ Int256(arrow_buffer::i256),
469
+ }
470
+
428
471
  /// Unified helper to parse a decimal string and apply scaling
429
- fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
472
+ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<ParsedDecimal, MagnusError> {
430
473
  let s = input_str.trim();
431
474
 
432
475
  // 1. Handle scientific notation case (e.g., "0.12345e3")
@@ -445,12 +488,9 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
445
488
  )
446
489
  })?;
447
490
 
448
- // Limit exponent to reasonable range to prevent overflow
491
+ // For very large exponents, we'll need to use BigInt
449
492
  if exp_val.abs() > 38 {
450
- return Err(MagnusError::new(
451
- magnus::exception::range_error(),
452
- format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
453
- ));
493
+ return parse_large_decimal_with_bigint(s, input_scale);
454
494
  }
455
495
 
456
496
  // Handle the base part which might contain a decimal point
@@ -460,30 +500,23 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
460
500
 
461
501
  let base_scale = base.len() - decimal_pos - 1;
462
502
 
463
- let base_val = base_without_point.parse::<i128>().map_err(|e| {
464
- MagnusError::new(
465
- magnus::exception::type_error(),
466
- format!(
467
- "Failed to parse base '{}' in scientific notation '{}': {}",
468
- base, s, e
469
- ),
470
- )
471
- })?;
472
-
473
- (base_val, base_scale as i32)
503
+ // Try to parse as i128 first
504
+ match base_without_point.parse::<i128>() {
505
+ Ok(v) => (v, base_scale as i32),
506
+ Err(_) => {
507
+ // Value too large for i128, use BigInt
508
+ return parse_large_decimal_with_bigint(s, input_scale);
509
+ }
510
+ }
474
511
  } else {
475
512
  // No decimal point in base
476
- let base_val = base.parse::<i128>().map_err(|e| {
477
- MagnusError::new(
478
- magnus::exception::type_error(),
479
- format!(
480
- "Failed to parse base '{}' in scientific notation '{}': {}",
481
- base, s, e
482
- ),
483
- )
484
- })?;
485
-
486
- (base_val, 0)
513
+ match base.parse::<i128>() {
514
+ Ok(v) => (v, 0),
515
+ Err(_) => {
516
+ // Value too large for i128, use BigInt
517
+ return parse_large_decimal_with_bigint(s, input_scale);
518
+ }
519
+ }
487
520
  };
488
521
 
489
522
  // Calculate the effective scale: base_scale - exp_val
@@ -495,12 +528,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
495
528
  // Need to multiply to increase scale
496
529
  let scale_diff = (input_scale as i32 - effective_scale) as u32;
497
530
  if scale_diff > 38 {
498
- return Err(MagnusError::new(
499
- magnus::exception::range_error(),
500
- format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
501
- ));
531
+ return parse_large_decimal_with_bigint(s, input_scale);
532
+ }
533
+
534
+ // Check for overflow
535
+ match base_val.checked_mul(10_i128.pow(scale_diff)) {
536
+ Some(v) => Ok(ParsedDecimal::Int128(v)),
537
+ None => parse_large_decimal_with_bigint(s, input_scale),
502
538
  }
503
- Ok(base_val * 10_i128.pow(scale_diff))
504
539
  }
505
540
  std::cmp::Ordering::Greater => {
506
541
  // Need to divide to decrease scale
@@ -511,9 +546,9 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
511
546
  format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
512
547
  ));
513
548
  }
514
- Ok(base_val / 10_i128.pow(scale_diff))
549
+ Ok(ParsedDecimal::Int128(base_val / 10_i128.pow(scale_diff)))
515
550
  }
516
- std::cmp::Ordering::Equal => Ok(base_val),
551
+ std::cmp::Ordering::Equal => Ok(ParsedDecimal::Int128(base_val)),
517
552
  }
518
553
  }
519
554
  // 2. Handle decimal point in the string (e.g., "123.456")
@@ -524,16 +559,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
524
559
  // Calculate the actual scale from the decimal position
525
560
  let actual_scale = s.len() - decimal_pos - 1;
526
561
 
527
- // Parse the string without decimal point as i128
528
- let v = s_without_point.parse::<i128>().map_err(|e| {
529
- MagnusError::new(
530
- magnus::exception::type_error(),
531
- format!(
532
- "Failed to parse decimal string '{}' (without decimal point: '{}'): {}",
533
- s, s_without_point, e
534
- ),
535
- )
536
- })?;
562
+ // Try to parse as i128 first
563
+ let v = match s_without_point.parse::<i128>() {
564
+ Ok(v) => v,
565
+ Err(_) => {
566
+ // Value too large for i128, use BigInt
567
+ return parse_large_decimal_with_bigint(s, input_scale);
568
+ }
569
+ };
537
570
 
538
571
  // Scale the value if needed based on the difference between
539
572
  // the actual scale and the requested scale
@@ -542,12 +575,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
542
575
  // Need to multiply to increase scale
543
576
  let scale_diff = (input_scale - actual_scale as i8) as u32;
544
577
  if scale_diff > 38 {
545
- return Err(MagnusError::new(
546
- magnus::exception::range_error(),
547
- format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
548
- ));
578
+ return parse_large_decimal_with_bigint(s, input_scale);
579
+ }
580
+
581
+ // Check for overflow
582
+ match v.checked_mul(10_i128.pow(scale_diff)) {
583
+ Some(v) => Ok(ParsedDecimal::Int128(v)),
584
+ None => parse_large_decimal_with_bigint(s, input_scale),
549
585
  }
550
- Ok(v * 10_i128.pow(scale_diff))
551
586
  }
552
587
  std::cmp::Ordering::Greater => {
553
588
  // Need to divide to decrease scale
@@ -558,30 +593,25 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
558
593
  format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
559
594
  ));
560
595
  }
561
- Ok(v / 10_i128.pow(scale_diff))
596
+ Ok(ParsedDecimal::Int128(v / 10_i128.pow(scale_diff)))
562
597
  }
563
- std::cmp::Ordering::Equal => Ok(v),
598
+ std::cmp::Ordering::Equal => Ok(ParsedDecimal::Int128(v)),
564
599
  }
565
600
  }
566
601
  // 3. Plain integer value (e.g., "12345")
567
602
  else {
568
- // No decimal point, parse as i128 and scale appropriately
569
- let v = s.parse::<i128>().map_err(|e| {
570
- MagnusError::new(
571
- magnus::exception::type_error(),
572
- format!("Failed to parse integer string '{}' as decimal: {}", s, e),
573
- )
574
- })?;
603
+ // No decimal point, try to parse as i128 first
604
+ let v = match s.parse::<i128>() {
605
+ Ok(v) => v,
606
+ Err(_) => {
607
+ // Value too large for i128, use BigInt
608
+ return parse_large_decimal_with_bigint(s, input_scale);
609
+ }
610
+ };
575
611
 
576
612
  // Apply scale - make sure it's reasonable
577
613
  if input_scale > 38 {
578
- return Err(MagnusError::new(
579
- magnus::exception::range_error(),
580
- format!(
581
- "Scale {} is too large for decimal value '{}'. Must be ≤ 38.",
582
- input_scale, s
583
- ),
584
- ));
614
+ return parse_large_decimal_with_bigint(s, input_scale);
585
615
  } else if input_scale < -38 {
586
616
  return Err(MagnusError::new(
587
617
  magnus::exception::range_error(),
@@ -594,15 +624,153 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
594
624
 
595
625
  // Apply positive scale (multiply)
596
626
  if input_scale >= 0 {
597
- Ok(v * 10_i128.pow(input_scale as u32))
627
+ match v.checked_mul(10_i128.pow(input_scale as u32)) {
628
+ Some(v) => Ok(ParsedDecimal::Int128(v)),
629
+ None => parse_large_decimal_with_bigint(s, input_scale),
630
+ }
598
631
  } else {
599
632
  // Apply negative scale (divide)
600
- Ok(v / 10_i128.pow((-input_scale) as u32))
633
+ Ok(ParsedDecimal::Int128(
634
+ v / 10_i128.pow((-input_scale) as u32),
635
+ ))
636
+ }
637
+ }
638
+ }
639
+
640
+ /// Parse large decimal values using BigInt when they would overflow i128
641
+ fn parse_large_decimal_with_bigint(s: &str, input_scale: i8) -> Result<ParsedDecimal, MagnusError> {
642
+ use num::BigInt;
643
+ use std::str::FromStr;
644
+
645
+ // Parse the input string as a BigInt
646
+ let bigint = if let Some(e_pos) = s.to_lowercase().find('e') {
647
+ // Handle scientific notation
648
+ let base = &s[0..e_pos];
649
+ let exp = &s[e_pos + 1..];
650
+
651
+ let exp_val = exp.parse::<i32>().map_err(|e| {
652
+ MagnusError::new(
653
+ magnus::exception::type_error(),
654
+ format!("Failed to parse exponent '{}': {}", exp, e),
655
+ )
656
+ })?;
657
+
658
+ // Parse base as BigInt
659
+ let base_bigint = if let Some(decimal_pos) = base.find('.') {
660
+ let mut base_without_point = base.to_string();
661
+ base_without_point.remove(decimal_pos);
662
+ let base_scale = base.len() - decimal_pos - 1;
663
+
664
+ let bigint = BigInt::from_str(&base_without_point).map_err(|e| {
665
+ MagnusError::new(
666
+ magnus::exception::type_error(),
667
+ format!("Failed to parse decimal base '{}': {}", base, e),
668
+ )
669
+ })?;
670
+
671
+ // Adjust for the decimal point
672
+ let effective_exp = exp_val - base_scale as i32;
673
+
674
+ if effective_exp > 0 {
675
+ bigint * BigInt::from(10).pow(effective_exp as u32)
676
+ } else if effective_exp < 0 {
677
+ bigint / BigInt::from(10).pow((-effective_exp) as u32)
678
+ } else {
679
+ bigint
680
+ }
681
+ } else {
682
+ let bigint = BigInt::from_str(base).map_err(|e| {
683
+ MagnusError::new(
684
+ magnus::exception::type_error(),
685
+ format!("Failed to parse decimal base '{}': {}", base, e),
686
+ )
687
+ })?;
688
+
689
+ if exp_val > 0 {
690
+ bigint * BigInt::from(10).pow(exp_val as u32)
691
+ } else if exp_val < 0 {
692
+ bigint / BigInt::from(10).pow((-exp_val) as u32)
693
+ } else {
694
+ bigint
695
+ }
696
+ };
697
+
698
+ base_bigint
699
+ } else if let Some(decimal_pos) = s.find('.') {
700
+ // Handle decimal point
701
+ let mut s_without_point = s.to_string();
702
+ s_without_point.remove(decimal_pos);
703
+
704
+ let actual_scale = s.len() - decimal_pos - 1;
705
+ let bigint = BigInt::from_str(&s_without_point).map_err(|e| {
706
+ MagnusError::new(
707
+ magnus::exception::type_error(),
708
+ format!("Failed to parse decimal string '{}': {}", s, e),
709
+ )
710
+ })?;
711
+
712
+ // Adjust for scale difference
713
+ let scale_diff = actual_scale as i8 - input_scale;
714
+
715
+ if scale_diff > 0 {
716
+ bigint / BigInt::from(10).pow(scale_diff as u32)
717
+ } else if scale_diff < 0 {
718
+ bigint * BigInt::from(10).pow((-scale_diff) as u32)
719
+ } else {
720
+ bigint
721
+ }
722
+ } else {
723
+ // Plain integer
724
+ let bigint = BigInt::from_str(s).map_err(|e| {
725
+ MagnusError::new(
726
+ magnus::exception::type_error(),
727
+ format!("Failed to parse integer string '{}': {}", s, e),
728
+ )
729
+ })?;
730
+
731
+ if input_scale > 0 {
732
+ bigint * BigInt::from(10).pow(input_scale as u32)
733
+ } else if input_scale < 0 {
734
+ bigint / BigInt::from(10).pow((-input_scale) as u32)
735
+ } else {
736
+ bigint
601
737
  }
738
+ };
739
+
740
+ // Convert BigInt to bytes and then to i256
741
+ let bytes = bigint.to_signed_bytes_le();
742
+
743
+ if bytes.len() <= 16 {
744
+ // Fits in i128
745
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
746
+ [0xff; 16]
747
+ } else {
748
+ [0; 16]
749
+ };
750
+ buf[..bytes.len()].copy_from_slice(&bytes);
751
+
752
+ Ok(ParsedDecimal::Int128(i128::from_le_bytes(buf)))
753
+ } else if bytes.len() <= 32 {
754
+ // Fits in i256
755
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
756
+ [0xff; 32]
757
+ } else {
758
+ [0; 32]
759
+ };
760
+ buf[..bytes.len()].copy_from_slice(&bytes);
761
+
762
+ Ok(ParsedDecimal::Int256(arrow_buffer::i256::from_le_bytes(
763
+ buf,
764
+ )))
765
+ } else {
766
+ Err(MagnusError::new(
767
+ magnus::exception::range_error(),
768
+ format!("Decimal value '{}' is too large to fit in 256 bits", s),
769
+ ))
602
770
  }
603
771
  }
604
772
 
605
- fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
773
+ fn convert_to_decimal(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
606
774
  // Get the decimal string based on the type of value
607
775
  let s = if unsafe { value.classname() } == "BigDecimal" {
608
776
  value
@@ -614,7 +782,10 @@ fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, Magnus
614
782
 
615
783
  // Use our unified parser to convert the string to a decimal value with scaling
616
784
  match parse_decimal_string(&s, scale) {
617
- Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value, scale)),
785
+ Ok(decimal_value) => match decimal_value {
786
+ ParsedDecimal::Int128(v) => Ok(ParquetValue::Decimal128(v, scale)),
787
+ ParsedDecimal::Int256(v) => Ok(ParquetValue::Decimal256(v, scale)),
788
+ },
618
789
  Err(e) => Err(MagnusError::new(
619
790
  magnus::exception::type_error(),
620
791
  format!(
@@ -731,6 +902,52 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
731
902
  }
732
903
  DataType::Date32 => impl_numeric_array_conversion!(column.array, Date32Array, Date32),
733
904
  DataType::Date64 => impl_numeric_array_conversion!(column.array, Date64Array, Date64),
905
+ DataType::Decimal128(_precision, scale) => {
906
+ let array = downcast_array::<Decimal128Array>(column.array);
907
+ Ok(ParquetValueVec(if array.is_nullable() {
908
+ array
909
+ .values()
910
+ .iter()
911
+ .enumerate()
912
+ .map(|(i, x)| {
913
+ if array.is_null(i) {
914
+ ParquetValue::Null
915
+ } else {
916
+ ParquetValue::Decimal128(*x, *scale)
917
+ }
918
+ })
919
+ .collect()
920
+ } else {
921
+ array
922
+ .values()
923
+ .iter()
924
+ .map(|x| ParquetValue::Decimal128(*x, *scale))
925
+ .collect()
926
+ }))
927
+ }
928
+ DataType::Decimal256(_precision, scale) => {
929
+ let array = downcast_array::<Decimal256Array>(column.array);
930
+ Ok(ParquetValueVec(if array.is_nullable() {
931
+ array
932
+ .values()
933
+ .iter()
934
+ .enumerate()
935
+ .map(|(i, x)| {
936
+ if array.is_null(i) {
937
+ ParquetValue::Null
938
+ } else {
939
+ ParquetValue::Decimal256(*x, *scale)
940
+ }
941
+ })
942
+ .collect()
943
+ } else {
944
+ array
945
+ .values()
946
+ .iter()
947
+ .map(|x| ParquetValue::Decimal256(*x, *scale))
948
+ .collect()
949
+ }))
950
+ }
734
951
  DataType::Timestamp(TimeUnit::Second, tz) => {
735
952
  impl_timestamp_array_conversion!(
736
953
  column.array,
@@ -22,6 +22,19 @@ pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -
22
22
  }
23
23
  }
24
24
 
25
+ /// Format i256 decimal value with appropriate scale for BigDecimal conversion
26
+ /// Uses bytes conversion to preserve full precision
27
+ pub fn format_i256_decimal_with_scale(
28
+ value: arrow_buffer::i256,
29
+ scale: i8,
30
+ ) -> Result<String, ParquetGemError> {
31
+ // Convert i256 to big-endian bytes
32
+ let bytes = value.to_be_bytes();
33
+
34
+ // Use the existing bytes_to_decimal function which handles full precision
35
+ bytes_to_decimal(&bytes, scale as i32)
36
+ }
37
+
25
38
  /// Format decimal value with appropriate scale for BigDecimal conversion
26
39
  /// Handles positive and negative scales correctly for i32 scale
27
40
  pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
@@ -35,7 +48,7 @@ pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32)
35
48
  }
36
49
 
37
50
  /// Convert arbitrary-length big-endian byte array to decimal string
38
- /// Supports byte arrays from 1 to 16 bytes in length
51
+ /// Supports byte arrays from 1 to 32 bytes in length
39
52
  fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
40
53
  match bytes.len() {
41
54
  0 => Err(ParquetGemError::InvalidDecimal(
@@ -50,34 +63,34 @@ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError>
50
63
  // For 2 bytes, use i16
51
64
  let mut value: i16 = 0;
52
65
  let is_negative = bytes[0] & 0x80 != 0;
53
-
66
+
54
67
  for &byte in bytes {
55
68
  value = (value << 8) | (byte as i16);
56
69
  }
57
-
70
+
58
71
  // Sign extend if negative
59
72
  if is_negative {
60
73
  let shift = 16 - (bytes.len() * 8);
61
74
  value = (value << shift) >> shift;
62
75
  }
63
-
76
+
64
77
  Ok(format_decimal_with_i32_scale(value, scale))
65
78
  }
66
79
  3..=4 => {
67
80
  // For 3-4 bytes, use i32
68
81
  let mut value: i32 = 0;
69
82
  let is_negative = bytes[0] & 0x80 != 0;
70
-
83
+
71
84
  for &byte in bytes {
72
85
  value = (value << 8) | (byte as i32);
73
86
  }
74
-
87
+
75
88
  // Sign extend if negative
76
89
  if is_negative {
77
90
  let shift = 32 - (bytes.len() * 8);
78
91
  value = (value << shift) >> shift;
79
92
  }
80
-
93
+
81
94
  Ok(format_decimal_with_i32_scale(value, scale))
82
95
  }
83
96
  5..=8 => {
@@ -114,8 +127,79 @@ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError>
114
127
 
115
128
  Ok(format_decimal_with_i32_scale(value, scale))
116
129
  }
130
+ 17..=32 => {
131
+ // For 17-32 bytes, we need arbitrary precision handling
132
+ // Check if the number is negative (MSB of first byte)
133
+ let is_negative = bytes[0] & 0x80 != 0;
134
+
135
+ if is_negative {
136
+ // For negative numbers, we need to compute two's complement
137
+ // First, invert all bits
138
+ let mut inverted = Vec::with_capacity(bytes.len());
139
+ for &byte in bytes {
140
+ inverted.push(!byte);
141
+ }
142
+
143
+ // Then add 1
144
+ let mut carry = 1u8;
145
+ for i in (0..inverted.len()).rev() {
146
+ let (sum, new_carry) = inverted[i].overflowing_add(carry);
147
+ inverted[i] = sum;
148
+ carry = if new_carry { 1 } else { 0 };
149
+ }
150
+
151
+ // Convert to decimal string
152
+ let mut result = String::new();
153
+ let mut remainder = inverted;
154
+
155
+ // Repeatedly divide by 10 to get decimal digits
156
+ while !remainder.iter().all(|&b| b == 0) {
157
+ let mut carry = 0u16;
158
+ for i in 0..remainder.len() {
159
+ let temp = (carry << 8) | (remainder[i] as u16);
160
+ remainder[i] = (temp / 10) as u8;
161
+ carry = temp % 10;
162
+ }
163
+ result.push_str(&carry.to_string());
164
+ }
165
+
166
+ // The digits are in reverse order
167
+ if result.is_empty() {
168
+ result = "0".to_string();
169
+ } else {
170
+ result = result.chars().rev().collect();
171
+ }
172
+
173
+ // Add negative sign and format with scale
174
+ Ok(format_decimal_with_i32_scale(format!("-{}", result), scale))
175
+ } else {
176
+ // For positive numbers, direct conversion
177
+ let mut result = String::new();
178
+ let mut remainder = bytes.to_vec();
179
+
180
+ // Repeatedly divide by 10 to get decimal digits
181
+ while !remainder.iter().all(|&b| b == 0) {
182
+ let mut carry = 0u16;
183
+ for i in 0..remainder.len() {
184
+ let temp = (carry << 8) | (remainder[i] as u16);
185
+ remainder[i] = (temp / 10) as u8;
186
+ carry = temp % 10;
187
+ }
188
+ result.push_str(&carry.to_string());
189
+ }
190
+
191
+ // The digits are in reverse order
192
+ if result.is_empty() {
193
+ result = "0".to_string();
194
+ } else {
195
+ result = result.chars().rev().collect();
196
+ }
197
+
198
+ Ok(format_decimal_with_i32_scale(result, scale))
199
+ }
200
+ }
117
201
  _ => Err(ParquetGemError::InvalidDecimal(format!(
118
- "Unsupported decimal byte array size: {}",
202
+ "Unsupported decimal byte array size: {} (maximum 32 bytes)",
119
203
  bytes.len()
120
204
  ))),
121
205
  }