parquet 0.5.9 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1a1f7c250b960dbe334145a537e865889fbc759e7b8bfbafcbadc77689972cd
4
- data.tar.gz: 22116ec0b9fe89f0ad08a3674267bd00b141170b889091b476f3aab0d6be88a8
3
+ metadata.gz: 82528b663c4a577262db90b6d17ba473a81d0ea725ceba486b63a3619040fa73
4
+ data.tar.gz: 2e44daa9b4e36ef1503589daaa0815cbc3acee10c565d9942f6c0b6d35ced5f0
5
5
  SHA512:
6
- metadata.gz: ef8485d03247dd0d31993a774117669c1aaef5b875e7cb5c6f4c53e237a72fb81113ea35251426a21ea1ba24f8ae568bd2c3a158e6a45ce2416a308251d0f467
7
- data.tar.gz: 672f38dfbf703dae996283fba8d137529e3089f569797df87feaac32fb0f956ea7c4d7ae57032715d1a21bd5bfa4dd728c2a3fe80174fb5fc0abdef51c73110a
6
+ metadata.gz: 418951253384f5492385fcb30fa5b0113b85d9bc51346b6abad16105c124d8869266943c1a29bc0879cfee4270b94d32fb99004e233c6ebde4a70e1d329435af
7
+ data.tar.gz: bc0db4ebb36add314253b5b9b946cc2c84f315d51ba7fefbead6c7de3b65a3f7752fa4e4cf0be19704405b390ae0106d8383e30791e7fac4a86a75141c214de1
data/Cargo.lock CHANGED
@@ -126,6 +126,7 @@ dependencies = [
126
126
  "arrow-data",
127
127
  "arrow-schema",
128
128
  "flatbuffers",
129
+ "lz4_flex",
129
130
  ]
130
131
 
131
132
  [[package]]
@@ -842,6 +843,8 @@ version = "0.1.0"
842
843
  dependencies = [
843
844
  "ahash",
844
845
  "arrow-array",
846
+ "arrow-buffer",
847
+ "arrow-ipc",
845
848
  "arrow-schema",
846
849
  "bytes",
847
850
  "either",
@@ -12,6 +12,8 @@ rb-sys-env = "^0.2"
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
14
  arrow-array = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-time" }
15
+ arrow-buffer = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-time" }
16
+ arrow-ipc = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-time", features = ["lz4"] }
15
17
  arrow-schema = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-time" }
16
18
  bytes = "^1.9"
17
19
  either = "1.9"
data/ext/parquet/build.rs CHANGED
@@ -1,5 +1,5 @@
1
1
  pub fn main() -> Result<(), Box<dyn std::error::Error>> {
2
- let _rb_env = rb_sys_env::activate()?;
2
+ rb_sys_env::activate()?;
3
3
 
4
4
  Ok(())
5
5
  }
@@ -19,6 +19,9 @@ use writer::write_rows;
19
19
  /// Initializes the Ruby extension and defines methods.
20
20
  #[magnus::init]
21
21
  fn init(ruby: &Ruby) -> Result<(), Error> {
22
+ // Require 'time' for Time.parse method
23
+ ruby.require("time")?;
24
+
22
25
  let module = ruby.define_module("Parquet")?;
23
26
  module.define_module_function("metadata", magnus::method!(reader::parse_metadata, -1))?;
24
27
  module.define_module_function("each_row", magnus::method!(parse_parquet_rows, -1))?;
@@ -108,6 +108,7 @@ pub enum PrimitiveType {
108
108
  Float32,
109
109
  Float64,
110
110
  Decimal128(u8, i8),
111
+ Decimal256(u8, i8),
111
112
  Boolean,
112
113
  String,
113
114
  Binary,
@@ -23,10 +23,11 @@ pub use writer_types::*;
23
23
  // Common imports used across the module
24
24
  use arrow_array::cast::downcast_array;
25
25
  use arrow_array::{
26
- Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Float16Array, Float32Array,
27
- Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, NullArray, StringArray,
28
- StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
29
- TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
26
+ Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array,
27
+ Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
28
+ ListArray, NullArray, StringArray, StructArray, TimestampMicrosecondArray,
29
+ TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array,
30
+ UInt32Array, UInt64Array, UInt8Array,
30
31
  };
31
32
  use arrow_schema::{DataType, TimeUnit};
32
33
  use magnus::{value::ReprValue, Error as MagnusError, IntoValue, Ruby, Value};
@@ -62,6 +63,8 @@ pub enum ParquetGemError {
62
63
  InvalidDecimal(String),
63
64
  #[error("Failed to parse UUID: {0}")]
64
65
  UuidError(#[from] uuid::Error),
66
+ #[error("Decimals larger than 128 bits are not supported")]
67
+ DecimalWouldBeTruncated,
65
68
  }
66
69
 
67
70
  #[derive(Debug)]
@@ -1,7 +1,7 @@
1
1
  use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
2
2
 
3
+ use super::record_types::{format_decimal_with_i8_scale, format_i256_decimal_with_scale};
3
4
  use super::*;
4
- use super::record_types::format_decimal_with_i8_scale;
5
5
  use arrow_array::MapArray;
6
6
  use magnus::{RArray, RString};
7
7
 
@@ -24,6 +24,7 @@ pub enum ParquetValue {
24
24
  Date32(i32),
25
25
  Date64(i64),
26
26
  Decimal128(i128, i8),
27
+ Decimal256(arrow_buffer::i256, i8),
27
28
  TimestampSecond(i64, Option<Arc<str>>),
28
29
  TimestampMillis(i64, Option<Arc<str>>),
29
30
  TimestampMicros(i64, Option<Arc<str>>),
@@ -94,6 +95,15 @@ impl PartialEq for ParquetValue {
94
95
  a_val == b_val
95
96
  }
96
97
  }
98
+ (ParquetValue::Decimal256(a, scale_a), ParquetValue::Decimal256(b, scale_b)) => {
99
+ if scale_a == scale_b {
100
+ // Same scale, compare directly
101
+ a == b
102
+ } else {
103
+ // TODO: Implement decimal256 comparison
104
+ todo!("decimal256 comparison");
105
+ }
106
+ }
97
107
  (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
98
108
  (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
99
109
  (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
@@ -130,6 +140,10 @@ impl std::hash::Hash for ParquetValue {
130
140
  d.hash(state);
131
141
  scale.hash(state);
132
142
  }
143
+ ParquetValue::Decimal256(d, scale) => {
144
+ d.hash(state);
145
+ scale.hash(state);
146
+ }
133
147
  ParquetValue::TimestampSecond(ts, tz) => {
134
148
  ts.hash(state);
135
149
  tz.hash(state);
@@ -185,6 +199,17 @@ impl TryIntoValue for ParquetValue {
185
199
  let kernel = handle.module_kernel();
186
200
  Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
187
201
  }
202
+ ParquetValue::Decimal256(d, scale) => {
203
+ // Load the bigdecimal gem if it's not already loaded
204
+ LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
205
+
206
+ // Format with proper scaling based on the sign of scale
207
+ // Use specialized function to preserve full precision
208
+ let value = format_i256_decimal_with_scale(d, scale)?;
209
+
210
+ let kernel = handle.module_kernel();
211
+ Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
212
+ }
188
213
  ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
189
214
  ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
190
215
  timestamp @ ParquetValue::TimestampSecond(_, _) => {
@@ -292,9 +317,21 @@ impl ParquetValue {
292
317
  }
293
318
  PrimitiveType::Decimal128(_precision, scale) => {
294
319
  if value.is_kind_of(ruby.class_string()) {
295
- convert_to_decimal128(value, *scale)
320
+ convert_to_decimal(value, *scale)
296
321
  } else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
297
- convert_to_decimal128(s.as_value(), *scale)
322
+ convert_to_decimal(s.as_value(), *scale)
323
+ } else {
324
+ Err(MagnusError::new(
325
+ magnus::exception::type_error(),
326
+ "Expected a string for a decimal type",
327
+ ))
328
+ }
329
+ }
330
+ PrimitiveType::Decimal256(_precision, scale) => {
331
+ if value.is_kind_of(ruby.class_string()) {
332
+ convert_to_decimal(value, *scale)
333
+ } else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
334
+ convert_to_decimal(s.as_value(), *scale)
298
335
  } else {
299
336
  Err(MagnusError::new(
300
337
  magnus::exception::type_error(),
@@ -425,8 +462,14 @@ impl ParquetValue {
425
462
  }
426
463
  }
427
464
  }
465
+
466
+ enum ParsedDecimal {
467
+ Int128(i128),
468
+ Int256(arrow_buffer::i256),
469
+ }
470
+
428
471
  /// Unified helper to parse a decimal string and apply scaling
429
- fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
472
+ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<ParsedDecimal, MagnusError> {
430
473
  let s = input_str.trim();
431
474
 
432
475
  // 1. Handle scientific notation case (e.g., "0.12345e3")
@@ -445,12 +488,9 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
445
488
  )
446
489
  })?;
447
490
 
448
- // Limit exponent to reasonable range to prevent overflow
491
+ // For very large exponents, we'll need to use BigInt
449
492
  if exp_val.abs() > 38 {
450
- return Err(MagnusError::new(
451
- magnus::exception::range_error(),
452
- format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
453
- ));
493
+ return parse_large_decimal_with_bigint(s, input_scale);
454
494
  }
455
495
 
456
496
  // Handle the base part which might contain a decimal point
@@ -460,30 +500,23 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
460
500
 
461
501
  let base_scale = base.len() - decimal_pos - 1;
462
502
 
463
- let base_val = base_without_point.parse::<i128>().map_err(|e| {
464
- MagnusError::new(
465
- magnus::exception::type_error(),
466
- format!(
467
- "Failed to parse base '{}' in scientific notation '{}': {}",
468
- base, s, e
469
- ),
470
- )
471
- })?;
472
-
473
- (base_val, base_scale as i32)
503
+ // Try to parse as i128 first
504
+ match base_without_point.parse::<i128>() {
505
+ Ok(v) => (v, base_scale as i32),
506
+ Err(_) => {
507
+ // Value too large for i128, use BigInt
508
+ return parse_large_decimal_with_bigint(s, input_scale);
509
+ }
510
+ }
474
511
  } else {
475
512
  // No decimal point in base
476
- let base_val = base.parse::<i128>().map_err(|e| {
477
- MagnusError::new(
478
- magnus::exception::type_error(),
479
- format!(
480
- "Failed to parse base '{}' in scientific notation '{}': {}",
481
- base, s, e
482
- ),
483
- )
484
- })?;
485
-
486
- (base_val, 0)
513
+ match base.parse::<i128>() {
514
+ Ok(v) => (v, 0),
515
+ Err(_) => {
516
+ // Value too large for i128, use BigInt
517
+ return parse_large_decimal_with_bigint(s, input_scale);
518
+ }
519
+ }
487
520
  };
488
521
 
489
522
  // Calculate the effective scale: base_scale - exp_val
@@ -495,12 +528,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
495
528
  // Need to multiply to increase scale
496
529
  let scale_diff = (input_scale as i32 - effective_scale) as u32;
497
530
  if scale_diff > 38 {
498
- return Err(MagnusError::new(
499
- magnus::exception::range_error(),
500
- format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
501
- ));
531
+ return parse_large_decimal_with_bigint(s, input_scale);
532
+ }
533
+
534
+ // Check for overflow
535
+ match base_val.checked_mul(10_i128.pow(scale_diff)) {
536
+ Some(v) => Ok(ParsedDecimal::Int128(v)),
537
+ None => parse_large_decimal_with_bigint(s, input_scale),
502
538
  }
503
- Ok(base_val * 10_i128.pow(scale_diff))
504
539
  }
505
540
  std::cmp::Ordering::Greater => {
506
541
  // Need to divide to decrease scale
@@ -511,9 +546,9 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
511
546
  format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
512
547
  ));
513
548
  }
514
- Ok(base_val / 10_i128.pow(scale_diff))
549
+ Ok(ParsedDecimal::Int128(base_val / 10_i128.pow(scale_diff)))
515
550
  }
516
- std::cmp::Ordering::Equal => Ok(base_val),
551
+ std::cmp::Ordering::Equal => Ok(ParsedDecimal::Int128(base_val)),
517
552
  }
518
553
  }
519
554
  // 2. Handle decimal point in the string (e.g., "123.456")
@@ -524,16 +559,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
524
559
  // Calculate the actual scale from the decimal position
525
560
  let actual_scale = s.len() - decimal_pos - 1;
526
561
 
527
- // Parse the string without decimal point as i128
528
- let v = s_without_point.parse::<i128>().map_err(|e| {
529
- MagnusError::new(
530
- magnus::exception::type_error(),
531
- format!(
532
- "Failed to parse decimal string '{}' (without decimal point: '{}'): {}",
533
- s, s_without_point, e
534
- ),
535
- )
536
- })?;
562
+ // Try to parse as i128 first
563
+ let v = match s_without_point.parse::<i128>() {
564
+ Ok(v) => v,
565
+ Err(_) => {
566
+ // Value too large for i128, use BigInt
567
+ return parse_large_decimal_with_bigint(s, input_scale);
568
+ }
569
+ };
537
570
 
538
571
  // Scale the value if needed based on the difference between
539
572
  // the actual scale and the requested scale
@@ -542,12 +575,14 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
542
575
  // Need to multiply to increase scale
543
576
  let scale_diff = (input_scale - actual_scale as i8) as u32;
544
577
  if scale_diff > 38 {
545
- return Err(MagnusError::new(
546
- magnus::exception::range_error(),
547
- format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
548
- ));
578
+ return parse_large_decimal_with_bigint(s, input_scale);
579
+ }
580
+
581
+ // Check for overflow
582
+ match v.checked_mul(10_i128.pow(scale_diff)) {
583
+ Some(v) => Ok(ParsedDecimal::Int128(v)),
584
+ None => parse_large_decimal_with_bigint(s, input_scale),
549
585
  }
550
- Ok(v * 10_i128.pow(scale_diff))
551
586
  }
552
587
  std::cmp::Ordering::Greater => {
553
588
  // Need to divide to decrease scale
@@ -558,30 +593,25 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
558
593
  format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
559
594
  ));
560
595
  }
561
- Ok(v / 10_i128.pow(scale_diff))
596
+ Ok(ParsedDecimal::Int128(v / 10_i128.pow(scale_diff)))
562
597
  }
563
- std::cmp::Ordering::Equal => Ok(v),
598
+ std::cmp::Ordering::Equal => Ok(ParsedDecimal::Int128(v)),
564
599
  }
565
600
  }
566
601
  // 3. Plain integer value (e.g., "12345")
567
602
  else {
568
- // No decimal point, parse as i128 and scale appropriately
569
- let v = s.parse::<i128>().map_err(|e| {
570
- MagnusError::new(
571
- magnus::exception::type_error(),
572
- format!("Failed to parse integer string '{}' as decimal: {}", s, e),
573
- )
574
- })?;
603
+ // No decimal point, try to parse as i128 first
604
+ let v = match s.parse::<i128>() {
605
+ Ok(v) => v,
606
+ Err(_) => {
607
+ // Value too large for i128, use BigInt
608
+ return parse_large_decimal_with_bigint(s, input_scale);
609
+ }
610
+ };
575
611
 
576
612
  // Apply scale - make sure it's reasonable
577
613
  if input_scale > 38 {
578
- return Err(MagnusError::new(
579
- magnus::exception::range_error(),
580
- format!(
581
- "Scale {} is too large for decimal value '{}'. Must be ≤ 38.",
582
- input_scale, s
583
- ),
584
- ));
614
+ return parse_large_decimal_with_bigint(s, input_scale);
585
615
  } else if input_scale < -38 {
586
616
  return Err(MagnusError::new(
587
617
  magnus::exception::range_error(),
@@ -594,15 +624,153 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, Magnus
594
624
 
595
625
  // Apply positive scale (multiply)
596
626
  if input_scale >= 0 {
597
- Ok(v * 10_i128.pow(input_scale as u32))
627
+ match v.checked_mul(10_i128.pow(input_scale as u32)) {
628
+ Some(v) => Ok(ParsedDecimal::Int128(v)),
629
+ None => parse_large_decimal_with_bigint(s, input_scale),
630
+ }
598
631
  } else {
599
632
  // Apply negative scale (divide)
600
- Ok(v / 10_i128.pow((-input_scale) as u32))
633
+ Ok(ParsedDecimal::Int128(
634
+ v / 10_i128.pow((-input_scale) as u32),
635
+ ))
636
+ }
637
+ }
638
+ }
639
+
640
+ /// Parse large decimal values using BigInt when they would overflow i128
641
+ fn parse_large_decimal_with_bigint(s: &str, input_scale: i8) -> Result<ParsedDecimal, MagnusError> {
642
+ use num::BigInt;
643
+ use std::str::FromStr;
644
+
645
+ // Parse the input string as a BigInt
646
+ let bigint = if let Some(e_pos) = s.to_lowercase().find('e') {
647
+ // Handle scientific notation
648
+ let base = &s[0..e_pos];
649
+ let exp = &s[e_pos + 1..];
650
+
651
+ let exp_val = exp.parse::<i32>().map_err(|e| {
652
+ MagnusError::new(
653
+ magnus::exception::type_error(),
654
+ format!("Failed to parse exponent '{}': {}", exp, e),
655
+ )
656
+ })?;
657
+
658
+ // Parse base as BigInt
659
+ let base_bigint = if let Some(decimal_pos) = base.find('.') {
660
+ let mut base_without_point = base.to_string();
661
+ base_without_point.remove(decimal_pos);
662
+ let base_scale = base.len() - decimal_pos - 1;
663
+
664
+ let bigint = BigInt::from_str(&base_without_point).map_err(|e| {
665
+ MagnusError::new(
666
+ magnus::exception::type_error(),
667
+ format!("Failed to parse decimal base '{}': {}", base, e),
668
+ )
669
+ })?;
670
+
671
+ // Adjust for the decimal point
672
+ let effective_exp = exp_val - base_scale as i32;
673
+
674
+ if effective_exp > 0 {
675
+ bigint * BigInt::from(10).pow(effective_exp as u32)
676
+ } else if effective_exp < 0 {
677
+ bigint / BigInt::from(10).pow((-effective_exp) as u32)
678
+ } else {
679
+ bigint
680
+ }
681
+ } else {
682
+ let bigint = BigInt::from_str(base).map_err(|e| {
683
+ MagnusError::new(
684
+ magnus::exception::type_error(),
685
+ format!("Failed to parse decimal base '{}': {}", base, e),
686
+ )
687
+ })?;
688
+
689
+ if exp_val > 0 {
690
+ bigint * BigInt::from(10).pow(exp_val as u32)
691
+ } else if exp_val < 0 {
692
+ bigint / BigInt::from(10).pow((-exp_val) as u32)
693
+ } else {
694
+ bigint
695
+ }
696
+ };
697
+
698
+ base_bigint
699
+ } else if let Some(decimal_pos) = s.find('.') {
700
+ // Handle decimal point
701
+ let mut s_without_point = s.to_string();
702
+ s_without_point.remove(decimal_pos);
703
+
704
+ let actual_scale = s.len() - decimal_pos - 1;
705
+ let bigint = BigInt::from_str(&s_without_point).map_err(|e| {
706
+ MagnusError::new(
707
+ magnus::exception::type_error(),
708
+ format!("Failed to parse decimal string '{}': {}", s, e),
709
+ )
710
+ })?;
711
+
712
+ // Adjust for scale difference
713
+ let scale_diff = actual_scale as i8 - input_scale;
714
+
715
+ if scale_diff > 0 {
716
+ bigint / BigInt::from(10).pow(scale_diff as u32)
717
+ } else if scale_diff < 0 {
718
+ bigint * BigInt::from(10).pow((-scale_diff) as u32)
719
+ } else {
720
+ bigint
721
+ }
722
+ } else {
723
+ // Plain integer
724
+ let bigint = BigInt::from_str(s).map_err(|e| {
725
+ MagnusError::new(
726
+ magnus::exception::type_error(),
727
+ format!("Failed to parse integer string '{}': {}", s, e),
728
+ )
729
+ })?;
730
+
731
+ if input_scale > 0 {
732
+ bigint * BigInt::from(10).pow(input_scale as u32)
733
+ } else if input_scale < 0 {
734
+ bigint / BigInt::from(10).pow((-input_scale) as u32)
735
+ } else {
736
+ bigint
601
737
  }
738
+ };
739
+
740
+ // Convert BigInt to bytes and then to i256
741
+ let bytes = bigint.to_signed_bytes_le();
742
+
743
+ if bytes.len() <= 16 {
744
+ // Fits in i128
745
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
746
+ [0xff; 16]
747
+ } else {
748
+ [0; 16]
749
+ };
750
+ buf[..bytes.len()].copy_from_slice(&bytes);
751
+
752
+ Ok(ParsedDecimal::Int128(i128::from_le_bytes(buf)))
753
+ } else if bytes.len() <= 32 {
754
+ // Fits in i256
755
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
756
+ [0xff; 32]
757
+ } else {
758
+ [0; 32]
759
+ };
760
+ buf[..bytes.len()].copy_from_slice(&bytes);
761
+
762
+ Ok(ParsedDecimal::Int256(arrow_buffer::i256::from_le_bytes(
763
+ buf,
764
+ )))
765
+ } else {
766
+ Err(MagnusError::new(
767
+ magnus::exception::range_error(),
768
+ format!("Decimal value '{}' is too large to fit in 256 bits", s),
769
+ ))
602
770
  }
603
771
  }
604
772
 
605
- fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
773
+ fn convert_to_decimal(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
606
774
  // Get the decimal string based on the type of value
607
775
  let s = if unsafe { value.classname() } == "BigDecimal" {
608
776
  value
@@ -614,7 +782,10 @@ fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, Magnus
614
782
 
615
783
  // Use our unified parser to convert the string to a decimal value with scaling
616
784
  match parse_decimal_string(&s, scale) {
617
- Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value, scale)),
785
+ Ok(decimal_value) => match decimal_value {
786
+ ParsedDecimal::Int128(v) => Ok(ParquetValue::Decimal128(v, scale)),
787
+ ParsedDecimal::Int256(v) => Ok(ParquetValue::Decimal256(v, scale)),
788
+ },
618
789
  Err(e) => Err(MagnusError::new(
619
790
  magnus::exception::type_error(),
620
791
  format!(
@@ -731,6 +902,52 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
731
902
  }
732
903
  DataType::Date32 => impl_numeric_array_conversion!(column.array, Date32Array, Date32),
733
904
  DataType::Date64 => impl_numeric_array_conversion!(column.array, Date64Array, Date64),
905
+ DataType::Decimal128(_precision, scale) => {
906
+ let array = downcast_array::<Decimal128Array>(column.array);
907
+ Ok(ParquetValueVec(if array.is_nullable() {
908
+ array
909
+ .values()
910
+ .iter()
911
+ .enumerate()
912
+ .map(|(i, x)| {
913
+ if array.is_null(i) {
914
+ ParquetValue::Null
915
+ } else {
916
+ ParquetValue::Decimal128(*x, *scale)
917
+ }
918
+ })
919
+ .collect()
920
+ } else {
921
+ array
922
+ .values()
923
+ .iter()
924
+ .map(|x| ParquetValue::Decimal128(*x, *scale))
925
+ .collect()
926
+ }))
927
+ }
928
+ DataType::Decimal256(_precision, scale) => {
929
+ let array = downcast_array::<Decimal256Array>(column.array);
930
+ Ok(ParquetValueVec(if array.is_nullable() {
931
+ array
932
+ .values()
933
+ .iter()
934
+ .enumerate()
935
+ .map(|(i, x)| {
936
+ if array.is_null(i) {
937
+ ParquetValue::Null
938
+ } else {
939
+ ParquetValue::Decimal256(*x, *scale)
940
+ }
941
+ })
942
+ .collect()
943
+ } else {
944
+ array
945
+ .values()
946
+ .iter()
947
+ .map(|x| ParquetValue::Decimal256(*x, *scale))
948
+ .collect()
949
+ }))
950
+ }
734
951
  DataType::Timestamp(TimeUnit::Second, tz) => {
735
952
  impl_timestamp_array_conversion!(
736
953
  column.array,
@@ -22,6 +22,19 @@ pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -
22
22
  }
23
23
  }
24
24
 
25
+ /// Format i256 decimal value with appropriate scale for BigDecimal conversion
26
+ /// Uses bytes conversion to preserve full precision
27
+ pub fn format_i256_decimal_with_scale(
28
+ value: arrow_buffer::i256,
29
+ scale: i8,
30
+ ) -> Result<String, ParquetGemError> {
31
+ // Convert i256 to big-endian bytes
32
+ let bytes = value.to_be_bytes();
33
+
34
+ // Use the existing bytes_to_decimal function which handles full precision
35
+ bytes_to_decimal(&bytes, scale as i32)
36
+ }
37
+
25
38
  /// Format decimal value with appropriate scale for BigDecimal conversion
26
39
  /// Handles positive and negative scales correctly for i32 scale
27
40
  pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
@@ -35,7 +48,7 @@ pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32)
35
48
  }
36
49
 
37
50
  /// Convert arbitrary-length big-endian byte array to decimal string
38
- /// Supports byte arrays from 1 to 16 bytes in length
51
+ /// Supports byte arrays from 1 to 32 bytes in length
39
52
  fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
40
53
  match bytes.len() {
41
54
  0 => Err(ParquetGemError::InvalidDecimal(
@@ -50,34 +63,34 @@ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError>
50
63
  // For 2 bytes, use i16
51
64
  let mut value: i16 = 0;
52
65
  let is_negative = bytes[0] & 0x80 != 0;
53
-
66
+
54
67
  for &byte in bytes {
55
68
  value = (value << 8) | (byte as i16);
56
69
  }
57
-
70
+
58
71
  // Sign extend if negative
59
72
  if is_negative {
60
73
  let shift = 16 - (bytes.len() * 8);
61
74
  value = (value << shift) >> shift;
62
75
  }
63
-
76
+
64
77
  Ok(format_decimal_with_i32_scale(value, scale))
65
78
  }
66
79
  3..=4 => {
67
80
  // For 3-4 bytes, use i32
68
81
  let mut value: i32 = 0;
69
82
  let is_negative = bytes[0] & 0x80 != 0;
70
-
83
+
71
84
  for &byte in bytes {
72
85
  value = (value << 8) | (byte as i32);
73
86
  }
74
-
87
+
75
88
  // Sign extend if negative
76
89
  if is_negative {
77
90
  let shift = 32 - (bytes.len() * 8);
78
91
  value = (value << shift) >> shift;
79
92
  }
80
-
93
+
81
94
  Ok(format_decimal_with_i32_scale(value, scale))
82
95
  }
83
96
  5..=8 => {
@@ -114,8 +127,79 @@ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError>
114
127
 
115
128
  Ok(format_decimal_with_i32_scale(value, scale))
116
129
  }
130
+ 17..=32 => {
131
+ // For 17-32 bytes, we need arbitrary precision handling
132
+ // Check if the number is negative (MSB of first byte)
133
+ let is_negative = bytes[0] & 0x80 != 0;
134
+
135
+ if is_negative {
136
+ // For negative numbers, we need to compute two's complement
137
+ // First, invert all bits
138
+ let mut inverted = Vec::with_capacity(bytes.len());
139
+ for &byte in bytes {
140
+ inverted.push(!byte);
141
+ }
142
+
143
+ // Then add 1
144
+ let mut carry = 1u8;
145
+ for i in (0..inverted.len()).rev() {
146
+ let (sum, new_carry) = inverted[i].overflowing_add(carry);
147
+ inverted[i] = sum;
148
+ carry = if new_carry { 1 } else { 0 };
149
+ }
150
+
151
+ // Convert to decimal string
152
+ let mut result = String::new();
153
+ let mut remainder = inverted;
154
+
155
+ // Repeatedly divide by 10 to get decimal digits
156
+ while !remainder.iter().all(|&b| b == 0) {
157
+ let mut carry = 0u16;
158
+ for i in 0..remainder.len() {
159
+ let temp = (carry << 8) | (remainder[i] as u16);
160
+ remainder[i] = (temp / 10) as u8;
161
+ carry = temp % 10;
162
+ }
163
+ result.push_str(&carry.to_string());
164
+ }
165
+
166
+ // The digits are in reverse order
167
+ if result.is_empty() {
168
+ result = "0".to_string();
169
+ } else {
170
+ result = result.chars().rev().collect();
171
+ }
172
+
173
+ // Add negative sign and format with scale
174
+ Ok(format_decimal_with_i32_scale(format!("-{}", result), scale))
175
+ } else {
176
+ // For positive numbers, direct conversion
177
+ let mut result = String::new();
178
+ let mut remainder = bytes.to_vec();
179
+
180
+ // Repeatedly divide by 10 to get decimal digits
181
+ while !remainder.iter().all(|&b| b == 0) {
182
+ let mut carry = 0u16;
183
+ for i in 0..remainder.len() {
184
+ let temp = (carry << 8) | (remainder[i] as u16);
185
+ remainder[i] = (temp / 10) as u8;
186
+ carry = temp % 10;
187
+ }
188
+ result.push_str(&carry.to_string());
189
+ }
190
+
191
+ // The digits are in reverse order
192
+ if result.is_empty() {
193
+ result = "0".to_string();
194
+ } else {
195
+ result = result.chars().rev().collect();
196
+ }
197
+
198
+ Ok(format_decimal_with_i32_scale(result, scale))
199
+ }
200
+ }
117
201
  _ => Err(ParquetGemError::InvalidDecimal(format!(
118
- "Unsupported decimal byte array size: {}",
202
+ "Unsupported decimal byte array size: {} (maximum 32 bytes)",
119
203
  bytes.len()
120
204
  ))),
121
205
  }
@@ -185,17 +185,18 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
185
185
  // 2. When precision only - use scale 0
186
186
  // 3. When scale only - use max precision (38)
187
187
  let (precision, scale) = match (precision_val, scale_val) {
188
- (None, None) => (38, 0), // Maximum accuracy, scale 0
188
+ (None, None) => (38, 0), // Maximum accuracy, scale 0
189
189
  (Some(p), None) => {
190
190
  // Precision provided, scale defaults to 0
191
191
  let prec = u8::try_convert(p).map_err(|_| {
192
192
  MagnusError::new(
193
193
  ruby.exception_type_error(),
194
- "Invalid precision value for decimal type, expected a positive integer".to_string(),
194
+ "Invalid precision value for decimal type, expected a positive integer"
195
+ .to_string(),
195
196
  )
196
197
  })?;
197
198
  (prec, 0)
198
- },
199
+ }
199
200
  (None, Some(s)) => {
200
201
  // Scale provided, precision set to maximum (38)
201
202
  let scl = i8::try_convert(s).map_err(|_| {
@@ -205,13 +206,14 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
205
206
  )
206
207
  })?;
207
208
  (38, scl)
208
- },
209
+ }
209
210
  (Some(p), Some(s)) => {
210
211
  // Both provided
211
212
  let prec = u8::try_convert(p).map_err(|_| {
212
213
  MagnusError::new(
213
214
  ruby.exception_type_error(),
214
- "Invalid precision value for decimal type, expected a positive integer".to_string(),
215
+ "Invalid precision value for decimal type, expected a positive integer"
216
+ .to_string(),
215
217
  )
216
218
  })?;
217
219
  let scl = i8::try_convert(s).map_err(|_| {
@@ -294,6 +296,7 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
294
296
  "timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
295
297
  "timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
296
298
  "decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
299
+ "decimal256" => Some(PrimitiveType::Decimal256(38, 0)), // Maximum precision, scale 0
297
300
  _ => None,
298
301
  }
299
302
  }
@@ -321,6 +324,9 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
321
324
  PrimitiveType::Decimal128(precision, scale) => {
322
325
  ArrowDataType::Decimal128(*precision, *scale)
323
326
  }
327
+ PrimitiveType::Decimal256(precision, scale) => {
328
+ ArrowDataType::Decimal256(*precision, *scale)
329
+ }
324
330
  PrimitiveType::Boolean => ArrowDataType::Boolean,
325
331
  PrimitiveType::String => ArrowDataType::Utf8,
326
332
  PrimitiveType::Binary => ArrowDataType::Binary,
@@ -243,6 +243,7 @@ pub fn parquet_schema_type_to_arrow_data_type(
243
243
  PrimitiveType::Float32 => DataType::Float32,
244
244
  PrimitiveType::Float64 => DataType::Float64,
245
245
  PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
246
+ PrimitiveType::Decimal256(precision, scale) => DataType::Decimal256(*precision, *scale),
246
247
  PrimitiveType::String => DataType::Utf8,
247
248
  PrimitiveType::Binary => DataType::Binary,
248
249
  PrimitiveType::Boolean => DataType::Boolean,
@@ -381,6 +382,22 @@ fn create_arrow_builder_for_type(
381
382
 
382
383
  Ok(Box::new(builder_with_precision))
383
384
  }
385
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal256(precision, scale)) => {
386
+ // Create a Decimal256Builder (values are kept as i256; nothing is truncated to Decimal128)
387
+ let builder = Decimal256Builder::with_capacity(cap);
388
+
389
+ // Set precision and scale for the decimal and return the new builder
390
+ let builder_with_precision = builder
391
+ .with_precision_and_scale(*precision, *scale)
392
+ .map_err(|e| {
393
+ MagnusError::new(
394
+ magnus::exception::runtime_error(),
395
+ format!("Failed to set precision and scale: {}", e),
396
+ )
397
+ })?;
398
+
399
+ Ok(Box::new(builder_with_precision))
400
+ }
384
401
  ParquetSchemaType::Primitive(PrimitiveType::String) => {
385
402
  Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
386
403
  }
@@ -891,6 +908,187 @@ fn fill_builder(
891
908
  }
892
909
  Ok(())
893
910
  }
911
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_precision, scale)) => {
912
+ let typed_builder = builder
913
+ .as_any_mut()
914
+ .downcast_mut::<Decimal256Builder>()
915
+ .expect("Builder mismatch: expected Decimal256Builder for Decimal256");
916
+
917
+ for val in values {
918
+ match val {
919
+ ParquetValue::Decimal256(d, _scale) => typed_builder.append_value(*d),
920
+ ParquetValue::Decimal128(d, _scale) => {
921
+ // Convert i128 to i256
922
+ typed_builder.append_value(arrow_buffer::i256::from_i128(*d))
923
+ }
924
+ ParquetValue::Float64(f) => {
925
+ // Scale the float to the desired precision and scale
926
+ // For large values, use BigInt to avoid overflow
927
+ let scaled = *f * 10_f64.powi(*scale as i32);
928
+ if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
929
+ let scaled_value = scaled as i128;
930
+ typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
931
+ } else {
932
+ // Use BigInt for values that don't fit in i128
933
+ use num::{BigInt, FromPrimitive};
934
+ let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
935
+ MagnusError::new(
936
+ magnus::exception::type_error(),
937
+ format!("Failed to convert float {} to BigInt", f),
938
+ )
939
+ })?;
940
+ let bytes = bigint.to_signed_bytes_le();
941
+ if bytes.len() <= 32 {
942
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
943
+ [0xff; 32]
944
+ } else {
945
+ [0; 32]
946
+ };
947
+ buf[..bytes.len()].copy_from_slice(&bytes);
948
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
949
+ } else {
950
+ return Err(MagnusError::new(
951
+ magnus::exception::type_error(),
952
+ format!(
953
+ "Float value {} scaled to {} is too large for Decimal256",
954
+ f, scaled
955
+ ),
956
+ ));
957
+ }
958
+ }
959
+ }
960
+ ParquetValue::Float32(flo) => {
961
+ // Scale the float to the desired precision and scale
962
+ let scaled = (*flo as f64) * 10_f64.powi(*scale as i32);
963
+ if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
964
+ let scaled_value = scaled as i128;
965
+ typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
966
+ } else {
967
+ // Use BigInt for values that don't fit in i128
968
+ use num::{BigInt, FromPrimitive};
969
+ let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
970
+ MagnusError::new(
971
+ magnus::exception::type_error(),
972
+ format!("Failed to convert float {} to BigInt", flo),
973
+ )
974
+ })?;
975
+ let bytes = bigint.to_signed_bytes_le();
976
+ if bytes.len() <= 32 {
977
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
978
+ [0xff; 32]
979
+ } else {
980
+ [0; 32]
981
+ };
982
+ buf[..bytes.len()].copy_from_slice(&bytes);
983
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
984
+ } else {
985
+ return Err(MagnusError::new(
986
+ magnus::exception::type_error(),
987
+ format!(
988
+ "Float value {} scaled is too large for Decimal256",
989
+ flo
990
+ ),
991
+ ));
992
+ }
993
+ }
994
+ }
995
+ ParquetValue::Int64(i) => {
996
+ // Scale the integer to the desired scale
997
+ let base = arrow_buffer::i256::from_i128(*i as i128);
998
+ if *scale <= 38 {
999
+ // For scale <= 38, 10^scale fits in an i128; build the factor there, then multiply in i256
1000
+ let scale_factor =
1001
+ arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
1002
+ match base.checked_mul(scale_factor) {
1003
+ Some(scaled) => typed_builder.append_value(scaled),
1004
+ None => {
1005
+ return Err(MagnusError::new(
1006
+ magnus::exception::type_error(),
1007
+ format!(
1008
+ "Integer {} scaled by {} overflows Decimal256",
1009
+ i, scale
1010
+ ),
1011
+ ));
1012
+ }
1013
+ }
1014
+ } else {
1015
+ // For very large scales, use BigInt
1016
+ use num::BigInt;
1017
+ let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
1018
+ let bytes = bigint.to_signed_bytes_le();
1019
+ if bytes.len() <= 32 {
1020
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
1021
+ [0xff; 32]
1022
+ } else {
1023
+ [0; 32]
1024
+ };
1025
+ buf[..bytes.len()].copy_from_slice(&bytes);
1026
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
1027
+ } else {
1028
+ return Err(MagnusError::new(
1029
+ magnus::exception::type_error(),
1030
+ format!(
1031
+ "Integer {} scaled by {} is too large for Decimal256",
1032
+ i, scale
1033
+ ),
1034
+ ));
1035
+ }
1036
+ }
1037
+ }
1038
+ ParquetValue::Int32(i) => {
1039
+ // Scale the integer to the desired scale
1040
+ let base = arrow_buffer::i256::from_i128(*i as i128);
1041
+ if *scale <= 38 {
1042
+ // For scale <= 38, 10^scale fits in an i128; build the factor there, then multiply in i256
1043
+ let scale_factor =
1044
+ arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
1045
+ match base.checked_mul(scale_factor) {
1046
+ Some(scaled) => typed_builder.append_value(scaled),
1047
+ None => {
1048
+ return Err(MagnusError::new(
1049
+ magnus::exception::type_error(),
1050
+ format!(
1051
+ "Integer {} scaled by {} overflows Decimal256",
1052
+ i, scale
1053
+ ),
1054
+ ));
1055
+ }
1056
+ }
1057
+ } else {
1058
+ // For very large scales, use BigInt
1059
+ use num::BigInt;
1060
+ let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
1061
+ let bytes = bigint.to_signed_bytes_le();
1062
+ if bytes.len() <= 32 {
1063
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
1064
+ [0xff; 32]
1065
+ } else {
1066
+ [0; 32]
1067
+ };
1068
+ buf[..bytes.len()].copy_from_slice(&bytes);
1069
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
1070
+ } else {
1071
+ return Err(MagnusError::new(
1072
+ magnus::exception::type_error(),
1073
+ format!(
1074
+ "Integer {} scaled by {} is too large for Decimal256",
1075
+ i, scale
1076
+ ),
1077
+ ));
1078
+ }
1079
+ }
1080
+ }
1081
+ ParquetValue::Null => typed_builder.append_null(),
1082
+ other => {
1083
+ return Err(MagnusError::new(
1084
+ magnus::exception::type_error(),
1085
+ format!("Expected numeric value for Decimal256, got {:?}", other),
1086
+ ))
1087
+ }
1088
+ }
1089
+ }
1090
+ Ok(())
1091
+ }
894
1092
  ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
895
1093
  let typed_builder = builder
896
1094
  .as_any_mut()
@@ -1172,6 +1370,15 @@ fn fill_builder(
1172
1370
  )
1173
1371
  })?
1174
1372
  .append_value(*x),
1373
+ ParquetValue::Decimal256(x, _scale) => typed_builder
1374
+ .field_builder::<Decimal256Builder>(i)
1375
+ .ok_or_else(|| {
1376
+ MagnusError::new(
1377
+ magnus::exception::type_error(),
1378
+ "Failed to coerce into Decimal256Builder",
1379
+ )
1380
+ })?
1381
+ .append_value(*x),
1175
1382
  ParquetValue::Date32(x) => typed_builder
1176
1383
  .field_builder::<Date32Builder>(i)
1177
1384
  .ok_or_else(|| {
@@ -1377,6 +1584,15 @@ fn fill_builder(
1377
1584
  )
1378
1585
  })?
1379
1586
  .append_null(),
1587
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_, _)) => typed_builder
1588
+ .field_builder::<Decimal256Builder>(i)
1589
+ .ok_or_else(|| {
1590
+ MagnusError::new(
1591
+ magnus::exception::type_error(),
1592
+ "Failed to coerce into Decimal256Builder for Decimal256",
1593
+ )
1594
+ })?
1595
+ .append_null(),
1380
1596
  ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
1381
1597
  .field_builder::<StringBuilder>(i)
1382
1598
  .ok_or_else(|| {
@@ -145,6 +145,53 @@ impl FromStr for ParquetSchemaType<'_> {
145
145
  }
146
146
  }
147
147
 
148
+ // Check if it's a decimal256 type with precision and scale
149
+ if let Some(decimal_params) = s.strip_prefix("decimal256(").and_then(|s| s.strip_suffix(")")) {
150
+ let parts: Vec<&str> = decimal_params.split(',').collect();
151
+
152
+ // Handle both single parameter (precision only) and two parameters (precision and scale)
153
+ if parts.len() == 1 {
154
+ // Only precision provided, scale defaults to 0
155
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
156
+ MagnusError::new(
157
+ magnus::exception::runtime_error(),
158
+ format!("Invalid precision value in decimal256 type: {}", parts[0]),
159
+ )
160
+ })?;
161
+
162
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
163
+ precision, 0,
164
+ )));
165
+ } else if parts.len() == 2 {
166
+ // Both precision and scale provided
167
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
168
+ MagnusError::new(
169
+ magnus::exception::runtime_error(),
170
+ format!("Invalid precision value in decimal256 type: {}", parts[0]),
171
+ )
172
+ })?;
173
+
174
+ let scale = parts[1].trim().parse::<i8>().map_err(|_| {
175
+ MagnusError::new(
176
+ magnus::exception::runtime_error(),
177
+ format!("Invalid scale value in decimal256 type: {}", parts[1]),
178
+ )
179
+ })?;
180
+
181
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
182
+ precision, scale,
183
+ )));
184
+ } else {
185
+ return Err(MagnusError::new(
186
+ magnus::exception::runtime_error(),
187
+ format!(
188
+ "Invalid decimal256 format. Expected 'decimal256(precision)' or 'decimal256(precision,scale)', got '{}'",
189
+ s
190
+ ),
191
+ ));
192
+ }
193
+ }
194
+
148
195
  // Handle primitive types
149
196
  match s {
150
197
  "int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
@@ -166,6 +213,9 @@ impl FromStr for ParquetSchemaType<'_> {
166
213
  "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
167
214
  38, 0,
168
215
  ))),
216
+ "decimal256" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
217
+ 38, 0,
218
+ ))),
169
219
  "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
170
220
  item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
171
221
  format: None,
@@ -197,6 +197,9 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
197
197
  DataType::Decimal128(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal128(
198
198
  *precision, *scale,
199
199
  ))),
200
+ DataType::Decimal256(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal256(
201
+ *precision, *scale,
202
+ ))),
200
203
  DataType::Date32 => Ok(PST::Primitive(PrimitiveType::Date32)),
201
204
  DataType::Date64 => {
202
205
  // Our code typically uses Date32 or Timestamp for 64. But Arrow has Date64
@@ -170,6 +170,9 @@ fn write_columns_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemEr
170
170
  PrimitiveType::TimestampMicros => {
171
171
  PST::Primitive(PrimitiveType::TimestampMicros)
172
172
  }
173
+ PrimitiveType::Decimal256(precision, scale) => {
174
+ PST::Primitive(PrimitiveType::Decimal256(precision, scale))
175
+ }
173
176
  },
174
177
  SchemaNode::List { .. }
175
178
  | SchemaNode::Map { .. }
@@ -258,6 +258,7 @@ pub fn estimate_value_size(
258
258
  | PST::Primitive(PrimitiveType::Float64) => Ok(8),
259
259
  PST::Primitive(PrimitiveType::Boolean) => Ok(1),
260
260
  PST::Primitive(PrimitiveType::Decimal128(_, _)) => Ok(16),
261
+ PST::Primitive(PrimitiveType::Decimal256(_, _)) => Ok(32),
261
262
  PST::Primitive(PrimitiveType::Date32)
262
263
  | PST::Primitive(PrimitiveType::TimestampMillis)
263
264
  | PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8),
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.5.9"
2
+ VERSION = "0.5.11"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.9
4
+ version: 0.5.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-03 00:00:00.000000000 Z
11
+ date: 2025-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys