parquet 0.5.9 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -185,17 +185,18 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
185
185
  // 2. When precision only - use scale 0
186
186
  // 3. When scale only - use max precision (38)
187
187
  let (precision, scale) = match (precision_val, scale_val) {
188
- (None, None) => (38, 0), // Maximum accuracy, scale 0
188
+ (None, None) => (38, 0), // Maximum accuracy, scale 0
189
189
  (Some(p), None) => {
190
190
  // Precision provided, scale defaults to 0
191
191
  let prec = u8::try_convert(p).map_err(|_| {
192
192
  MagnusError::new(
193
193
  ruby.exception_type_error(),
194
- "Invalid precision value for decimal type, expected a positive integer".to_string(),
194
+ "Invalid precision value for decimal type, expected a positive integer"
195
+ .to_string(),
195
196
  )
196
197
  })?;
197
198
  (prec, 0)
198
- },
199
+ }
199
200
  (None, Some(s)) => {
200
201
  // Scale provided, precision set to maximum (38)
201
202
  let scl = i8::try_convert(s).map_err(|_| {
@@ -205,13 +206,14 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
205
206
  )
206
207
  })?;
207
208
  (38, scl)
208
- },
209
+ }
209
210
  (Some(p), Some(s)) => {
210
211
  // Both provided
211
212
  let prec = u8::try_convert(p).map_err(|_| {
212
213
  MagnusError::new(
213
214
  ruby.exception_type_error(),
214
- "Invalid precision value for decimal type, expected a positive integer".to_string(),
215
+ "Invalid precision value for decimal type, expected a positive integer"
216
+ .to_string(),
215
217
  )
216
218
  })?;
217
219
  let scl = i8::try_convert(s).map_err(|_| {
@@ -294,6 +296,7 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
294
296
  "timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
295
297
  "timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
296
298
  "decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
299
+ "decimal256" => Some(PrimitiveType::Decimal256(38, 0)), // Maximum precision, scale 0
297
300
  _ => None,
298
301
  }
299
302
  }
@@ -321,6 +324,9 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
321
324
  PrimitiveType::Decimal128(precision, scale) => {
322
325
  ArrowDataType::Decimal128(*precision, *scale)
323
326
  }
327
+ PrimitiveType::Decimal256(precision, scale) => {
328
+ ArrowDataType::Decimal256(*precision, *scale)
329
+ }
324
330
  PrimitiveType::Boolean => ArrowDataType::Boolean,
325
331
  PrimitiveType::String => ArrowDataType::Utf8,
326
332
  PrimitiveType::Binary => ArrowDataType::Binary,
@@ -243,6 +243,7 @@ pub fn parquet_schema_type_to_arrow_data_type(
243
243
  PrimitiveType::Float32 => DataType::Float32,
244
244
  PrimitiveType::Float64 => DataType::Float64,
245
245
  PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
246
+ PrimitiveType::Decimal256(precision, scale) => DataType::Decimal256(*precision, *scale),
246
247
  PrimitiveType::String => DataType::Utf8,
247
248
  PrimitiveType::Binary => DataType::Binary,
248
249
  PrimitiveType::Boolean => DataType::Boolean,
@@ -381,6 +382,22 @@ fn create_arrow_builder_for_type(
381
382
 
382
383
  Ok(Box::new(builder_with_precision))
383
384
  }
385
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal256(precision, scale)) => {
386
+ // Create a Decimal256Builder so values keep their full 256-bit width (no truncation to Decimal128)
387
+ let builder = Decimal256Builder::with_capacity(cap);
388
+
389
+ // Set precision and scale for the decimal and return the new builder
390
+ let builder_with_precision = builder
391
+ .with_precision_and_scale(*precision, *scale)
392
+ .map_err(|e| {
393
+ MagnusError::new(
394
+ magnus::exception::runtime_error(),
395
+ format!("Failed to set precision and scale: {}", e),
396
+ )
397
+ })?;
398
+
399
+ Ok(Box::new(builder_with_precision))
400
+ }
384
401
  ParquetSchemaType::Primitive(PrimitiveType::String) => {
385
402
  Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
386
403
  }
@@ -891,6 +908,187 @@ fn fill_builder(
891
908
  }
892
909
  Ok(())
893
910
  }
911
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_precision, scale)) => {
912
+ let typed_builder = builder
913
+ .as_any_mut()
914
+ .downcast_mut::<Decimal256Builder>()
915
+ .expect("Builder mismatch: expected Decimal256Builder for Decimal256");
916
+
917
+ for val in values {
918
+ match val {
919
+ ParquetValue::Decimal256(d, _scale) => typed_builder.append_value(*d),
920
+ ParquetValue::Decimal128(d, _scale) => {
921
+ // Convert i128 to i256
922
+ typed_builder.append_value(arrow_buffer::i256::from_i128(*d))
923
+ }
924
+ ParquetValue::Float64(f) => {
925
+ // Scale the float to the desired precision and scale
926
+ // For large values, use BigInt to avoid overflow
927
+ let scaled = *f * 10_f64.powi(*scale as i32);
928
+ if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
929
+ let scaled_value = scaled as i128;
930
+ typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
931
+ } else {
932
+ // Use BigInt for values that don't fit in i128
933
+ use num::{BigInt, FromPrimitive};
934
+ let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
935
+ MagnusError::new(
936
+ magnus::exception::type_error(),
937
+ format!("Failed to convert float {} to BigInt", f),
938
+ )
939
+ })?;
940
+ let bytes = bigint.to_signed_bytes_le();
941
+ if bytes.len() <= 32 {
942
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
943
+ [0xff; 32]
944
+ } else {
945
+ [0; 32]
946
+ };
947
+ buf[..bytes.len()].copy_from_slice(&bytes);
948
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
949
+ } else {
950
+ return Err(MagnusError::new(
951
+ magnus::exception::type_error(),
952
+ format!(
953
+ "Float value {} scaled to {} is too large for Decimal256",
954
+ f, scaled
955
+ ),
956
+ ));
957
+ }
958
+ }
959
+ }
960
+ ParquetValue::Float32(flo) => {
961
+ // Scale the float to the desired precision and scale
962
+ let scaled = (*flo as f64) * 10_f64.powi(*scale as i32);
963
+ if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
964
+ let scaled_value = scaled as i128;
965
+ typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
966
+ } else {
967
+ // Use BigInt for values that don't fit in i128
968
+ use num::{BigInt, FromPrimitive};
969
+ let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
970
+ MagnusError::new(
971
+ magnus::exception::type_error(),
972
+ format!("Failed to convert float {} to BigInt", flo),
973
+ )
974
+ })?;
975
+ let bytes = bigint.to_signed_bytes_le();
976
+ if bytes.len() <= 32 {
977
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
978
+ [0xff; 32]
979
+ } else {
980
+ [0; 32]
981
+ };
982
+ buf[..bytes.len()].copy_from_slice(&bytes);
983
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
984
+ } else {
985
+ return Err(MagnusError::new(
986
+ magnus::exception::type_error(),
987
+ format!(
988
+ "Float value {} scaled is too large for Decimal256",
989
+ flo
990
+ ),
991
+ ));
992
+ }
993
+ }
994
+ }
995
+ ParquetValue::Int64(i) => {
996
+ // Scale the integer to the desired scale
997
+ let base = arrow_buffer::i256::from_i128(*i as i128);
998
+ if *scale <= 38 {
999
+ // For scale <= 38, 10^scale fits in an i128; the multiplication itself is a checked i256 multiply
1000
+ let scale_factor =
1001
+ arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
1002
+ match base.checked_mul(scale_factor) {
1003
+ Some(scaled) => typed_builder.append_value(scaled),
1004
+ None => {
1005
+ return Err(MagnusError::new(
1006
+ magnus::exception::type_error(),
1007
+ format!(
1008
+ "Integer {} scaled by {} overflows Decimal256",
1009
+ i, scale
1010
+ ),
1011
+ ));
1012
+ }
1013
+ }
1014
+ } else {
1015
+ // For very large scales, use BigInt
1016
+ use num::BigInt;
1017
+ let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
1018
+ let bytes = bigint.to_signed_bytes_le();
1019
+ if bytes.len() <= 32 {
1020
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
1021
+ [0xff; 32]
1022
+ } else {
1023
+ [0; 32]
1024
+ };
1025
+ buf[..bytes.len()].copy_from_slice(&bytes);
1026
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
1027
+ } else {
1028
+ return Err(MagnusError::new(
1029
+ magnus::exception::type_error(),
1030
+ format!(
1031
+ "Integer {} scaled by {} is too large for Decimal256",
1032
+ i, scale
1033
+ ),
1034
+ ));
1035
+ }
1036
+ }
1037
+ }
1038
+ ParquetValue::Int32(i) => {
1039
+ // Scale the integer to the desired scale
1040
+ let base = arrow_buffer::i256::from_i128(*i as i128);
1041
+ if *scale <= 38 {
1042
+ // For scale <= 38, 10^scale fits in an i128; the multiplication itself is a checked i256 multiply
1043
+ let scale_factor =
1044
+ arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
1045
+ match base.checked_mul(scale_factor) {
1046
+ Some(scaled) => typed_builder.append_value(scaled),
1047
+ None => {
1048
+ return Err(MagnusError::new(
1049
+ magnus::exception::type_error(),
1050
+ format!(
1051
+ "Integer {} scaled by {} overflows Decimal256",
1052
+ i, scale
1053
+ ),
1054
+ ));
1055
+ }
1056
+ }
1057
+ } else {
1058
+ // For very large scales, use BigInt
1059
+ use num::BigInt;
1060
+ let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
1061
+ let bytes = bigint.to_signed_bytes_le();
1062
+ if bytes.len() <= 32 {
1063
+ let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
1064
+ [0xff; 32]
1065
+ } else {
1066
+ [0; 32]
1067
+ };
1068
+ buf[..bytes.len()].copy_from_slice(&bytes);
1069
+ typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
1070
+ } else {
1071
+ return Err(MagnusError::new(
1072
+ magnus::exception::type_error(),
1073
+ format!(
1074
+ "Integer {} scaled by {} is too large for Decimal256",
1075
+ i, scale
1076
+ ),
1077
+ ));
1078
+ }
1079
+ }
1080
+ }
1081
+ ParquetValue::Null => typed_builder.append_null(),
1082
+ other => {
1083
+ return Err(MagnusError::new(
1084
+ magnus::exception::type_error(),
1085
+ format!("Expected numeric value for Decimal256, got {:?}", other),
1086
+ ))
1087
+ }
1088
+ }
1089
+ }
1090
+ Ok(())
1091
+ }
894
1092
  ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
895
1093
  let typed_builder = builder
896
1094
  .as_any_mut()
@@ -1172,6 +1370,15 @@ fn fill_builder(
1172
1370
  )
1173
1371
  })?
1174
1372
  .append_value(*x),
1373
+ ParquetValue::Decimal256(x, _scale) => typed_builder
1374
+ .field_builder::<Decimal256Builder>(i)
1375
+ .ok_or_else(|| {
1376
+ MagnusError::new(
1377
+ magnus::exception::type_error(),
1378
+ "Failed to coerce into Decimal256Builder",
1379
+ )
1380
+ })?
1381
+ .append_value(*x),
1175
1382
  ParquetValue::Date32(x) => typed_builder
1176
1383
  .field_builder::<Date32Builder>(i)
1177
1384
  .ok_or_else(|| {
@@ -1377,6 +1584,15 @@ fn fill_builder(
1377
1584
  )
1378
1585
  })?
1379
1586
  .append_null(),
1587
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_, _)) => typed_builder
1588
+ .field_builder::<Decimal256Builder>(i)
1589
+ .ok_or_else(|| {
1590
+ MagnusError::new(
1591
+ magnus::exception::type_error(),
1592
+ "Failed to coerce into Decimal256Builder for Decimal256",
1593
+ )
1594
+ })?
1595
+ .append_null(),
1380
1596
  ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
1381
1597
  .field_builder::<StringBuilder>(i)
1382
1598
  .ok_or_else(|| {
@@ -145,6 +145,53 @@ impl FromStr for ParquetSchemaType<'_> {
145
145
  }
146
146
  }
147
147
 
148
+ // Check if it's a decimal256 type with precision and scale
149
+ if let Some(decimal_params) = s.strip_prefix("decimal256(").and_then(|s| s.strip_suffix(")")) {
150
+ let parts: Vec<&str> = decimal_params.split(',').collect();
151
+
152
+ // Handle both single parameter (precision only) and two parameters (precision and scale)
153
+ if parts.len() == 1 {
154
+ // Only precision provided, scale defaults to 0
155
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
156
+ MagnusError::new(
157
+ magnus::exception::runtime_error(),
158
+ format!("Invalid precision value in decimal256 type: {}", parts[0]),
159
+ )
160
+ })?;
161
+
162
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
163
+ precision, 0,
164
+ )));
165
+ } else if parts.len() == 2 {
166
+ // Both precision and scale provided
167
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
168
+ MagnusError::new(
169
+ magnus::exception::runtime_error(),
170
+ format!("Invalid precision value in decimal256 type: {}", parts[0]),
171
+ )
172
+ })?;
173
+
174
+ let scale = parts[1].trim().parse::<i8>().map_err(|_| {
175
+ MagnusError::new(
176
+ magnus::exception::runtime_error(),
177
+ format!("Invalid scale value in decimal256 type: {}", parts[1]),
178
+ )
179
+ })?;
180
+
181
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
182
+ precision, scale,
183
+ )));
184
+ } else {
185
+ return Err(MagnusError::new(
186
+ magnus::exception::runtime_error(),
187
+ format!(
188
+ "Invalid decimal256 format. Expected 'decimal256(precision)' or 'decimal256(precision,scale)', got '{}'",
189
+ s
190
+ ),
191
+ ));
192
+ }
193
+ }
194
+
148
195
  // Handle primitive types
149
196
  match s {
150
197
  "int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
@@ -166,6 +213,9 @@ impl FromStr for ParquetSchemaType<'_> {
166
213
  "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
167
214
  38, 0,
168
215
  ))),
216
+ "decimal256" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
217
+ 38, 0,
218
+ ))),
169
219
  "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
170
220
  item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
171
221
  format: None,
@@ -197,6 +197,9 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
197
197
  DataType::Decimal128(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal128(
198
198
  *precision, *scale,
199
199
  ))),
200
+ DataType::Decimal256(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal256(
201
+ *precision, *scale,
202
+ ))),
200
203
  DataType::Date32 => Ok(PST::Primitive(PrimitiveType::Date32)),
201
204
  DataType::Date64 => {
202
205
  // Our code typically uses Date32 or Timestamp for 64. But Arrow has Date64
@@ -170,6 +170,9 @@ fn write_columns_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemEr
170
170
  PrimitiveType::TimestampMicros => {
171
171
  PST::Primitive(PrimitiveType::TimestampMicros)
172
172
  }
173
+ PrimitiveType::Decimal256(precision, scale) => {
174
+ PST::Primitive(PrimitiveType::Decimal256(precision, scale))
175
+ }
173
176
  },
174
177
  SchemaNode::List { .. }
175
178
  | SchemaNode::Map { .. }
@@ -258,6 +258,7 @@ pub fn estimate_value_size(
258
258
  | PST::Primitive(PrimitiveType::Float64) => Ok(8),
259
259
  PST::Primitive(PrimitiveType::Boolean) => Ok(1),
260
260
  PST::Primitive(PrimitiveType::Decimal128(_, _)) => Ok(16),
261
+ PST::Primitive(PrimitiveType::Decimal256(_, _)) => Ok(32),
261
262
  PST::Primitive(PrimitiveType::Date32)
262
263
  | PST::Primitive(PrimitiveType::TimestampMillis)
263
264
  | PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8),
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.5.9"
2
+ VERSION = "0.5.10"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.9
4
+ version: 0.5.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-03 00:00:00.000000000 Z
11
+ date: 2025-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -62,7 +62,9 @@ files:
62
62
  - ext/parquet/src/header_cache.rs
63
63
  - ext/parquet/src/lib.rs
64
64
  - ext/parquet/src/logger.rs
65
+ - ext/parquet/src/reader/arrow_reader.rs
65
66
  - ext/parquet/src/reader/common.rs
67
+ - ext/parquet/src/reader/format_detector.rs
66
68
  - ext/parquet/src/reader/mod.rs
67
69
  - ext/parquet/src/reader/parquet_column_reader.rs
68
70
  - ext/parquet/src/reader/parquet_row_reader.rs