parquet 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestam
2
2
 
3
3
  use super::*;
4
4
  use arrow_array::MapArray;
5
- use magnus::RArray;
5
+ use magnus::{RArray, RString};
6
6
 
7
7
  #[derive(Debug, Clone)]
8
8
  pub enum ParquetValue {
@@ -22,6 +22,7 @@ pub enum ParquetValue {
22
22
  Bytes(Vec<u8>),
23
23
  Date32(i32),
24
24
  Date64(i64),
25
+ Decimal128(i128),
25
26
  TimestampSecond(i64, Option<Arc<str>>),
26
27
  TimestampMillis(i64, Option<Arc<str>>),
27
28
  TimestampMicros(i64, Option<Arc<str>>),
@@ -51,6 +52,7 @@ impl PartialEq for ParquetValue {
51
52
  (ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
52
53
  (ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
53
54
  (ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
55
+ (ParquetValue::Decimal128(a), ParquetValue::Decimal128(b)) => a == b,
54
56
  (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
55
57
  (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
56
58
  (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
@@ -83,6 +85,7 @@ impl std::hash::Hash for ParquetValue {
83
85
  ParquetValue::Bytes(b) => b.hash(state),
84
86
  ParquetValue::Date32(d) => d.hash(state),
85
87
  ParquetValue::Date64(d) => d.hash(state),
88
+ ParquetValue::Decimal128(d) => d.hash(state),
86
89
  ParquetValue::TimestampSecond(ts, tz) => {
87
90
  ts.hash(state);
88
91
  tz.hash(state);
@@ -128,6 +131,7 @@ impl TryIntoValue for ParquetValue {
128
131
  ParquetValue::Boolean(b) => Ok(b.into_value_with(handle)),
129
132
  ParquetValue::String(s) => Ok(s.into_value_with(handle)),
130
133
  ParquetValue::Bytes(b) => Ok(handle.str_from_slice(&b).as_value()),
134
+ ParquetValue::Decimal128(d) => Ok(d.to_string().into_value_with(handle)),
131
135
  ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
132
136
  ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
133
137
  timestamp @ ParquetValue::TimestampSecond(_, _) => {
@@ -233,6 +237,18 @@ impl ParquetValue {
233
237
  let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
234
238
  Ok(ParquetValue::Float64(v))
235
239
  }
240
+ PrimitiveType::Decimal128(_precision, scale) => {
241
+ if value.is_kind_of(ruby.class_string()) {
242
+ convert_to_decimal128(value, *scale)
243
+ } else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
244
+ convert_to_decimal128(s.as_value(), *scale)
245
+ } else {
246
+ Err(MagnusError::new(
247
+ magnus::exception::type_error(),
248
+ "Expected a string for a decimal type",
249
+ ))
250
+ }
251
+ }
236
252
  PrimitiveType::String => {
237
253
  let v = convert_to_string(value)?;
238
254
  Ok(ParquetValue::String(v))
@@ -356,6 +372,184 @@ impl ParquetValue {
356
372
  }
357
373
  }
358
374
  }
375
+ /// Unified helper to parse a decimal string and apply scaling
376
+ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
377
+ let s = input_str.trim();
378
+
379
+ // 1. Handle scientific notation case (e.g., "0.12345e3")
380
+ if let Some(e_pos) = s.to_lowercase().find('e') {
381
+ let base = &s[0..e_pos];
382
+ let exp = &s[e_pos + 1..];
383
+
384
+ // Parse the exponent with detailed error message
385
+ let exp_val = exp.parse::<i32>().map_err(|e| {
386
+ MagnusError::new(
387
+ magnus::exception::type_error(),
388
+ format!("Failed to parse exponent '{}' in decimal string '{}': {}", exp, s, e),
389
+ )
390
+ })?;
391
+
392
+ // Limit exponent to reasonable range to prevent overflow
393
+ if exp_val.abs() > 38 {
394
+ return Err(MagnusError::new(
395
+ magnus::exception::range_error(),
396
+ format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
397
+ ));
398
+ }
399
+
400
+ // Handle the base part which might contain a decimal point
401
+ let (base_val, base_scale) = if let Some(decimal_pos) = base.find('.') {
402
+ let mut base_without_point = base.to_string();
403
+ base_without_point.remove(decimal_pos);
404
+
405
+ let base_scale = base.len() - decimal_pos - 1;
406
+
407
+ let base_val = base_without_point.parse::<i128>().map_err(|e| {
408
+ MagnusError::new(
409
+ magnus::exception::type_error(),
410
+ format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
411
+ )
412
+ })?;
413
+
414
+ (base_val, base_scale as i32)
415
+ } else {
416
+ // No decimal point in base
417
+ let base_val = base.parse::<i128>().map_err(|e| {
418
+ MagnusError::new(
419
+ magnus::exception::type_error(),
420
+ format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
421
+ )
422
+ })?;
423
+
424
+ (base_val, 0)
425
+ };
426
+
427
+ // Calculate the effective scale: base_scale - exp_val
428
+ let effective_scale = base_scale - exp_val;
429
+
430
+ // Adjust the value based on the difference between effective scale and requested scale
431
+ match effective_scale.cmp(&(input_scale as i32)) {
432
+ std::cmp::Ordering::Less => {
433
+ // Need to multiply to increase scale
434
+ let scale_diff = (input_scale as i32 - effective_scale) as u32;
435
+ if scale_diff > 38 {
436
+ return Err(MagnusError::new(
437
+ magnus::exception::range_error(),
438
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
439
+ ));
440
+ }
441
+ Ok(base_val * 10_i128.pow(scale_diff))
442
+ }
443
+ std::cmp::Ordering::Greater => {
444
+ // Need to divide to decrease scale
445
+ let scale_diff = (effective_scale - input_scale as i32) as u32;
446
+ if scale_diff > 38 {
447
+ return Err(MagnusError::new(
448
+ magnus::exception::range_error(),
449
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
450
+ ));
451
+ }
452
+ Ok(base_val / 10_i128.pow(scale_diff))
453
+ }
454
+ std::cmp::Ordering::Equal => Ok(base_val),
455
+ }
456
+ }
457
+ // 2. Handle decimal point in the string (e.g., "123.456")
458
+ else if let Some(decimal_pos) = s.find('.') {
459
+ let mut s_without_point = s.to_string();
460
+ s_without_point.remove(decimal_pos);
461
+
462
+ // Calculate the actual scale from the decimal position
463
+ let actual_scale = s.len() - decimal_pos - 1;
464
+
465
+ // Parse the string without decimal point as i128
466
+ let v = s_without_point.parse::<i128>().map_err(|e| {
467
+ MagnusError::new(
468
+ magnus::exception::type_error(),
469
+ format!("Failed to parse decimal string '{}' (without decimal point: '{}'): {}", s, s_without_point, e),
470
+ )
471
+ })?;
472
+
473
+ // Scale the value if needed based on the difference between
474
+ // the actual scale and the requested scale
475
+ match actual_scale.cmp(&(input_scale as usize)) {
476
+ std::cmp::Ordering::Less => {
477
+ // Need to multiply to increase scale
478
+ let scale_diff = (input_scale - actual_scale as i8) as u32;
479
+ if scale_diff > 38 {
480
+ return Err(MagnusError::new(
481
+ magnus::exception::range_error(),
482
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
483
+ ));
484
+ }
485
+ Ok(v * 10_i128.pow(scale_diff))
486
+ }
487
+ std::cmp::Ordering::Greater => {
488
+ // Need to divide to decrease scale
489
+ let scale_diff = (actual_scale as i8 - input_scale) as u32;
490
+ if scale_diff > 38 {
491
+ return Err(MagnusError::new(
492
+ magnus::exception::range_error(),
493
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
494
+ ));
495
+ }
496
+ Ok(v / 10_i128.pow(scale_diff))
497
+ }
498
+ std::cmp::Ordering::Equal => Ok(v),
499
+ }
500
+ }
501
+ // 3. Plain integer value (e.g., "12345")
502
+ else {
503
+ // No decimal point, parse as i128 and scale appropriately
504
+ let v = s.parse::<i128>().map_err(|e| {
505
+ MagnusError::new(
506
+ magnus::exception::type_error(),
507
+ format!("Failed to parse integer string '{}' as decimal: {}", s, e),
508
+ )
509
+ })?;
510
+
511
+ // Apply scale - make sure it's reasonable
512
+ if input_scale > 38 {
513
+ return Err(MagnusError::new(
514
+ magnus::exception::range_error(),
515
+ format!("Scale {} is too large for decimal value '{}'. Must be ≤ 38.", input_scale, s),
516
+ ));
517
+ } else if input_scale < -38 {
518
+ return Err(MagnusError::new(
519
+ magnus::exception::range_error(),
520
+ format!("Scale {} is too small for decimal value '{}'. Must be ≥ -38.", input_scale, s),
521
+ ));
522
+ }
523
+
524
+ // Apply positive scale (multiply)
525
+ if input_scale >= 0 {
526
+ Ok(v * 10_i128.pow(input_scale as u32))
527
+ } else {
528
+ // Apply negative scale (divide)
529
+ Ok(v / 10_i128.pow((-input_scale) as u32))
530
+ }
531
+ }
532
+ }
533
+
534
+ fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
535
+ // Get the decimal string based on the type of value
536
+ let s = if unsafe { value.classname() } == "BigDecimal" {
537
+ value
538
+ .funcall::<_, _, RString>("to_s", ("F",))?
539
+ .to_string()?
540
+ } else {
541
+ value.to_r_string()?.to_string()?
542
+ };
543
+
544
+ // Use our unified parser to convert the string to a decimal value with scaling
545
+ match parse_decimal_string(&s, scale) {
546
+ Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value)),
547
+ Err(e) => Err(MagnusError::new(
548
+ magnus::exception::type_error(),
549
+ format!("Failed to convert '{}' to decimal with scale {}: {}", s, scale, e),
550
+ ))
551
+ }
552
+ }
359
553
 
360
554
  #[derive(Debug)]
361
555
  pub struct ParquetValueVec(Vec<ParquetValue>);
@@ -677,12 +871,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
677
871
  let x = downcast_array::<NullArray>(column.array);
678
872
  Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
679
873
  }
680
- _ => {
681
- return Err(MagnusError::new(
682
- magnus::exception::type_error(),
683
- format!("Unsupported data type: {:?}", column.array.data_type()),
684
- ))?;
685
- }
874
+ _ => Err(MagnusError::new(
875
+ magnus::exception::type_error(),
876
+ format!("Unsupported data type: {:?}", column.array.data_type()),
877
+ ))?,
686
878
  }
687
879
  }
688
880
  }
@@ -1,7 +1,12 @@
1
+ use std::sync::OnceLock;
2
+
1
3
  use itertools::Itertools;
4
+ use parquet::data_type::AsBytes;
2
5
 
3
6
  use super::*;
4
7
 
8
+ static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
9
+
5
10
  #[derive(Debug)]
6
11
  pub enum RowRecord<S: BuildHasher + Default> {
7
12
  Vec(Vec<ParquetField>),
@@ -145,8 +150,8 @@ impl TryIntoValue for ParquetField {
145
150
  Field::Str(s) => {
146
151
  if self.1 {
147
152
  Ok(simdutf8::basic::from_utf8(s.as_bytes())
148
- .map_err(|e| ParquetGemError::Utf8Error(e))
149
- .and_then(|s| Ok(s.into_value_with(handle)))?)
153
+ .map_err(ParquetGemError::Utf8Error)
154
+ .map(|s| s.into_value_with(handle))?)
150
155
  } else {
151
156
  let s = String::from_utf8_lossy(s.as_bytes());
152
157
  Ok(s.into_value_with(handle))
@@ -209,12 +214,18 @@ impl TryIntoValue for ParquetField {
209
214
  format!("{}e-{}", unscaled, scale)
210
215
  }
211
216
  Decimal::Bytes { value, scale, .. } => {
212
- // Convert bytes to string representation of unscaled value
213
- let unscaled = String::from_utf8_lossy(value.data());
217
+ // value is a byte array containing the bytes for an i128 value in big endian order
218
+ let casted = value.as_bytes()[..16].try_into()?;
219
+ let unscaled = i128::from_be_bytes(casted);
214
220
  format!("{}e-{}", unscaled, scale)
215
221
  }
216
222
  };
217
- Ok(handle.eval(&format!("BigDecimal(\"{value}\")"))?)
223
+
224
+ // Load the bigdecimal gem if it's not already loaded
225
+ LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
226
+
227
+ let kernel = handle.module_kernel();
228
+ Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
218
229
  }
219
230
  Field::Group(row) => {
220
231
  let hash = handle.hash_new();
@@ -1,5 +1,5 @@
1
1
  use magnus::value::ReprValue; // Add ReprValue trait to scope
2
- use magnus::{Error as MagnusError, RArray, Ruby, TryConvert, Value};
2
+ use magnus::{Error as MagnusError, IntoValue, RArray, Ruby, TryConvert, Value};
3
3
 
4
4
  use crate::types::{ParquetSchemaType as PST, PrimitiveType, SchemaField, SchemaNode};
5
5
  use crate::utils::parse_string_or_symbol;
@@ -22,7 +22,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
22
22
  let item_field = SchemaField {
23
23
  name: "item".to_string(),
24
24
  type_: list_field.item_type.clone(),
25
- format: list_field.format.clone().map(String::from),
25
+ format: list_field.format.map(String::from),
26
26
  nullable: list_field.nullable,
27
27
  };
28
28
  convert_schema_field_to_node(&item_field)
@@ -33,7 +33,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
33
33
  let item_field = SchemaField {
34
34
  name: "item".to_string(),
35
35
  type_: list_field.item_type.clone(),
36
- format: list_field.format.clone().map(String::from),
36
+ format: list_field.format.map(String::from),
37
37
  nullable: list_field.nullable,
38
38
  };
39
39
  convert_schema_field_to_node(&item_field)
@@ -50,13 +50,13 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
50
50
  let key_field = SchemaField {
51
51
  name: "key".to_string(),
52
52
  type_: map_field.key_type.clone(),
53
- format: map_field.key_format.clone().map(String::from),
53
+ format: map_field.key_format.map(String::from),
54
54
  nullable: false, // Map keys can never be null in Parquet
55
55
  };
56
56
  let value_field = SchemaField {
57
57
  name: "value".to_string(),
58
58
  type_: map_field.value_type.clone(),
59
- format: map_field.value_format.clone().map(String::from),
59
+ format: map_field.value_format.map(String::from),
60
60
  nullable: map_field.value_nullable,
61
61
  };
62
62
 
@@ -121,9 +121,7 @@ pub fn parse_legacy_schema(
121
121
  ruby.exception_type_error(),
122
122
  "Schema must be an array of field definitions or nil",
123
123
  )
124
- })?
125
- .len()
126
- == 0)
124
+ })?.is_empty())
127
125
  {
128
126
  // If schema is nil or an empty array, we'll handle this in the caller
129
127
  return Ok(Vec::new());
@@ -155,7 +153,7 @@ pub fn parse_legacy_schema(
155
153
  }
156
154
 
157
155
  let (name, type_value) = &entries[0];
158
- let name_option = parse_string_or_symbol(ruby, name.clone())?;
156
+ let name_option = parse_string_or_symbol(ruby, *name)?;
159
157
  let name = name_option.ok_or_else(|| {
160
158
  MagnusError::new(ruby.exception_runtime_error(), "Field name cannot be nil")
161
159
  })?;
@@ -166,6 +164,9 @@ pub fn parse_legacy_schema(
166
164
  let mut format_str = None;
167
165
  let mut nullable = true; // Default to true if not specified
168
166
 
167
+ let mut precision: Option<Value> = None;
168
+ let mut scale: Option<Value> = None;
169
+
169
170
  for (key, value) in type_hash {
170
171
  let key_option = parse_string_or_symbol(ruby, key)?;
171
172
  let key = key_option.ok_or_else(|| {
@@ -181,6 +182,12 @@ pub fn parse_legacy_schema(
181
182
  // Extract nullable if present - convert to boolean
182
183
  nullable = bool::try_convert(value).unwrap_or(true);
183
184
  }
185
+ "precision" => {
186
+ precision = Some(value);
187
+ }
188
+ "scale" => {
189
+ scale = Some(value);
190
+ }
184
191
  _ => {
185
192
  return Err(MagnusError::new(
186
193
  ruby.exception_type_error(),
@@ -197,9 +204,109 @@ pub fn parse_legacy_schema(
197
204
  )
198
205
  })?;
199
206
 
200
- (PST::try_convert(type_str)?, format_str, nullable)
207
+ // Handle decimal type with precision and scale
208
+ let mut type_result = PST::try_convert(type_str)?;
209
+
210
+ // If it's a decimal type and we have precision and scale, override the type
211
+ if let PST::Primitive(PrimitiveType::Decimal128(_, _)) = type_result {
212
+ let precision_value = precision.unwrap_or_else(|| {
213
+ let val: u8 = 18;
214
+ val.into_value_with(ruby)
215
+ });
216
+ let scale_value = scale.unwrap_or_else(|| {
217
+ let val: i8 = 2;
218
+ val.into_value_with(ruby)
219
+ });
220
+
221
+ let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
222
+ MagnusError::new(
223
+ ruby.exception_type_error(),
224
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
225
+ )
226
+ })?;
227
+
228
+ // Validate precision is in a valid range
229
+ if precision_u8 < 1 {
230
+ return Err(MagnusError::new(
231
+ ruby.exception_arg_error(),
232
+ format!(
233
+ "Precision for decimal type must be at least 1, got {}",
234
+ precision_u8
235
+ ),
236
+ ));
237
+ }
238
+
239
+ if precision_u8 > 38 {
240
+ return Err(MagnusError::new(
241
+ ruby.exception_arg_error(),
242
+ format!(
243
+ "Precision for decimal type cannot exceed 38, got {}",
244
+ precision_u8
245
+ ),
246
+ ));
247
+ }
248
+
249
+ let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
250
+ MagnusError::new(
251
+ ruby.exception_type_error(),
252
+ "Invalid scale value for decimal type, expected an integer".to_string(),
253
+ )
254
+ })?;
255
+
256
+ // Validate scale is in a valid range relative to precision
257
+ if scale_i8 < 0 {
258
+ return Err(MagnusError::new(
259
+ ruby.exception_arg_error(),
260
+ format!(
261
+ "Scale for decimal type cannot be negative, got {}",
262
+ scale_i8
263
+ ),
264
+ ));
265
+ }
266
+
267
+ if scale_i8 as u8 > precision_u8 {
268
+ return Err(MagnusError::new(
269
+ ruby.exception_arg_error(),
270
+ format!(
271
+ "Scale ({}) cannot be larger than precision ({}) for decimal type",
272
+ scale_i8, precision_u8
273
+ ),
274
+ ));
275
+ }
276
+
277
+ type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
278
+ } else if let Some(type_name) = parse_string_or_symbol(ruby, type_str)? {
279
+ if type_name == "decimal" {
280
+ let precision_value = precision.unwrap_or_else(|| {
281
+ let val: u8 = 18;
282
+ val.into_value_with(ruby)
283
+ });
284
+ let scale_value = scale.unwrap_or_else(|| {
285
+ let val: i8 = 2;
286
+ val.into_value_with(ruby)
287
+ });
288
+
289
+ let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
290
+ MagnusError::new(
291
+ ruby.exception_type_error(),
292
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
293
+ )
294
+ })?;
295
+
296
+ let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
297
+ MagnusError::new(
298
+ ruby.exception_type_error(),
299
+ "Invalid scale value for decimal type, expected an integer".to_string(),
300
+ )
301
+ })?;
302
+
303
+ type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
304
+ }
305
+ }
306
+
307
+ (type_result, format_str, nullable)
201
308
  } else {
202
- (PST::try_convert(type_value.clone())?, None, true)
309
+ (PST::try_convert(*type_value)?, None, true)
203
310
  };
204
311
 
205
312
  schema.push(SchemaField {
@@ -68,7 +68,7 @@ fn parse_struct_node(
68
68
  })?;
69
69
 
70
70
  // Check for empty struct immediately
71
- if fields_arr.len() == 0 {
71
+ if fields_arr.is_empty() {
72
72
  return Err(MagnusError::new(
73
73
  ruby.exception_arg_error(),
74
74
  format!("Cannot create a struct with zero fields. Struct name: '{}'. Parquet doesn't support empty structs", name)
@@ -175,6 +175,83 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
175
175
  "struct" => parse_struct_node(ruby, &node_hash, name, nullable),
176
176
  "list" => parse_list_node(ruby, &node_hash, name, nullable),
177
177
  "map" => parse_map_node(ruby, &node_hash, name, nullable),
178
+ "decimal" => {
179
+ // Check for precision and scale
180
+ let precision_val = node_hash.get(Symbol::new("precision"));
181
+ let scale_val = node_hash.get(Symbol::new("scale"));
182
+
183
+ // Handle different precision/scale combinations:
184
+ // 1. When no precision or scale - use max precision (38)
185
+ // 2. When precision only - use scale 0
186
+ // 3. When scale only - use max precision (38)
187
+ let (precision, scale) = match (precision_val, scale_val) {
188
+ (None, None) => (38, 0), // Maximum accuracy, scale 0
189
+ (Some(p), None) => {
190
+ // Precision provided, scale defaults to 0
191
+ let prec = u8::try_convert(p).map_err(|_| {
192
+ MagnusError::new(
193
+ ruby.exception_type_error(),
194
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
195
+ )
196
+ })?;
197
+ (prec, 0)
198
+ },
199
+ (None, Some(s)) => {
200
+ // Scale provided, precision set to maximum (38)
201
+ let scl = i8::try_convert(s).map_err(|_| {
202
+ MagnusError::new(
203
+ ruby.exception_type_error(),
204
+ "Invalid scale value for decimal type, expected an integer".to_string(),
205
+ )
206
+ })?;
207
+ (38, scl)
208
+ },
209
+ (Some(p), Some(s)) => {
210
+ // Both provided
211
+ let prec = u8::try_convert(p).map_err(|_| {
212
+ MagnusError::new(
213
+ ruby.exception_type_error(),
214
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
215
+ )
216
+ })?;
217
+ let scl = i8::try_convert(s).map_err(|_| {
218
+ MagnusError::new(
219
+ ruby.exception_type_error(),
220
+ "Invalid scale value for decimal type, expected an integer".to_string(),
221
+ )
222
+ })?;
223
+ (prec, scl)
224
+ }
225
+ };
226
+
227
+ // Validate precision is in a valid range
228
+ if precision < 1 {
229
+ return Err(MagnusError::new(
230
+ ruby.exception_arg_error(),
231
+ format!(
232
+ "Precision for decimal type must be at least 1, got {}",
233
+ precision
234
+ ),
235
+ ));
236
+ }
237
+
238
+ if precision > 38 {
239
+ return Err(MagnusError::new(
240
+ ruby.exception_arg_error(),
241
+ format!(
242
+ "Precision for decimal type cannot exceed 38, got {}",
243
+ precision
244
+ ),
245
+ ));
246
+ }
247
+
248
+ Ok(SchemaNode::Primitive {
249
+ name,
250
+ parquet_type: PrimitiveType::Decimal128(precision, scale),
251
+ nullable,
252
+ format,
253
+ })
254
+ }
178
255
  // For primitives, provide better error messages when type isn't recognized
179
256
  other => {
180
257
  if let Some(parquet_type) = parse_primitive_type(other) {
@@ -188,7 +265,7 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
188
265
  Err(MagnusError::new(
189
266
  magnus::exception::arg_error(),
190
267
  format!(
191
- "Unknown type: '{}'. Supported types are: struct, list, map, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64, boolean, string, binary, date32, timestamp_millis, timestamp_micros",
268
+ "Unknown type: '{}'. Supported types are: struct, list, map, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64, boolean, string, binary, date32, timestamp_millis, timestamp_micros, decimal",
192
269
  other
193
270
  )
194
271
  ))
@@ -216,6 +293,7 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
216
293
  "date" | "date32" => Some(PrimitiveType::Date32),
217
294
  "timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
218
295
  "timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
296
+ "decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
219
297
  _ => None,
220
298
  }
221
299
  }
@@ -240,6 +318,9 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
240
318
  PrimitiveType::UInt64 => ArrowDataType::UInt64,
241
319
  PrimitiveType::Float32 => ArrowDataType::Float32,
242
320
  PrimitiveType::Float64 => ArrowDataType::Float64,
321
+ PrimitiveType::Decimal128(precision, scale) => {
322
+ ArrowDataType::Decimal128(*precision, *scale)
323
+ }
243
324
  PrimitiveType::Boolean => ArrowDataType::Boolean,
244
325
  PrimitiveType::String => ArrowDataType::Utf8,
245
326
  PrimitiveType::Binary => ArrowDataType::Binary,
@@ -2,15 +2,11 @@ use super::*;
2
2
 
3
3
  pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, ParquetGemError> {
4
4
  let (ts, tz) = match value {
5
- ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts).unwrap(), tz),
6
- ParquetValue::TimestampMillis(ts, tz) => {
7
- (jiff::Timestamp::from_millisecond(*ts).unwrap(), tz)
8
- }
9
- ParquetValue::TimestampMicros(ts, tz) => {
10
- (jiff::Timestamp::from_microsecond(*ts).unwrap(), tz)
11
- }
5
+ ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts)?, tz),
6
+ ParquetValue::TimestampMillis(ts, tz) => (jiff::Timestamp::from_millisecond(*ts)?, tz),
7
+ ParquetValue::TimestampMicros(ts, tz) => (jiff::Timestamp::from_microsecond(*ts)?, tz),
12
8
  ParquetValue::TimestampNanos(ts, tz) => {
13
- (jiff::Timestamp::from_nanosecond(*ts as i128).unwrap(), tz)
9
+ (jiff::Timestamp::from_nanosecond(*ts as i128)?, tz)
14
10
  }
15
11
  _ => {
16
12
  return Err(MagnusError::new(
@@ -50,7 +46,7 @@ pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, Pa
50
46
  Ok(ts.to_zoned(tz).timestamp())
51
47
  } else {
52
48
  // Try IANA timezone
53
- match ts.in_tz(&tz) {
49
+ match ts.in_tz(tz) {
54
50
  Ok(zoned) => Ok(zoned.timestamp()),
55
51
  Err(_) => Ok(ts), // Fall back to UTC if timezone is invalid
56
52
  }
@@ -85,7 +81,7 @@ macro_rules! impl_timestamp_conversion {
85
81
  #[macro_export]
86
82
  macro_rules! impl_date_conversion {
87
83
  ($value:expr, $handle:expr) => {{
88
- let ts = jiff::Timestamp::from_second(($value as i64) * 86400).unwrap();
84
+ let ts = jiff::Timestamp::from_second(($value as i64) * 86400)?;
89
85
  let formatted = ts.strftime("%Y-%m-%d").to_string();
90
86
  Ok(formatted.into_value_with($handle))
91
87
  }};