parquet 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,24 +13,23 @@ use parquet::file::reader::{FileReader, SerializedFileReader};
13
13
  use parquet::record::reader::RowIter as ParquetRowIter;
14
14
  use parquet::schema::types::{Type as SchemaType, TypePtr};
15
15
  use std::collections::HashMap;
16
- use std::sync::{Arc, OnceLock};
16
+ use std::rc::Rc;
17
+ use std::sync::OnceLock;
17
18
 
18
19
  use super::common::{handle_block_or_enum, open_parquet_source};
19
20
 
20
21
  #[inline]
21
- pub fn parse_parquet_rows<'a>(rb_self: Value, args: &[Value]) -> Result<Value, MagnusError> {
22
+ pub fn parse_parquet_rows(rb_self: Value, args: &[Value]) -> Result<Value, MagnusError> {
22
23
  let ruby = unsafe { Ruby::get_unchecked() };
23
- Ok(
24
- parse_parquet_rows_impl(Arc::new(ruby), rb_self, args).map_err(|e| {
25
- let z: MagnusError = e.into();
26
- z
27
- })?,
28
- )
24
+ parse_parquet_rows_impl(Rc::new(ruby), rb_self, args).map_err(|e| {
25
+ let z: MagnusError = e.into();
26
+ z
27
+ })
29
28
  }
30
29
 
31
30
  #[inline]
32
- fn parse_parquet_rows_impl<'a>(
33
- ruby: Arc<Ruby>,
31
+ fn parse_parquet_rows_impl(
32
+ ruby: Rc<Ruby>,
34
33
  rb_self: Value,
35
34
  args: &[Value],
36
35
  ) -> Result<Value, ParquetGemError> {
@@ -93,7 +92,7 @@ fn parse_parquet_rows_impl<'a>(
93
92
  let headers = OnceLock::new();
94
93
  let headers_clone = headers.clone();
95
94
  let iter = iter.map(move |row| {
96
- row.and_then(|row| {
95
+ row.map(|row| {
97
96
  let headers = headers_clone.get_or_init(|| {
98
97
  let column_count = row.get_column_iter().count();
99
98
 
@@ -102,10 +101,7 @@ fn parse_parquet_rows_impl<'a>(
102
101
  header_string.push(k.to_owned());
103
102
  }
104
103
 
105
- let headers = StringCache::intern_many(&header_string)
106
- .expect("Failed to intern headers");
107
-
108
- headers
104
+ StringCache::intern_many(&header_string).expect("Failed to intern headers")
109
105
  });
110
106
 
111
107
  let mut map =
@@ -113,10 +109,10 @@ fn parse_parquet_rows_impl<'a>(
113
109
  for (i, (_, v)) in row.get_column_iter().enumerate() {
114
110
  map.insert(headers[i], ParquetField(v.clone(), strict));
115
111
  }
116
- Ok(map)
112
+ map
117
113
  })
118
- .and_then(|row| Ok(RowRecord::Map::<RandomState>(row)))
119
- .map_err(|e| ParquetGemError::from(e))
114
+ .map(RowRecord::Map::<RandomState>)
115
+ .map_err(ParquetGemError::from)
120
116
  });
121
117
 
122
118
  for result in iter {
@@ -126,16 +122,16 @@ fn parse_parquet_rows_impl<'a>(
126
122
  }
127
123
  ParserResultType::Array => {
128
124
  let iter = iter.map(|row| {
129
- row.and_then(|row| {
125
+ row.map(|row| {
130
126
  let column_count = row.get_column_iter().count();
131
127
  let mut vec = Vec::with_capacity(column_count);
132
128
  for (_, v) in row.get_column_iter() {
133
129
  vec.push(ParquetField(v.clone(), strict));
134
130
  }
135
- Ok(vec)
131
+ vec
136
132
  })
137
- .and_then(|row| Ok(RowRecord::Vec::<RandomState>(row)))
138
- .map_err(|e| ParquetGemError::from(e))
133
+ .map(RowRecord::Vec::<RandomState>)
134
+ .map_err(ParquetGemError::from)
139
135
  });
140
136
 
141
137
  for result in iter {
@@ -7,7 +7,7 @@ use parquet::{
7
7
  errors::ParquetError,
8
8
  file::reader::{ChunkReader, Length},
9
9
  };
10
- use std::{fs::File, sync::Mutex};
10
+ use std::{fs::File, rc::Rc, sync::Mutex};
11
11
  use std::{
12
12
  io::{self, BufReader, Read, Seek, SeekFrom, Write},
13
13
  sync::Arc,
@@ -35,7 +35,7 @@ pub enum RubyReader {
35
35
  unsafe impl Send for RubyReader {}
36
36
 
37
37
  impl RubyReader {
38
- pub fn new(ruby: Arc<Ruby>, value: Value) -> Result<Self, ParquetGemError> {
38
+ pub fn new(ruby: Rc<Ruby>, value: Value) -> Result<Self, ParquetGemError> {
39
39
  if RubyReader::is_seekable_io_like(&value) {
40
40
  Ok(RubyReader::RubyIoLike {
41
41
  inner: Opaque::from(value),
@@ -165,9 +165,7 @@ impl Read for RubyReader {
165
165
  buf.write_all(string_buffer)?;
166
166
  Ok(string_buffer.len())
167
167
  }
168
- None => {
169
- return Ok(0);
170
- }
168
+ None => Ok(0),
171
169
  }
172
170
  }
173
171
  }
@@ -107,6 +107,7 @@ pub enum PrimitiveType {
107
107
  UInt64,
108
108
  Float32,
109
109
  Float64,
110
+ Decimal128(u8, i8),
110
111
  Boolean,
111
112
  String,
112
113
  Binary,
@@ -32,6 +32,7 @@ use arrow_schema::{DataType, TimeUnit};
32
32
  use magnus::{value::ReprValue, Error as MagnusError, IntoValue, Ruby, Value};
33
33
  use parquet::data_type::Decimal;
34
34
  use parquet::record::Field;
35
+ use std::array::TryFromSliceError;
35
36
  use std::{collections::HashMap, hash::BuildHasher, sync::Arc};
36
37
 
37
38
  use crate::header_cache::StringCacheKey;
@@ -58,6 +59,8 @@ pub enum ParquetGemError {
58
59
  Utf8Error(#[from] simdutf8::basic::Utf8Error),
59
60
  #[error("Jiff error: {0}")]
60
61
  Jiff(#[from] jiff::Error),
62
+ #[error("Failed to cast slice to array: {0}")]
63
+ InvalidDecimal(#[from] TryFromSliceError),
61
64
  }
62
65
 
63
66
  #[derive(Debug)]
@@ -83,11 +86,11 @@ impl From<MagnusError> for ParquetGemError {
83
86
  }
84
87
  }
85
88
 
86
- impl Into<MagnusError> for ParquetGemError {
87
- fn into(self) -> MagnusError {
88
- match self {
89
- Self::Ruby(MagnusErrorWrapper(err)) => err.into(),
90
- _ => MagnusError::new(magnus::exception::runtime_error(), self.to_string()),
89
+ impl From<ParquetGemError> for MagnusError {
90
+ fn from(val: ParquetGemError) -> Self {
91
+ match val {
92
+ ParquetGemError::Ruby(MagnusErrorWrapper(err)) => err,
93
+ _ => MagnusError::new(magnus::exception::runtime_error(), val.to_string()),
91
94
  }
92
95
  }
93
96
  }
@@ -2,7 +2,7 @@ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestam
2
2
 
3
3
  use super::*;
4
4
  use arrow_array::MapArray;
5
- use magnus::RArray;
5
+ use magnus::{RArray, RString};
6
6
 
7
7
  #[derive(Debug, Clone)]
8
8
  pub enum ParquetValue {
@@ -22,6 +22,7 @@ pub enum ParquetValue {
22
22
  Bytes(Vec<u8>),
23
23
  Date32(i32),
24
24
  Date64(i64),
25
+ Decimal128(i128),
25
26
  TimestampSecond(i64, Option<Arc<str>>),
26
27
  TimestampMillis(i64, Option<Arc<str>>),
27
28
  TimestampMicros(i64, Option<Arc<str>>),
@@ -51,6 +52,7 @@ impl PartialEq for ParquetValue {
51
52
  (ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
52
53
  (ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
53
54
  (ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
55
+ (ParquetValue::Decimal128(a), ParquetValue::Decimal128(b)) => a == b,
54
56
  (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
55
57
  (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
56
58
  (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
@@ -83,6 +85,7 @@ impl std::hash::Hash for ParquetValue {
83
85
  ParquetValue::Bytes(b) => b.hash(state),
84
86
  ParquetValue::Date32(d) => d.hash(state),
85
87
  ParquetValue::Date64(d) => d.hash(state),
88
+ ParquetValue::Decimal128(d) => d.hash(state),
86
89
  ParquetValue::TimestampSecond(ts, tz) => {
87
90
  ts.hash(state);
88
91
  tz.hash(state);
@@ -128,6 +131,7 @@ impl TryIntoValue for ParquetValue {
128
131
  ParquetValue::Boolean(b) => Ok(b.into_value_with(handle)),
129
132
  ParquetValue::String(s) => Ok(s.into_value_with(handle)),
130
133
  ParquetValue::Bytes(b) => Ok(handle.str_from_slice(&b).as_value()),
134
+ ParquetValue::Decimal128(d) => Ok(d.to_string().into_value_with(handle)),
131
135
  ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
132
136
  ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
133
137
  timestamp @ ParquetValue::TimestampSecond(_, _) => {
@@ -233,6 +237,18 @@ impl ParquetValue {
233
237
  let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
234
238
  Ok(ParquetValue::Float64(v))
235
239
  }
240
+ PrimitiveType::Decimal128(_precision, scale) => {
241
+ if value.is_kind_of(ruby.class_string()) {
242
+ convert_to_decimal128(value, *scale)
243
+ } else if let Ok(s) = value.funcall::<_, _, RString>("to_s", ()) {
244
+ convert_to_decimal128(s.as_value(), *scale)
245
+ } else {
246
+ Err(MagnusError::new(
247
+ magnus::exception::type_error(),
248
+ "Expected a string for a decimal type",
249
+ ))
250
+ }
251
+ }
236
252
  PrimitiveType::String => {
237
253
  let v = convert_to_string(value)?;
238
254
  Ok(ParquetValue::String(v))
@@ -356,6 +372,184 @@ impl ParquetValue {
356
372
  }
357
373
  }
358
374
  }
375
+ /// Unified helper to parse a decimal string and apply scaling
376
+ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
377
+ let s = input_str.trim();
378
+
379
+ // 1. Handle scientific notation case (e.g., "0.12345e3")
380
+ if let Some(e_pos) = s.to_lowercase().find('e') {
381
+ let base = &s[0..e_pos];
382
+ let exp = &s[e_pos + 1..];
383
+
384
+ // Parse the exponent with detailed error message
385
+ let exp_val = exp.parse::<i32>().map_err(|e| {
386
+ MagnusError::new(
387
+ magnus::exception::type_error(),
388
+ format!("Failed to parse exponent '{}' in decimal string '{}': {}", exp, s, e),
389
+ )
390
+ })?;
391
+
392
+ // Limit exponent to reasonable range to prevent overflow
393
+ if exp_val.abs() > 38 {
394
+ return Err(MagnusError::new(
395
+ magnus::exception::range_error(),
396
+ format!("Exponent {} is out of range for decimal value '{}'. Must be between -38 and 38.", exp_val, s),
397
+ ));
398
+ }
399
+
400
+ // Handle the base part which might contain a decimal point
401
+ let (base_val, base_scale) = if let Some(decimal_pos) = base.find('.') {
402
+ let mut base_without_point = base.to_string();
403
+ base_without_point.remove(decimal_pos);
404
+
405
+ let base_scale = base.len() - decimal_pos - 1;
406
+
407
+ let base_val = base_without_point.parse::<i128>().map_err(|e| {
408
+ MagnusError::new(
409
+ magnus::exception::type_error(),
410
+ format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
411
+ )
412
+ })?;
413
+
414
+ (base_val, base_scale as i32)
415
+ } else {
416
+ // No decimal point in base
417
+ let base_val = base.parse::<i128>().map_err(|e| {
418
+ MagnusError::new(
419
+ magnus::exception::type_error(),
420
+ format!("Failed to parse base '{}' in scientific notation '{}': {}", base, s, e),
421
+ )
422
+ })?;
423
+
424
+ (base_val, 0)
425
+ };
426
+
427
+ // Calculate the effective scale: base_scale - exp_val
428
+ let effective_scale = base_scale - exp_val;
429
+
430
+ // Adjust the value based on the difference between effective scale and requested scale
431
+ match effective_scale.cmp(&(input_scale as i32)) {
432
+ std::cmp::Ordering::Less => {
433
+ // Need to multiply to increase scale
434
+ let scale_diff = (input_scale as i32 - effective_scale) as u32;
435
+ if scale_diff > 38 {
436
+ return Err(MagnusError::new(
437
+ magnus::exception::range_error(),
438
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
439
+ ));
440
+ }
441
+ Ok(base_val * 10_i128.pow(scale_diff))
442
+ }
443
+ std::cmp::Ordering::Greater => {
444
+ // Need to divide to decrease scale
445
+ let scale_diff = (effective_scale - input_scale as i32) as u32;
446
+ if scale_diff > 38 {
447
+ return Err(MagnusError::new(
448
+ magnus::exception::range_error(),
449
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
450
+ ));
451
+ }
452
+ Ok(base_val / 10_i128.pow(scale_diff))
453
+ }
454
+ std::cmp::Ordering::Equal => Ok(base_val),
455
+ }
456
+ }
457
+ // 2. Handle decimal point in the string (e.g., "123.456")
458
+ else if let Some(decimal_pos) = s.find('.') {
459
+ let mut s_without_point = s.to_string();
460
+ s_without_point.remove(decimal_pos);
461
+
462
+ // Calculate the actual scale from the decimal position
463
+ let actual_scale = s.len() - decimal_pos - 1;
464
+
465
+ // Parse the string without decimal point as i128
466
+ let v = s_without_point.parse::<i128>().map_err(|e| {
467
+ MagnusError::new(
468
+ magnus::exception::type_error(),
469
+ format!("Failed to parse decimal string '{}' (without decimal point: '{}'): {}", s, s_without_point, e),
470
+ )
471
+ })?;
472
+
473
+ // Scale the value if needed based on the difference between
474
+ // the actual scale and the requested scale
475
+ match actual_scale.cmp(&(input_scale as usize)) {
476
+ std::cmp::Ordering::Less => {
477
+ // Need to multiply to increase scale
478
+ let scale_diff = (input_scale - actual_scale as i8) as u32;
479
+ if scale_diff > 38 {
480
+ return Err(MagnusError::new(
481
+ magnus::exception::range_error(),
482
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a smaller scale.", scale_diff, s),
483
+ ));
484
+ }
485
+ Ok(v * 10_i128.pow(scale_diff))
486
+ }
487
+ std::cmp::Ordering::Greater => {
488
+ // Need to divide to decrease scale
489
+ let scale_diff = (actual_scale as i8 - input_scale) as u32;
490
+ if scale_diff > 38 {
491
+ return Err(MagnusError::new(
492
+ magnus::exception::range_error(),
493
+ format!("Scale adjustment too large ({}) for decimal value '{}'. Consider using a larger scale.", scale_diff, s),
494
+ ));
495
+ }
496
+ Ok(v / 10_i128.pow(scale_diff))
497
+ }
498
+ std::cmp::Ordering::Equal => Ok(v),
499
+ }
500
+ }
501
+ // 3. Plain integer value (e.g., "12345")
502
+ else {
503
+ // No decimal point, parse as i128 and scale appropriately
504
+ let v = s.parse::<i128>().map_err(|e| {
505
+ MagnusError::new(
506
+ magnus::exception::type_error(),
507
+ format!("Failed to parse integer string '{}' as decimal: {}", s, e),
508
+ )
509
+ })?;
510
+
511
+ // Apply scale - make sure it's reasonable
512
+ if input_scale > 38 {
513
+ return Err(MagnusError::new(
514
+ magnus::exception::range_error(),
515
+ format!("Scale {} is too large for decimal value '{}'. Must be ≤ 38.", input_scale, s),
516
+ ));
517
+ } else if input_scale < -38 {
518
+ return Err(MagnusError::new(
519
+ magnus::exception::range_error(),
520
+ format!("Scale {} is too small for decimal value '{}'. Must be ≥ -38.", input_scale, s),
521
+ ));
522
+ }
523
+
524
+ // Apply positive scale (multiply)
525
+ if input_scale >= 0 {
526
+ Ok(v * 10_i128.pow(input_scale as u32))
527
+ } else {
528
+ // Apply negative scale (divide)
529
+ Ok(v / 10_i128.pow((-input_scale) as u32))
530
+ }
531
+ }
532
+ }
533
+
534
+ fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
535
+ // Get the decimal string based on the type of value
536
+ let s = if unsafe { value.classname() } == "BigDecimal" {
537
+ value
538
+ .funcall::<_, _, RString>("to_s", ("F",))?
539
+ .to_string()?
540
+ } else {
541
+ value.to_r_string()?.to_string()?
542
+ };
543
+
544
+ // Use our unified parser to convert the string to a decimal value with scaling
545
+ match parse_decimal_string(&s, scale) {
546
+ Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value)),
547
+ Err(e) => Err(MagnusError::new(
548
+ magnus::exception::type_error(),
549
+ format!("Failed to convert '{}' to decimal with scale {}: {}", s, scale, e),
550
+ ))
551
+ }
552
+ }
359
553
 
360
554
  #[derive(Debug)]
361
555
  pub struct ParquetValueVec(Vec<ParquetValue>);
@@ -677,12 +871,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
677
871
  let x = downcast_array::<NullArray>(column.array);
678
872
  Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
679
873
  }
680
- _ => {
681
- return Err(MagnusError::new(
682
- magnus::exception::type_error(),
683
- format!("Unsupported data type: {:?}", column.array.data_type()),
684
- ))?;
685
- }
874
+ _ => Err(MagnusError::new(
875
+ magnus::exception::type_error(),
876
+ format!("Unsupported data type: {:?}", column.array.data_type()),
877
+ ))?,
686
878
  }
687
879
  }
688
880
  }
@@ -1,7 +1,12 @@
1
+ use std::sync::OnceLock;
2
+
1
3
  use itertools::Itertools;
4
+ use parquet::data_type::AsBytes;
2
5
 
3
6
  use super::*;
4
7
 
8
+ static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
9
+
5
10
  #[derive(Debug)]
6
11
  pub enum RowRecord<S: BuildHasher + Default> {
7
12
  Vec(Vec<ParquetField>),
@@ -145,8 +150,8 @@ impl TryIntoValue for ParquetField {
145
150
  Field::Str(s) => {
146
151
  if self.1 {
147
152
  Ok(simdutf8::basic::from_utf8(s.as_bytes())
148
- .map_err(|e| ParquetGemError::Utf8Error(e))
149
- .and_then(|s| Ok(s.into_value_with(handle)))?)
153
+ .map_err(ParquetGemError::Utf8Error)
154
+ .map(|s| s.into_value_with(handle))?)
150
155
  } else {
151
156
  let s = String::from_utf8_lossy(s.as_bytes());
152
157
  Ok(s.into_value_with(handle))
@@ -209,12 +214,18 @@ impl TryIntoValue for ParquetField {
209
214
  format!("{}e-{}", unscaled, scale)
210
215
  }
211
216
  Decimal::Bytes { value, scale, .. } => {
212
- // Convert bytes to string representation of unscaled value
213
- let unscaled = String::from_utf8_lossy(value.data());
217
+ // value is a byte array containing the bytes for an i128 value in big endian order
218
+ let casted = value.as_bytes()[..16].try_into()?;
219
+ let unscaled = i128::from_be_bytes(casted);
214
220
  format!("{}e-{}", unscaled, scale)
215
221
  }
216
222
  };
217
- Ok(handle.eval(&format!("BigDecimal(\"{value}\")"))?)
223
+
224
+ // Load the bigdecimal gem if it's not already loaded
225
+ LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
226
+
227
+ let kernel = handle.module_kernel();
228
+ Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
218
229
  }
219
230
  Field::Group(row) => {
220
231
  let hash = handle.hash_new();
@@ -1,5 +1,5 @@
1
1
  use magnus::value::ReprValue; // Add ReprValue trait to scope
2
- use magnus::{Error as MagnusError, RArray, Ruby, TryConvert, Value};
2
+ use magnus::{Error as MagnusError, IntoValue, RArray, Ruby, TryConvert, Value};
3
3
 
4
4
  use crate::types::{ParquetSchemaType as PST, PrimitiveType, SchemaField, SchemaNode};
5
5
  use crate::utils::parse_string_or_symbol;
@@ -22,7 +22,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
22
22
  let item_field = SchemaField {
23
23
  name: "item".to_string(),
24
24
  type_: list_field.item_type.clone(),
25
- format: list_field.format.clone().map(String::from),
25
+ format: list_field.format.map(String::from),
26
26
  nullable: list_field.nullable,
27
27
  };
28
28
  convert_schema_field_to_node(&item_field)
@@ -33,7 +33,7 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
33
33
  let item_field = SchemaField {
34
34
  name: "item".to_string(),
35
35
  type_: list_field.item_type.clone(),
36
- format: list_field.format.clone().map(String::from),
36
+ format: list_field.format.map(String::from),
37
37
  nullable: list_field.nullable,
38
38
  };
39
39
  convert_schema_field_to_node(&item_field)
@@ -50,13 +50,13 @@ fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
50
50
  let key_field = SchemaField {
51
51
  name: "key".to_string(),
52
52
  type_: map_field.key_type.clone(),
53
- format: map_field.key_format.clone().map(String::from),
53
+ format: map_field.key_format.map(String::from),
54
54
  nullable: false, // Map keys can never be null in Parquet
55
55
  };
56
56
  let value_field = SchemaField {
57
57
  name: "value".to_string(),
58
58
  type_: map_field.value_type.clone(),
59
- format: map_field.value_format.clone().map(String::from),
59
+ format: map_field.value_format.map(String::from),
60
60
  nullable: map_field.value_nullable,
61
61
  };
62
62
 
@@ -121,9 +121,7 @@ pub fn parse_legacy_schema(
121
121
  ruby.exception_type_error(),
122
122
  "Schema must be an array of field definitions or nil",
123
123
  )
124
- })?
125
- .len()
126
- == 0)
124
+ })?.is_empty())
127
125
  {
128
126
  // If schema is nil or an empty array, we'll handle this in the caller
129
127
  return Ok(Vec::new());
@@ -155,7 +153,7 @@ pub fn parse_legacy_schema(
155
153
  }
156
154
 
157
155
  let (name, type_value) = &entries[0];
158
- let name_option = parse_string_or_symbol(ruby, name.clone())?;
156
+ let name_option = parse_string_or_symbol(ruby, *name)?;
159
157
  let name = name_option.ok_or_else(|| {
160
158
  MagnusError::new(ruby.exception_runtime_error(), "Field name cannot be nil")
161
159
  })?;
@@ -166,6 +164,9 @@ pub fn parse_legacy_schema(
166
164
  let mut format_str = None;
167
165
  let mut nullable = true; // Default to true if not specified
168
166
 
167
+ let mut precision: Option<Value> = None;
168
+ let mut scale: Option<Value> = None;
169
+
169
170
  for (key, value) in type_hash {
170
171
  let key_option = parse_string_or_symbol(ruby, key)?;
171
172
  let key = key_option.ok_or_else(|| {
@@ -181,6 +182,12 @@ pub fn parse_legacy_schema(
181
182
  // Extract nullable if present - convert to boolean
182
183
  nullable = bool::try_convert(value).unwrap_or(true);
183
184
  }
185
+ "precision" => {
186
+ precision = Some(value);
187
+ }
188
+ "scale" => {
189
+ scale = Some(value);
190
+ }
184
191
  _ => {
185
192
  return Err(MagnusError::new(
186
193
  ruby.exception_type_error(),
@@ -197,9 +204,109 @@ pub fn parse_legacy_schema(
197
204
  )
198
205
  })?;
199
206
 
200
- (PST::try_convert(type_str)?, format_str, nullable)
207
+ // Handle decimal type with precision and scale
208
+ let mut type_result = PST::try_convert(type_str)?;
209
+
210
+ // If it's a decimal type and we have precision and scale, override the type
211
+ if let PST::Primitive(PrimitiveType::Decimal128(_, _)) = type_result {
212
+ let precision_value = precision.unwrap_or_else(|| {
213
+ let val: u8 = 18;
214
+ val.into_value_with(ruby)
215
+ });
216
+ let scale_value = scale.unwrap_or_else(|| {
217
+ let val: i8 = 2;
218
+ val.into_value_with(ruby)
219
+ });
220
+
221
+ let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
222
+ MagnusError::new(
223
+ ruby.exception_type_error(),
224
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
225
+ )
226
+ })?;
227
+
228
+ // Validate precision is in a valid range
229
+ if precision_u8 < 1 {
230
+ return Err(MagnusError::new(
231
+ ruby.exception_arg_error(),
232
+ format!(
233
+ "Precision for decimal type must be at least 1, got {}",
234
+ precision_u8
235
+ ),
236
+ ));
237
+ }
238
+
239
+ if precision_u8 > 38 {
240
+ return Err(MagnusError::new(
241
+ ruby.exception_arg_error(),
242
+ format!(
243
+ "Precision for decimal type cannot exceed 38, got {}",
244
+ precision_u8
245
+ ),
246
+ ));
247
+ }
248
+
249
+ let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
250
+ MagnusError::new(
251
+ ruby.exception_type_error(),
252
+ "Invalid scale value for decimal type, expected an integer".to_string(),
253
+ )
254
+ })?;
255
+
256
+ // Validate scale is in a valid range relative to precision
257
+ if scale_i8 < 0 {
258
+ return Err(MagnusError::new(
259
+ ruby.exception_arg_error(),
260
+ format!(
261
+ "Scale for decimal type cannot be negative, got {}",
262
+ scale_i8
263
+ ),
264
+ ));
265
+ }
266
+
267
+ if scale_i8 as u8 > precision_u8 {
268
+ return Err(MagnusError::new(
269
+ ruby.exception_arg_error(),
270
+ format!(
271
+ "Scale ({}) cannot be larger than precision ({}) for decimal type",
272
+ scale_i8, precision_u8
273
+ ),
274
+ ));
275
+ }
276
+
277
+ type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
278
+ } else if let Some(type_name) = parse_string_or_symbol(ruby, type_str)? {
279
+ if type_name == "decimal" {
280
+ let precision_value = precision.unwrap_or_else(|| {
281
+ let val: u8 = 18;
282
+ val.into_value_with(ruby)
283
+ });
284
+ let scale_value = scale.unwrap_or_else(|| {
285
+ let val: i8 = 2;
286
+ val.into_value_with(ruby)
287
+ });
288
+
289
+ let precision_u8 = u8::try_convert(precision_value).map_err(|_| {
290
+ MagnusError::new(
291
+ ruby.exception_type_error(),
292
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
293
+ )
294
+ })?;
295
+
296
+ let scale_i8 = i8::try_convert(scale_value).map_err(|_| {
297
+ MagnusError::new(
298
+ ruby.exception_type_error(),
299
+ "Invalid scale value for decimal type, expected an integer".to_string(),
300
+ )
301
+ })?;
302
+
303
+ type_result = PST::Primitive(PrimitiveType::Decimal128(precision_u8, scale_i8));
304
+ }
305
+ }
306
+
307
+ (type_result, format_str, nullable)
201
308
  } else {
202
- (PST::try_convert(type_value.clone())?, None, true)
309
+ (PST::try_convert(*type_value)?, None, true)
203
310
  };
204
311
 
205
312
  schema.push(SchemaField {