parquet 0.5.2 → 0.5.4

This diff shows the changes between publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -68,7 +68,7 @@ fn parse_struct_node(
68
68
  })?;
69
69
 
70
70
  // Check for empty struct immediately
71
- if fields_arr.len() == 0 {
71
+ if fields_arr.is_empty() {
72
72
  return Err(MagnusError::new(
73
73
  ruby.exception_arg_error(),
74
74
  format!("Cannot create a struct with zero fields. Struct name: '{}'. Parquet doesn't support empty structs", name)
@@ -175,6 +175,83 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
175
175
  "struct" => parse_struct_node(ruby, &node_hash, name, nullable),
176
176
  "list" => parse_list_node(ruby, &node_hash, name, nullable),
177
177
  "map" => parse_map_node(ruby, &node_hash, name, nullable),
178
+ "decimal" => {
179
+ // Check for precision and scale
180
+ let precision_val = node_hash.get(Symbol::new("precision"));
181
+ let scale_val = node_hash.get(Symbol::new("scale"));
182
+
183
+ // Handle different precision/scale combinations:
184
+ // 1. When no precision or scale - use max precision (38)
185
+ // 2. When precision only - use scale 0
186
+ // 3. When scale only - use max precision (38)
187
+ let (precision, scale) = match (precision_val, scale_val) {
188
+ (None, None) => (38, 0), // Maximum precision (38), scale 0
189
+ (Some(p), None) => {
190
+ // Precision provided, scale defaults to 0
191
+ let prec = u8::try_convert(p).map_err(|_| {
192
+ MagnusError::new(
193
+ ruby.exception_type_error(),
194
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
195
+ )
196
+ })?;
197
+ (prec, 0)
198
+ },
199
+ (None, Some(s)) => {
200
+ // Scale provided, precision set to maximum (38)
201
+ let scl = i8::try_convert(s).map_err(|_| {
202
+ MagnusError::new(
203
+ ruby.exception_type_error(),
204
+ "Invalid scale value for decimal type, expected an integer".to_string(),
205
+ )
206
+ })?;
207
+ (38, scl)
208
+ },
209
+ (Some(p), Some(s)) => {
210
+ // Both provided
211
+ let prec = u8::try_convert(p).map_err(|_| {
212
+ MagnusError::new(
213
+ ruby.exception_type_error(),
214
+ "Invalid precision value for decimal type, expected a positive integer".to_string(),
215
+ )
216
+ })?;
217
+ let scl = i8::try_convert(s).map_err(|_| {
218
+ MagnusError::new(
219
+ ruby.exception_type_error(),
220
+ "Invalid scale value for decimal type, expected an integer".to_string(),
221
+ )
222
+ })?;
223
+ (prec, scl)
224
+ }
225
+ };
226
+
227
+ // Validate precision is in a valid range
228
+ if precision < 1 {
229
+ return Err(MagnusError::new(
230
+ ruby.exception_arg_error(),
231
+ format!(
232
+ "Precision for decimal type must be at least 1, got {}",
233
+ precision
234
+ ),
235
+ ));
236
+ }
237
+
238
+ if precision > 38 {
239
+ return Err(MagnusError::new(
240
+ ruby.exception_arg_error(),
241
+ format!(
242
+ "Precision for decimal type cannot exceed 38, got {}",
243
+ precision
244
+ ),
245
+ ));
246
+ }
247
+
248
+ Ok(SchemaNode::Primitive {
249
+ name,
250
+ parquet_type: PrimitiveType::Decimal128(precision, scale),
251
+ nullable,
252
+ format,
253
+ })
254
+ }
178
255
  // For primitives, provide better error messages when type isn't recognized
179
256
  other => {
180
257
  if let Some(parquet_type) = parse_primitive_type(other) {
@@ -188,7 +265,7 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
188
265
  Err(MagnusError::new(
189
266
  magnus::exception::arg_error(),
190
267
  format!(
191
- "Unknown type: '{}'. Supported types are: struct, list, map, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64, boolean, string, binary, date32, timestamp_millis, timestamp_micros",
268
+ "Unknown type: '{}'. Supported types are: struct, list, map, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64, boolean, string, binary, date32, timestamp_millis, timestamp_micros, decimal",
192
269
  other
193
270
  )
194
271
  ))
@@ -216,6 +293,7 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
216
293
  "date" | "date32" => Some(PrimitiveType::Date32),
217
294
  "timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
218
295
  "timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
296
+ "decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
219
297
  _ => None,
220
298
  }
221
299
  }
@@ -240,6 +318,9 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
240
318
  PrimitiveType::UInt64 => ArrowDataType::UInt64,
241
319
  PrimitiveType::Float32 => ArrowDataType::Float32,
242
320
  PrimitiveType::Float64 => ArrowDataType::Float64,
321
+ PrimitiveType::Decimal128(precision, scale) => {
322
+ ArrowDataType::Decimal128(*precision, *scale)
323
+ }
243
324
  PrimitiveType::Boolean => ArrowDataType::Boolean,
244
325
  PrimitiveType::String => ArrowDataType::Utf8,
245
326
  PrimitiveType::Binary => ArrowDataType::Binary,
@@ -2,15 +2,11 @@ use super::*;
2
2
 
3
3
  pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, ParquetGemError> {
4
4
  let (ts, tz) = match value {
5
- ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts).unwrap(), tz),
6
- ParquetValue::TimestampMillis(ts, tz) => {
7
- (jiff::Timestamp::from_millisecond(*ts).unwrap(), tz)
8
- }
9
- ParquetValue::TimestampMicros(ts, tz) => {
10
- (jiff::Timestamp::from_microsecond(*ts).unwrap(), tz)
11
- }
5
+ ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts)?, tz),
6
+ ParquetValue::TimestampMillis(ts, tz) => (jiff::Timestamp::from_millisecond(*ts)?, tz),
7
+ ParquetValue::TimestampMicros(ts, tz) => (jiff::Timestamp::from_microsecond(*ts)?, tz),
12
8
  ParquetValue::TimestampNanos(ts, tz) => {
13
- (jiff::Timestamp::from_nanosecond(*ts as i128).unwrap(), tz)
9
+ (jiff::Timestamp::from_nanosecond(*ts as i128)?, tz)
14
10
  }
15
11
  _ => {
16
12
  return Err(MagnusError::new(
@@ -50,7 +46,7 @@ pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, Pa
50
46
  Ok(ts.to_zoned(tz).timestamp())
51
47
  } else {
52
48
  // Try IANA timezone
53
- match ts.in_tz(&tz) {
49
+ match ts.in_tz(tz) {
54
50
  Ok(zoned) => Ok(zoned.timestamp()),
55
51
  Err(_) => Ok(ts), // Fall back to UTC if timezone is invalid
56
52
  }
@@ -85,7 +81,7 @@ macro_rules! impl_timestamp_conversion {
85
81
  #[macro_export]
86
82
  macro_rules! impl_date_conversion {
87
83
  ($value:expr, $handle:expr) => {{
88
- let ts = jiff::Timestamp::from_second(($value as i64) * 86400).unwrap();
84
+ let ts = jiff::Timestamp::from_second(($value as i64) * 86400)?;
89
85
  let formatted = ts.strftime("%Y-%m-%d").to_string();
90
86
  Ok(formatted.into_value_with($handle))
91
87
  }};
@@ -2,8 +2,8 @@ use std::str::FromStr;
2
2
  use std::sync::Arc;
3
3
 
4
4
  use super::*;
5
- use arrow_array::builder::MapFieldNames;
6
5
  use arrow_array::builder::*;
6
+ use arrow_array::builder::MapFieldNames;
7
7
  use arrow_schema::{DataType, Field, Fields, TimeUnit};
8
8
  use jiff::tz::{Offset, TimeZone};
9
9
  use magnus::{RArray, RString, TryConvert};
@@ -41,9 +41,9 @@ pub fn convert_to_date32(
41
41
  let s = String::try_convert(value)?;
42
42
  // Parse string into Date using jiff
43
43
  let date = if let Some(fmt) = format {
44
- jiff::civil::Date::strptime(&fmt, &s).or_else(|e1| {
44
+ jiff::civil::Date::strptime(fmt, &s).or_else(|e1| {
45
45
  // Try parsing as DateTime and convert to Date with zero offset
46
- jiff::civil::DateTime::strptime(&fmt, &s)
46
+ jiff::civil::DateTime::strptime(fmt, &s)
47
47
  .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
48
48
  .map(|dt| dt.date())
49
49
  .map_err(|e2| {
@@ -78,7 +78,7 @@ pub fn convert_to_date32(
78
78
  .timestamp();
79
79
 
80
80
  // Convert to epoch days
81
- Ok((x.as_second() as i64 / 86400) as i32)
81
+ Ok((x.as_second() / 86400) as i32)
82
82
  } else if value.is_kind_of(ruby.class_time()) {
83
83
  // Convert Time object to epoch days
84
84
  let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
@@ -100,10 +100,10 @@ pub fn convert_to_timestamp_millis(
100
100
  let s = String::try_convert(value)?;
101
101
  // Parse string into Timestamp using jiff
102
102
  let timestamp = if let Some(fmt) = format {
103
- jiff::Timestamp::strptime(&fmt, &s)
103
+ jiff::Timestamp::strptime(fmt, &s)
104
104
  .or_else(|e1| {
105
105
  // Try parsing as DateTime and convert to Timestamp with zero offset
106
- jiff::civil::DateTime::strptime(&fmt, &s)
106
+ jiff::civil::DateTime::strptime(fmt, &s)
107
107
  .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
108
108
  .map(|dt| dt.timestamp())
109
109
  .map_err(|e2| {
@@ -150,9 +150,9 @@ pub fn convert_to_timestamp_micros(
150
150
  let s = String::try_convert(value)?;
151
151
  // Parse string into Timestamp using jiff
152
152
  let timestamp = if let Some(fmt) = format {
153
- jiff::Timestamp::strptime(&fmt, &s).or_else(|e1| {
153
+ jiff::Timestamp::strptime(fmt, &s).or_else(|e1| {
154
154
  // Try parsing as DateTime and convert to Timestamp with zero offset
155
- jiff::civil::DateTime::strptime(&fmt, &s).and_then(|dt| {
155
+ jiff::civil::DateTime::strptime(fmt, &s).and_then(|dt| {
156
156
  dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
157
157
  })
158
158
  .map(|dt| dt.timestamp())
@@ -242,6 +242,7 @@ pub fn parquet_schema_type_to_arrow_data_type(
242
242
  PrimitiveType::UInt64 => DataType::UInt64,
243
243
  PrimitiveType::Float32 => DataType::Float32,
244
244
  PrimitiveType::Float64 => DataType::Float64,
245
+ PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
245
246
  PrimitiveType::String => DataType::Utf8,
246
247
  PrimitiveType::Binary => DataType::Binary,
247
248
  PrimitiveType::Boolean => DataType::Boolean,
@@ -364,6 +365,20 @@ fn create_arrow_builder_for_type(
364
365
  ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
365
366
  Ok(Box::new(Float64Builder::with_capacity(cap)))
366
367
  }
368
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
369
+ // Create a Decimal128Builder with specific precision and scale
370
+ let builder = Decimal128Builder::with_capacity(cap);
371
+
372
+ // Set precision and scale for the decimal and return the new builder
373
+ let builder_with_precision = builder.with_precision_and_scale(*precision, *scale).map_err(|e| {
374
+ MagnusError::new(
375
+ magnus::exception::runtime_error(),
376
+ format!("Failed to set precision and scale: {}", e),
377
+ )
378
+ })?;
379
+
380
+ Ok(Box::new(builder_with_precision))
381
+ }
367
382
  ParquetSchemaType::Primitive(PrimitiveType::String) => {
368
383
  Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
369
384
  }
@@ -415,7 +430,7 @@ fn create_arrow_builder_for_type(
415
430
  ParquetSchemaType::Struct(struct_field) => {
416
431
  // Check for empty struct immediately
417
432
  if struct_field.fields.is_empty() {
418
- return Err(MagnusError::new(
433
+ Err(MagnusError::new(
419
434
  magnus::exception::runtime_error(),
420
435
  "Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
421
436
  ))?;
@@ -445,7 +460,7 @@ fn create_arrow_builder_for_type(
445
460
 
446
461
  // Make sure we have the right number of builders
447
462
  if child_field_builders.len() != arrow_fields.len() {
448
- return Err(MagnusError::new(
463
+ Err(MagnusError::new(
449
464
  magnus::exception::runtime_error(),
450
465
  format!(
451
466
  "Number of field builders ({}) doesn't match number of arrow fields ({})",
@@ -834,6 +849,46 @@ fn fill_builder(
834
849
  }
835
850
  Ok(())
836
851
  }
852
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_precision, scale)) => {
853
+ let typed_builder = builder
854
+ .as_any_mut()
855
+ .downcast_mut::<Decimal128Builder>()
856
+ .expect("Builder mismatch: expected Float64Builder");
857
+
858
+ for val in values {
859
+ match val {
860
+ ParquetValue::Decimal128(d) => typed_builder.append_value(*d),
861
+ ParquetValue::Float64(f) => {
862
+ // Scale the float to the desired precision and scale
863
+ let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
864
+ typed_builder.append_value(scaled_value)
865
+ }
866
+ ParquetValue::Float32(flo) => {
867
+ // Scale the float to the desired precision and scale
868
+ let scaled_value = (*flo as f64 * 10_f64.powi(*scale as i32)) as i128;
869
+ typed_builder.append_value(scaled_value)
870
+ }
871
+ ParquetValue::Int64(i) => {
872
+ // Scale the integer to the desired scale
873
+ let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
874
+ typed_builder.append_value(scaled_value)
875
+ }
876
+ ParquetValue::Int32(i) => {
877
+ // Scale the integer to the desired scale
878
+ let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
879
+ typed_builder.append_value(scaled_value)
880
+ }
881
+ ParquetValue::Null => typed_builder.append_null(),
882
+ other => {
883
+ return Err(MagnusError::new(
884
+ magnus::exception::type_error(),
885
+ format!("Expected Float64, got {:?}", other),
886
+ ))
887
+ }
888
+ }
889
+ }
890
+ Ok(())
891
+ }
837
892
  ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
838
893
  let typed_builder = builder
839
894
  .as_any_mut()
@@ -954,7 +1009,7 @@ fn fill_builder(
954
1009
  .expect("Builder mismatch: expected BinaryBuilder");
955
1010
  for val in values {
956
1011
  match val {
957
- ParquetValue::Bytes(b) => typed_builder.append_value(&b),
1012
+ ParquetValue::Bytes(b) => typed_builder.append_value(b),
958
1013
  ParquetValue::Null => typed_builder.append_null(),
959
1014
  other => {
960
1015
  return Err(MagnusError::new(
@@ -1106,6 +1161,15 @@ fn fill_builder(
1106
1161
  )
1107
1162
  })?
1108
1163
  .append_value(bytes),
1164
+ ParquetValue::Decimal128(x) => typed_builder
1165
+ .field_builder::<Decimal128Builder>(i)
1166
+ .ok_or_else(|| {
1167
+ MagnusError::new(
1168
+ magnus::exception::type_error(),
1169
+ "Failed to coerce into Decimal128Builder",
1170
+ )
1171
+ })?
1172
+ .append_value(*x),
1109
1173
  ParquetValue::Date32(x) => typed_builder
1110
1174
  .field_builder::<Date32Builder>(i)
1111
1175
  .ok_or_else(|| {
@@ -1302,6 +1366,15 @@ fn fill_builder(
1302
1366
  )
1303
1367
  })?
1304
1368
  .append_null(),
1369
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_, _)) => typed_builder
1370
+ .field_builder::<Decimal128Builder>(i)
1371
+ .ok_or_else(|| {
1372
+ MagnusError::new(
1373
+ magnus::exception::type_error(),
1374
+ "Failed to coerce into Decimal128Builder",
1375
+ )
1376
+ })?
1377
+ .append_null(),
1305
1378
  ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
1306
1379
  .field_builder::<StringBuilder>(i)
1307
1380
  .ok_or_else(|| {
@@ -59,7 +59,7 @@ impl Write for IoLikeValue {
59
59
  }
60
60
  }
61
61
 
62
- impl<'a> FromStr for ParquetSchemaType<'a> {
62
+ impl FromStr for ParquetSchemaType<'_> {
63
63
  type Err = MagnusError;
64
64
 
65
65
  fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -98,6 +98,53 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
98
98
  })));
99
99
  }
100
100
 
101
+ // Check if it's a decimal type with precision and scale
102
+ if let Some(decimal_params) = s.strip_prefix("decimal(").and_then(|s| s.strip_suffix(")")) {
103
+ let parts: Vec<&str> = decimal_params.split(',').collect();
104
+
105
+ // Handle both single parameter (precision only) and two parameters (precision and scale)
106
+ if parts.len() == 1 {
107
+ // Only precision provided, scale defaults to 0
108
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
109
+ MagnusError::new(
110
+ magnus::exception::runtime_error(),
111
+ format!("Invalid precision value in decimal type: {}", parts[0]),
112
+ )
113
+ })?;
114
+
115
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
116
+ precision, 0,
117
+ )));
118
+ } else if parts.len() == 2 {
119
+ // Both precision and scale provided
120
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
121
+ MagnusError::new(
122
+ magnus::exception::runtime_error(),
123
+ format!("Invalid precision value in decimal type: {}", parts[0]),
124
+ )
125
+ })?;
126
+
127
+ let scale = parts[1].trim().parse::<i8>().map_err(|_| {
128
+ MagnusError::new(
129
+ magnus::exception::runtime_error(),
130
+ format!("Invalid scale value in decimal type: {}", parts[1]),
131
+ )
132
+ })?;
133
+
134
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
135
+ precision, scale,
136
+ )));
137
+ } else {
138
+ return Err(MagnusError::new(
139
+ magnus::exception::runtime_error(),
140
+ format!(
141
+ "Invalid decimal format. Expected 'decimal(precision)' or 'decimal(precision,scale)', got '{}'",
142
+ s
143
+ ),
144
+ ));
145
+ }
146
+ }
147
+
101
148
  // Handle primitive types
102
149
  match s {
103
150
  "int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
@@ -116,6 +163,9 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
116
163
  "date32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Date32)),
117
164
  "timestamp_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis)),
118
165
  "timestamp_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros)),
166
+ "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
167
+ 38, 0,
168
+ ))),
119
169
  "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
120
170
  item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
121
171
  format: None,
@@ -129,7 +179,7 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
129
179
  }
130
180
  }
131
181
 
132
- impl<'a> TryConvert for ParquetSchemaType<'a> {
182
+ impl TryConvert for ParquetSchemaType<'_> {
133
183
  fn try_convert(value: Value) -> Result<Self, MagnusError> {
134
184
  let ruby = unsafe { Ruby::get_unchecked() };
135
185
  let schema_type = parse_string_or_symbol(&ruby, value)?;
@@ -144,7 +194,7 @@ impl<'a> TryConvert for ParquetSchemaType<'a> {
144
194
 
145
195
  // We know this type is safe to move between threads because it's just an enum
146
196
  // with simple primitive types and strings
147
- unsafe impl<'a> Send for ParquetSchemaType<'a> {}
197
+ unsafe impl Send for ParquetSchemaType<'_> {}
148
198
 
149
199
  pub enum WriterOutput {
150
200
  File(ArrowWriter<Box<dyn SendableWrite>>),
@@ -202,14 +252,12 @@ impl<'a> ColumnCollector<'a> {
202
252
  pub fn push_value(&mut self, value: Value) -> Result<(), MagnusError> {
203
253
  use crate::types::ParquetValue;
204
254
 
205
- if value.is_nil() {
206
- if !self.nullable {
207
- // For non-nullable fields, raise an error
208
- return Err(MagnusError::new(
209
- magnus::exception::runtime_error(),
210
- "Cannot write nil value for non-nullable field",
211
- ));
212
- }
255
+ if value.is_nil() && !self.nullable {
256
+ // For non-nullable fields, raise an error
257
+ return Err(MagnusError::new(
258
+ magnus::exception::runtime_error(),
259
+ "Cannot write nil value for non-nullable field",
260
+ ));
213
261
  }
214
262
 
215
263
  // For all other types, proceed as normal
@@ -13,12 +13,12 @@ pub fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String
13
13
  RString::from_value(value)
14
14
  .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
15
15
  .to_string()
16
- .map(|s| Some(s))
16
+ .map(Some)
17
17
  } else if value.is_kind_of(ruby.class_symbol()) {
18
18
  Symbol::from_value(value)
19
19
  .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
20
20
  .funcall("to_s", ())
21
- .map(|s| Some(s))
21
+ .map(Some)
22
22
  } else {
23
23
  Err(Error::new(
24
24
  magnus::exception::type_error(),
@@ -161,11 +161,11 @@ pub fn parse_parquet_columns_args(
161
161
  };
162
162
 
163
163
  let batch_size = kwargs.optional.2.flatten();
164
- if let Some(sz) = batch_size {
165
- if sz <= 0 {
164
+ if let Some(batch_size) = batch_size {
165
+ if batch_size == 0 {
166
166
  return Err(Error::new(
167
- ruby.exception_arg_error(),
168
- format!("batch_size must be > 0, got {}", sz),
167
+ magnus::exception::arg_error(),
168
+ "Batch size must be greater than 0",
169
169
  ));
170
170
  }
171
171
  }
@@ -111,13 +111,13 @@ pub fn parse_parquet_write_args(
111
111
  if let Some(type_val) = type_val {
112
112
  // If it has a type: :struct, it's the new DSL format
113
113
  // Use parse_string_or_symbol to handle both String and Symbol values
114
- let ttype = parse_string_or_symbol(&ruby, type_val)?;
114
+ let ttype = parse_string_or_symbol(ruby, type_val)?;
115
115
  if let Some(ref type_str) = ttype {
116
116
  if type_str == "struct" {
117
117
  // Parse using the new schema approach
118
- let schema_node = crate::parse_schema_node(&ruby, schema_value)?;
118
+ let schema_node = crate::parse_schema_node(ruby, schema_value)?;
119
119
 
120
- validate_schema_node(&ruby, &schema_node)?;
120
+ validate_schema_node(ruby, &schema_node)?;
121
121
 
122
122
  return Ok(ParquetWriteArgs {
123
123
  read_from,
@@ -143,22 +143,21 @@ pub fn parse_parquet_write_args(
143
143
  "Schema fields must be an array",
144
144
  )
145
145
  })?
146
- .len()
147
- == 0)
146
+ .is_empty())
148
147
  {
149
148
  // If schema is nil or an empty array, we need to peek at the first value to determine column count
150
149
  let first_value = read_from.funcall::<_, _, Value>("peek", ())?;
151
150
  // Default to nullable:true for auto-inferred fields
152
- crate::infer_schema_from_first_row(&ruby, first_value, true)?
151
+ crate::infer_schema_from_first_row(ruby, first_value, true)?
153
152
  } else {
154
153
  // Legacy array format - use our centralized parser
155
- crate::parse_legacy_schema(&ruby, schema_value)?
154
+ crate::parse_legacy_schema(ruby, schema_value)?
156
155
  };
157
156
 
158
157
  // Convert the legacy schema fields to SchemaNode (DSL format)
159
- let schema_node = crate::legacy_schema_to_dsl(&ruby, schema_fields)?;
158
+ let schema_node = crate::legacy_schema_to_dsl(ruby, schema_fields)?;
160
159
 
161
- validate_schema_node(&ruby, &schema_node)?;
160
+ validate_schema_node(ruby, &schema_node)?;
162
161
 
163
162
  Ok(ParquetWriteArgs {
164
163
  read_from,
@@ -195,6 +194,9 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
195
194
  }
196
195
  DataType::Float32 => Ok(PST::Primitive(PrimitiveType::Float32)),
197
196
  DataType::Float64 => Ok(PST::Primitive(PrimitiveType::Float64)),
197
+ DataType::Decimal128(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal128(
198
+ *precision, *scale,
199
+ ))),
198
200
  DataType::Date32 => Ok(PST::Primitive(PrimitiveType::Date32)),
199
201
  DataType::Date64 => {
200
202
  // Our code typically uses Date32 or Timestamp for 64. But Arrow has Date64
@@ -414,15 +416,21 @@ fn create_writer(
414
416
  compression: Option<String>,
415
417
  ) -> Result<WriterOutput, ParquetGemError> {
416
418
  // Create writer properties with compression based on the option
419
+ let compression_setting = match compression.map(|s| s.to_lowercase()).as_deref() {
420
+ Some("none") | Some("uncompressed") => Ok(Compression::UNCOMPRESSED),
421
+ Some("snappy") => Ok(Compression::SNAPPY),
422
+ Some("gzip") => Ok(Compression::GZIP(GzipLevel::default())),
423
+ Some("lz4") => Ok(Compression::LZ4),
424
+ Some("zstd") => Ok(Compression::ZSTD(ZstdLevel::default())),
425
+ None => Ok(Compression::UNCOMPRESSED),
426
+ other => Err(MagnusError::new(
427
+ magnus::exception::arg_error(),
428
+ format!("Invalid compression option: {:?}", other),
429
+ )),
430
+ }?;
431
+
417
432
  let props = WriterProperties::builder()
418
- .set_compression(match compression.as_deref() {
419
- Some("none") | Some("uncompressed") => Compression::UNCOMPRESSED,
420
- Some("snappy") => Compression::SNAPPY,
421
- Some("gzip") => Compression::GZIP(GzipLevel::default()),
422
- Some("lz4") => Compression::LZ4,
423
- Some("zstd") => Compression::ZSTD(ZstdLevel::default()),
424
- _ => Compression::UNCOMPRESSED,
425
- })
433
+ .set_compression(compression_setting)
426
434
  .build();
427
435
 
428
436
  if write_to.is_kind_of(ruby.class_string()) {