parquet 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@ use std::str::FromStr;
2
2
  use std::sync::Arc;
3
3
 
4
4
  use super::*;
5
- use arrow_array::builder::MapFieldNames;
6
5
  use arrow_array::builder::*;
6
+ use arrow_array::builder::MapFieldNames;
7
7
  use arrow_schema::{DataType, Field, Fields, TimeUnit};
8
8
  use jiff::tz::{Offset, TimeZone};
9
9
  use magnus::{RArray, RString, TryConvert};
@@ -41,9 +41,9 @@ pub fn convert_to_date32(
41
41
  let s = String::try_convert(value)?;
42
42
  // Parse string into Date using jiff
43
43
  let date = if let Some(fmt) = format {
44
- jiff::civil::Date::strptime(&fmt, &s).or_else(|e1| {
44
+ jiff::civil::Date::strptime(fmt, &s).or_else(|e1| {
45
45
  // Try parsing as DateTime and convert to Date with zero offset
46
- jiff::civil::DateTime::strptime(&fmt, &s)
46
+ jiff::civil::DateTime::strptime(fmt, &s)
47
47
  .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
48
48
  .map(|dt| dt.date())
49
49
  .map_err(|e2| {
@@ -78,7 +78,7 @@ pub fn convert_to_date32(
78
78
  .timestamp();
79
79
 
80
80
  // Convert to epoch days
81
- Ok((x.as_second() as i64 / 86400) as i32)
81
+ Ok((x.as_second() / 86400) as i32)
82
82
  } else if value.is_kind_of(ruby.class_time()) {
83
83
  // Convert Time object to epoch days
84
84
  let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
@@ -100,10 +100,10 @@ pub fn convert_to_timestamp_millis(
100
100
  let s = String::try_convert(value)?;
101
101
  // Parse string into Timestamp using jiff
102
102
  let timestamp = if let Some(fmt) = format {
103
- jiff::Timestamp::strptime(&fmt, &s)
103
+ jiff::Timestamp::strptime(fmt, &s)
104
104
  .or_else(|e1| {
105
105
  // Try parsing as DateTime and convert to Timestamp with zero offset
106
- jiff::civil::DateTime::strptime(&fmt, &s)
106
+ jiff::civil::DateTime::strptime(fmt, &s)
107
107
  .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
108
108
  .map(|dt| dt.timestamp())
109
109
  .map_err(|e2| {
@@ -150,9 +150,9 @@ pub fn convert_to_timestamp_micros(
150
150
  let s = String::try_convert(value)?;
151
151
  // Parse string into Timestamp using jiff
152
152
  let timestamp = if let Some(fmt) = format {
153
- jiff::Timestamp::strptime(&fmt, &s).or_else(|e1| {
153
+ jiff::Timestamp::strptime(fmt, &s).or_else(|e1| {
154
154
  // Try parsing as DateTime and convert to Timestamp with zero offset
155
- jiff::civil::DateTime::strptime(&fmt, &s).and_then(|dt| {
155
+ jiff::civil::DateTime::strptime(fmt, &s).and_then(|dt| {
156
156
  dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
157
157
  })
158
158
  .map(|dt| dt.timestamp())
@@ -242,6 +242,7 @@ pub fn parquet_schema_type_to_arrow_data_type(
242
242
  PrimitiveType::UInt64 => DataType::UInt64,
243
243
  PrimitiveType::Float32 => DataType::Float32,
244
244
  PrimitiveType::Float64 => DataType::Float64,
245
+ PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
245
246
  PrimitiveType::String => DataType::Utf8,
246
247
  PrimitiveType::Binary => DataType::Binary,
247
248
  PrimitiveType::Boolean => DataType::Boolean,
@@ -364,6 +365,20 @@ fn create_arrow_builder_for_type(
364
365
  ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
365
366
  Ok(Box::new(Float64Builder::with_capacity(cap)))
366
367
  }
368
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
369
+ // Create a Decimal128Builder with specific precision and scale
370
+ let builder = Decimal128Builder::with_capacity(cap);
371
+
372
+ // Set precision and scale for the decimal and return the new builder
373
+ let builder_with_precision = builder.with_precision_and_scale(*precision, *scale).map_err(|e| {
374
+ MagnusError::new(
375
+ magnus::exception::runtime_error(),
376
+ format!("Failed to set precision and scale: {}", e),
377
+ )
378
+ })?;
379
+
380
+ Ok(Box::new(builder_with_precision))
381
+ }
367
382
  ParquetSchemaType::Primitive(PrimitiveType::String) => {
368
383
  Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
369
384
  }
@@ -415,7 +430,7 @@ fn create_arrow_builder_for_type(
415
430
  ParquetSchemaType::Struct(struct_field) => {
416
431
  // Check for empty struct immediately
417
432
  if struct_field.fields.is_empty() {
418
- return Err(MagnusError::new(
433
+ Err(MagnusError::new(
419
434
  magnus::exception::runtime_error(),
420
435
  "Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
421
436
  ))?;
@@ -445,7 +460,7 @@ fn create_arrow_builder_for_type(
445
460
 
446
461
  // Make sure we have the right number of builders
447
462
  if child_field_builders.len() != arrow_fields.len() {
448
- return Err(MagnusError::new(
463
+ Err(MagnusError::new(
449
464
  magnus::exception::runtime_error(),
450
465
  format!(
451
466
  "Number of field builders ({}) doesn't match number of arrow fields ({})",
@@ -834,6 +849,46 @@ fn fill_builder(
834
849
  }
835
850
  Ok(())
836
851
  }
852
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_precision, scale)) => {
853
+ let typed_builder = builder
854
+ .as_any_mut()
855
+ .downcast_mut::<Decimal128Builder>()
856
+ .expect("Builder mismatch: expected Decimal128Builder");
857
+
858
+ for val in values {
859
+ match val {
860
+ ParquetValue::Decimal128(d) => typed_builder.append_value(*d),
861
+ ParquetValue::Float64(f) => {
862
+ // Scale the float to the desired precision and scale
863
+ let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
864
+ typed_builder.append_value(scaled_value)
865
+ }
866
+ ParquetValue::Float32(flo) => {
867
+ // Scale the float to the desired precision and scale
868
+ let scaled_value = (*flo as f64 * 10_f64.powi(*scale as i32)) as i128;
869
+ typed_builder.append_value(scaled_value)
870
+ }
871
+ ParquetValue::Int64(i) => {
872
+ // Scale the integer to the desired scale
873
+ let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
874
+ typed_builder.append_value(scaled_value)
875
+ }
876
+ ParquetValue::Int32(i) => {
877
+ // Scale the integer to the desired scale
878
+ let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
879
+ typed_builder.append_value(scaled_value)
880
+ }
881
+ ParquetValue::Null => typed_builder.append_null(),
882
+ other => {
883
+ return Err(MagnusError::new(
884
+ magnus::exception::type_error(),
885
+ format!("Expected Decimal128, Float, or Int, got {:?}", other),
886
+ ))
887
+ }
888
+ }
889
+ }
890
+ Ok(())
891
+ }
837
892
  ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
838
893
  let typed_builder = builder
839
894
  .as_any_mut()
@@ -954,7 +1009,7 @@ fn fill_builder(
954
1009
  .expect("Builder mismatch: expected BinaryBuilder");
955
1010
  for val in values {
956
1011
  match val {
957
- ParquetValue::Bytes(b) => typed_builder.append_value(&b),
1012
+ ParquetValue::Bytes(b) => typed_builder.append_value(b),
958
1013
  ParquetValue::Null => typed_builder.append_null(),
959
1014
  other => {
960
1015
  return Err(MagnusError::new(
@@ -1106,6 +1161,15 @@ fn fill_builder(
1106
1161
  )
1107
1162
  })?
1108
1163
  .append_value(bytes),
1164
+ ParquetValue::Decimal128(x) => typed_builder
1165
+ .field_builder::<Decimal128Builder>(i)
1166
+ .ok_or_else(|| {
1167
+ MagnusError::new(
1168
+ magnus::exception::type_error(),
1169
+ "Failed to coerce into Decimal128Builder",
1170
+ )
1171
+ })?
1172
+ .append_value(*x),
1109
1173
  ParquetValue::Date32(x) => typed_builder
1110
1174
  .field_builder::<Date32Builder>(i)
1111
1175
  .ok_or_else(|| {
@@ -1302,6 +1366,15 @@ fn fill_builder(
1302
1366
  )
1303
1367
  })?
1304
1368
  .append_null(),
1369
+ ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_, _)) => typed_builder
1370
+ .field_builder::<Decimal128Builder>(i)
1371
+ .ok_or_else(|| {
1372
+ MagnusError::new(
1373
+ magnus::exception::type_error(),
1374
+ "Failed to coerce into Decimal128Builder",
1375
+ )
1376
+ })?
1377
+ .append_null(),
1305
1378
  ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
1306
1379
  .field_builder::<StringBuilder>(i)
1307
1380
  .ok_or_else(|| {
@@ -59,7 +59,7 @@ impl Write for IoLikeValue {
59
59
  }
60
60
  }
61
61
 
62
- impl<'a> FromStr for ParquetSchemaType<'a> {
62
+ impl FromStr for ParquetSchemaType<'_> {
63
63
  type Err = MagnusError;
64
64
 
65
65
  fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -98,6 +98,36 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
98
98
  })));
99
99
  }
100
100
 
101
+ // Check if it's a decimal type with precision and scale
102
+ if let Some(decimal_params) = s.strip_prefix("decimal(").and_then(|s| s.strip_suffix(")")) {
103
+ let parts: Vec<&str> = decimal_params.split(',').collect();
104
+ if parts.len() != 2 {
105
+ return Err(MagnusError::new(
106
+ magnus::exception::runtime_error(),
107
+ format!(
108
+ "Invalid decimal format. Expected 'decimal(precision,scale)', got '{}'",
109
+ s
110
+ ),
111
+ ));
112
+ }
113
+
114
+ let precision = parts[0].trim().parse::<u8>().map_err(|_| {
115
+ MagnusError::new(
116
+ magnus::exception::runtime_error(),
117
+ format!("Invalid precision value in decimal type: {}", parts[0]),
118
+ )
119
+ })?;
120
+
121
+ let scale = parts[1].trim().parse::<i8>().map_err(|_| {
122
+ MagnusError::new(
123
+ magnus::exception::runtime_error(),
124
+ format!("Invalid scale value in decimal type: {}", parts[1]),
125
+ )
126
+ })?;
127
+
128
+ return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)));
129
+ }
130
+
101
131
  // Handle primitive types
102
132
  match s {
103
133
  "int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
@@ -116,6 +146,7 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
116
146
  "date32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Date32)),
117
147
  "timestamp_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis)),
118
148
  "timestamp_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros)),
149
+ "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(18, 2))), // Default precision 18, scale 2
119
150
  "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
120
151
  item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
121
152
  format: None,
@@ -129,7 +160,7 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
129
160
  }
130
161
  }
131
162
 
132
- impl<'a> TryConvert for ParquetSchemaType<'a> {
163
+ impl TryConvert for ParquetSchemaType<'_> {
133
164
  fn try_convert(value: Value) -> Result<Self, MagnusError> {
134
165
  let ruby = unsafe { Ruby::get_unchecked() };
135
166
  let schema_type = parse_string_or_symbol(&ruby, value)?;
@@ -144,7 +175,7 @@ impl<'a> TryConvert for ParquetSchemaType<'a> {
144
175
 
145
176
  // We know this type is safe to move between threads because it's just an enum
146
177
  // with simple primitive types and strings
147
- unsafe impl<'a> Send for ParquetSchemaType<'a> {}
178
+ unsafe impl Send for ParquetSchemaType<'_> {}
148
179
 
149
180
  pub enum WriterOutput {
150
181
  File(ArrowWriter<Box<dyn SendableWrite>>),
@@ -202,14 +233,12 @@ impl<'a> ColumnCollector<'a> {
202
233
  pub fn push_value(&mut self, value: Value) -> Result<(), MagnusError> {
203
234
  use crate::types::ParquetValue;
204
235
 
205
- if value.is_nil() {
206
- if !self.nullable {
207
- // For non-nullable fields, raise an error
208
- return Err(MagnusError::new(
209
- magnus::exception::runtime_error(),
210
- "Cannot write nil value for non-nullable field",
211
- ));
212
- }
236
+ if value.is_nil() && !self.nullable {
237
+ // For non-nullable fields, raise an error
238
+ return Err(MagnusError::new(
239
+ magnus::exception::runtime_error(),
240
+ "Cannot write nil value for non-nullable field",
241
+ ));
213
242
  }
214
243
 
215
244
  // For all other types, proceed as normal
@@ -13,12 +13,12 @@ pub fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String
13
13
  RString::from_value(value)
14
14
  .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
15
15
  .to_string()
16
- .map(|s| Some(s))
16
+ .map(Some)
17
17
  } else if value.is_kind_of(ruby.class_symbol()) {
18
18
  Symbol::from_value(value)
19
19
  .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
20
20
  .funcall("to_s", ())
21
- .map(|s| Some(s))
21
+ .map(Some)
22
22
  } else {
23
23
  Err(Error::new(
24
24
  magnus::exception::type_error(),
@@ -161,11 +161,11 @@ pub fn parse_parquet_columns_args(
161
161
  };
162
162
 
163
163
  let batch_size = kwargs.optional.2.flatten();
164
- if let Some(sz) = batch_size {
165
- if sz <= 0 {
164
+ if let Some(batch_size) = batch_size {
165
+ if batch_size == 0 {
166
166
  return Err(Error::new(
167
- ruby.exception_arg_error(),
168
- format!("batch_size must be > 0, got {}", sz),
167
+ magnus::exception::arg_error(),
168
+ "Batch size must be greater than 0",
169
169
  ));
170
170
  }
171
171
  }
@@ -111,13 +111,13 @@ pub fn parse_parquet_write_args(
111
111
  if let Some(type_val) = type_val {
112
112
  // If it has a type: :struct, it's the new DSL format
113
113
  // Use parse_string_or_symbol to handle both String and Symbol values
114
- let ttype = parse_string_or_symbol(&ruby, type_val)?;
114
+ let ttype = parse_string_or_symbol(ruby, type_val)?;
115
115
  if let Some(ref type_str) = ttype {
116
116
  if type_str == "struct" {
117
117
  // Parse using the new schema approach
118
- let schema_node = crate::parse_schema_node(&ruby, schema_value)?;
118
+ let schema_node = crate::parse_schema_node(ruby, schema_value)?;
119
119
 
120
- validate_schema_node(&ruby, &schema_node)?;
120
+ validate_schema_node(ruby, &schema_node)?;
121
121
 
122
122
  return Ok(ParquetWriteArgs {
123
123
  read_from,
@@ -143,22 +143,21 @@ pub fn parse_parquet_write_args(
143
143
  "Schema fields must be an array",
144
144
  )
145
145
  })?
146
- .len()
147
- == 0)
146
+ .is_empty())
148
147
  {
149
148
  // If schema is nil or an empty array, we need to peek at the first value to determine column count
150
149
  let first_value = read_from.funcall::<_, _, Value>("peek", ())?;
151
150
  // Default to nullable:true for auto-inferred fields
152
- crate::infer_schema_from_first_row(&ruby, first_value, true)?
151
+ crate::infer_schema_from_first_row(ruby, first_value, true)?
153
152
  } else {
154
153
  // Legacy array format - use our centralized parser
155
- crate::parse_legacy_schema(&ruby, schema_value)?
154
+ crate::parse_legacy_schema(ruby, schema_value)?
156
155
  };
157
156
 
158
157
  // Convert the legacy schema fields to SchemaNode (DSL format)
159
- let schema_node = crate::legacy_schema_to_dsl(&ruby, schema_fields)?;
158
+ let schema_node = crate::legacy_schema_to_dsl(ruby, schema_fields)?;
160
159
 
161
- validate_schema_node(&ruby, &schema_node)?;
160
+ validate_schema_node(ruby, &schema_node)?;
162
161
 
163
162
  Ok(ParquetWriteArgs {
164
163
  read_from,
@@ -195,6 +194,9 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
195
194
  }
196
195
  DataType::Float32 => Ok(PST::Primitive(PrimitiveType::Float32)),
197
196
  DataType::Float64 => Ok(PST::Primitive(PrimitiveType::Float64)),
197
+ DataType::Decimal128(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal128(
198
+ *precision, *scale,
199
+ ))),
198
200
  DataType::Date32 => Ok(PST::Primitive(PrimitiveType::Date32)),
199
201
  DataType::Date64 => {
200
202
  // Our code typically uses Date32 or Timestamp for 64. But Arrow has Date64
@@ -414,15 +416,21 @@ fn create_writer(
414
416
  compression: Option<String>,
415
417
  ) -> Result<WriterOutput, ParquetGemError> {
416
418
  // Create writer properties with compression based on the option
419
+ let compression_setting = match compression.map(|s| s.to_lowercase()).as_deref() {
420
+ Some("none") | Some("uncompressed") => Ok(Compression::UNCOMPRESSED),
421
+ Some("snappy") => Ok(Compression::SNAPPY),
422
+ Some("gzip") => Ok(Compression::GZIP(GzipLevel::default())),
423
+ Some("lz4") => Ok(Compression::LZ4),
424
+ Some("zstd") => Ok(Compression::ZSTD(ZstdLevel::default())),
425
+ None => Ok(Compression::UNCOMPRESSED),
426
+ other => Err(MagnusError::new(
427
+ magnus::exception::arg_error(),
428
+ format!("Invalid compression option: {:?}", other),
429
+ )),
430
+ }?;
431
+
417
432
  let props = WriterProperties::builder()
418
- .set_compression(match compression.as_deref() {
419
- Some("none") | Some("uncompressed") => Compression::UNCOMPRESSED,
420
- Some("snappy") => Compression::SNAPPY,
421
- Some("gzip") => Compression::GZIP(GzipLevel::default()),
422
- Some("lz4") => Compression::LZ4,
423
- Some("zstd") => Compression::ZSTD(ZstdLevel::default()),
424
- _ => Compression::UNCOMPRESSED,
425
- })
433
+ .set_compression(compression_setting)
426
434
  .build();
427
435
 
428
436
  if write_to.is_kind_of(ruby.class_string()) {
@@ -11,12 +11,12 @@ use crate::{
11
11
  use crate::{types::PrimitiveType, SchemaNode};
12
12
  use arrow_array::{Array, RecordBatch};
13
13
  use magnus::{value::ReprValue, Error as MagnusError, RArray, Ruby, Value};
14
- use std::sync::Arc;
14
+ use std::{rc::Rc, sync::Arc};
15
15
 
16
16
  #[inline]
17
17
  pub fn write_columns(args: &[Value]) -> Result<(), MagnusError> {
18
18
  let ruby = unsafe { Ruby::get_unchecked() };
19
- write_columns_impl(Arc::new(ruby), args).map_err(|e| {
19
+ write_columns_impl(Rc::new(ruby), args).map_err(|e| {
20
20
  let z: MagnusError = e.into();
21
21
  z
22
22
  })?;
@@ -24,7 +24,7 @@ pub fn write_columns(args: &[Value]) -> Result<(), MagnusError> {
24
24
  }
25
25
 
26
26
  #[inline]
27
- fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemError> {
27
+ fn write_columns_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemError> {
28
28
  let ParquetWriteArgs {
29
29
  read_from,
30
30
  write_to,
@@ -94,7 +94,7 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
94
94
  };
95
95
 
96
96
  if batch_array.len() != schema_len {
97
- return Err(MagnusError::new(
97
+ Err(MagnusError::new(
98
98
  magnus::exception::type_error(),
99
99
  format!(
100
100
  "Batch column count ({}) does not match schema length ({}). Schema expects columns: {:?}",
@@ -118,7 +118,7 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
118
118
  ))?,
119
119
  };
120
120
  if top_fields.len() != fields.len() {
121
- return Err(MagnusError::new(
121
+ Err(MagnusError::new(
122
122
  magnus::exception::runtime_error(),
123
123
  "Mismatch top-level DSL fields vs Arrow fields",
124
124
  ))?;
@@ -140,31 +140,34 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
140
140
  parquet_type,
141
141
  // Format is handled internally now
142
142
  ..
143
- } => match parquet_type {
144
- &PrimitiveType::Int8 => PST::Primitive(PrimitiveType::Int8),
145
- &PrimitiveType::Int16 => PST::Primitive(PrimitiveType::Int16),
146
- &PrimitiveType::Int32 => PST::Primitive(PrimitiveType::Int32),
147
- &PrimitiveType::Int64 => PST::Primitive(PrimitiveType::Int64),
148
- &PrimitiveType::UInt8 => PST::Primitive(PrimitiveType::UInt8),
149
- &PrimitiveType::UInt16 => PST::Primitive(PrimitiveType::UInt16),
150
- &PrimitiveType::UInt32 => PST::Primitive(PrimitiveType::UInt32),
151
- &PrimitiveType::UInt64 => PST::Primitive(PrimitiveType::UInt64),
152
- &PrimitiveType::Float32 => {
143
+ } => match *parquet_type {
144
+ PrimitiveType::Int8 => PST::Primitive(PrimitiveType::Int8),
145
+ PrimitiveType::Int16 => PST::Primitive(PrimitiveType::Int16),
146
+ PrimitiveType::Int32 => PST::Primitive(PrimitiveType::Int32),
147
+ PrimitiveType::Int64 => PST::Primitive(PrimitiveType::Int64),
148
+ PrimitiveType::UInt8 => PST::Primitive(PrimitiveType::UInt8),
149
+ PrimitiveType::UInt16 => PST::Primitive(PrimitiveType::UInt16),
150
+ PrimitiveType::UInt32 => PST::Primitive(PrimitiveType::UInt32),
151
+ PrimitiveType::UInt64 => PST::Primitive(PrimitiveType::UInt64),
152
+ PrimitiveType::Float32 => {
153
153
  PST::Primitive(PrimitiveType::Float32)
154
154
  }
155
- &PrimitiveType::Float64 => {
155
+ PrimitiveType::Float64 => {
156
156
  PST::Primitive(PrimitiveType::Float64)
157
157
  }
158
- &PrimitiveType::String => PST::Primitive(PrimitiveType::String),
159
- &PrimitiveType::Binary => PST::Primitive(PrimitiveType::Binary),
160
- &PrimitiveType::Boolean => {
158
+ PrimitiveType::Decimal128(precision, scale) => {
159
+ PST::Primitive(PrimitiveType::Decimal128(precision, scale))
160
+ }
161
+ PrimitiveType::String => PST::Primitive(PrimitiveType::String),
162
+ PrimitiveType::Binary => PST::Primitive(PrimitiveType::Binary),
163
+ PrimitiveType::Boolean => {
161
164
  PST::Primitive(PrimitiveType::Boolean)
162
165
  }
163
- &PrimitiveType::Date32 => PST::Primitive(PrimitiveType::Date32),
164
- &PrimitiveType::TimestampMillis => {
166
+ PrimitiveType::Date32 => PST::Primitive(PrimitiveType::Date32),
167
+ PrimitiveType::TimestampMillis => {
165
168
  PST::Primitive(PrimitiveType::TimestampMillis)
166
169
  }
167
- &PrimitiveType::TimestampMicros => {
170
+ PrimitiveType::TimestampMicros => {
168
171
  PST::Primitive(PrimitiveType::TimestampMicros)
169
172
  }
170
173
  },
@@ -205,12 +208,12 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
205
208
  if e.is_kind_of(ruby.exception_stop_iteration()) {
206
209
  break;
207
210
  }
208
- return Err(e)?;
211
+ Err(e)?;
209
212
  }
210
213
  }
211
214
  }
212
215
  } else {
213
- return Err(MagnusError::new(
216
+ Err(MagnusError::new(
214
217
  magnus::exception::type_error(),
215
218
  "read_from must be an Enumerator".to_string(),
216
219
  ))?;
@@ -16,14 +16,14 @@ use magnus::{
16
16
  value::ReprValue, Error as MagnusError, RArray, RHash, Ruby, Symbol, TryConvert, Value,
17
17
  };
18
18
  use rand::Rng;
19
- use std::sync::Arc;
19
+ use std::{rc::Rc, sync::Arc};
20
20
 
21
21
  const MIN_SAMPLES_FOR_ESTIMATE: usize = 10;
22
22
 
23
23
  #[inline]
24
24
  pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
25
25
  let ruby = unsafe { Ruby::get_unchecked() };
26
- write_rows_impl(Arc::new(ruby), args).map_err(|e| {
26
+ write_rows_impl(Rc::new(ruby), args).map_err(|e| {
27
27
  let z: MagnusError = e.into();
28
28
  z
29
29
  })?;
@@ -31,7 +31,7 @@ pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
31
31
  }
32
32
 
33
33
  #[inline]
34
- fn write_rows_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemError> {
34
+ fn write_rows_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemError> {
35
35
  let ParquetWriteArgs {
36
36
  read_from,
37
37
  write_to,
@@ -83,8 +83,8 @@ fn write_rows_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemErro
83
83
  })?;
84
84
  let row_size = estimate_single_row_size(&row_array, &column_collectors)?;
85
85
  size_samples.push(row_size);
86
- } else if rng.random_range(0..=total_rows) < sample_size as usize {
87
- let idx = rng.random_range(0..sample_size as usize);
86
+ } else if rng.random_range(0..=total_rows) < sample_size {
87
+ let idx = rng.random_range(0..sample_size);
88
88
  let row_array = RArray::from_value(row).ok_or_else(|| {
89
89
  MagnusError::new(ruby.exception_type_error(), "Row must be an array")
90
90
  })?;
@@ -115,12 +115,12 @@ fn write_rows_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemErro
115
115
  }
116
116
  break;
117
117
  }
118
- return Err(e)?;
118
+ Err(e)?;
119
119
  }
120
120
  }
121
121
  }
122
122
  } else {
123
- return Err(MagnusError::new(
123
+ Err(MagnusError::new(
124
124
  magnus::exception::type_error(),
125
125
  "read_from must be an Enumerator".to_string(),
126
126
  ))?;
@@ -257,6 +257,7 @@ pub fn estimate_value_size(
257
257
  | PST::Primitive(PrimitiveType::UInt64)
258
258
  | PST::Primitive(PrimitiveType::Float64) => Ok(8),
259
259
  PST::Primitive(PrimitiveType::Boolean) => Ok(1),
260
+ PST::Primitive(PrimitiveType::Decimal128(_, _)) => Ok(16),
260
261
  PST::Primitive(PrimitiveType::Date32)
261
262
  | PST::Primitive(PrimitiveType::TimestampMillis)
262
263
  | PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8),
@@ -429,15 +430,13 @@ pub fn estimate_value_size(
429
430
  if let Some(field_value) = hash.get(&*field.name) {
430
431
  total_fields_size +=
431
432
  estimate_value_size(field_value, &field.type_)?;
433
+ } else if field.nullable {
434
+ total_fields_size += 0;
432
435
  } else {
433
- if field.nullable {
434
- total_fields_size += 0;
435
- } else {
436
- return Err(MagnusError::new(
437
- magnus::exception::runtime_error(),
438
- format!("Missing field: {} in hash {:?}", field.name, hash),
439
- ));
440
- }
436
+ return Err(MagnusError::new(
437
+ magnus::exception::runtime_error(),
438
+ format!("Missing field: {} in hash {:?}", field.name, hash),
439
+ ));
441
440
  }
442
441
  }
443
442
  }