parquet 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +9 -1
- data/ext/parquet/Cargo.toml +4 -0
- data/ext/parquet/build.rs +5 -0
- data/ext/parquet/src/lib.rs +1 -0
- data/ext/parquet/src/reader/common.rs +7 -6
- data/ext/parquet/src/reader/mod.rs +204 -0
- data/ext/parquet/src/reader/parquet_column_reader.rs +19 -20
- data/ext/parquet/src/reader/parquet_row_reader.rs +18 -22
- data/ext/parquet/src/ruby_reader.rs +3 -5
- data/ext/parquet/src/types/core_types.rs +1 -0
- data/ext/parquet/src/types/mod.rs +8 -5
- data/ext/parquet/src/types/parquet_value.rs +199 -7
- data/ext/parquet/src/types/record_types.rs +16 -5
- data/ext/parquet/src/types/schema_converter.rs +118 -11
- data/ext/parquet/src/types/schema_node.rs +83 -2
- data/ext/parquet/src/types/timestamp.rs +6 -10
- data/ext/parquet/src/types/type_conversion.rs +84 -11
- data/ext/parquet/src/types/writer_types.rs +40 -11
- data/ext/parquet/src/utils.rs +6 -6
- data/ext/parquet/src/writer/mod.rs +25 -17
- data/ext/parquet/src/writer/write_columns.rs +27 -24
- data/ext/parquet/src/writer/write_rows.rs +14 -15
- data/lib/parquet/schema.rb +77 -4
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +11 -0
- metadata +3 -2

data/ext/parquet/src/types/type_conversion.rs
CHANGED
@@ -2,8 +2,8 @@ use std::str::FromStr;
 use std::sync::Arc;
 
 use super::*;
-use arrow_array::builder::MapFieldNames;
 use arrow_array::builder::*;
+use arrow_array::builder::MapFieldNames;
 use arrow_schema::{DataType, Field, Fields, TimeUnit};
 use jiff::tz::{Offset, TimeZone};
 use magnus::{RArray, RString, TryConvert};
@@ -41,9 +41,9 @@ pub fn convert_to_date32(
     let s = String::try_convert(value)?;
     // Parse string into Date using jiff
     let date = if let Some(fmt) = format {
-        jiff::civil::Date::strptime(
+        jiff::civil::Date::strptime(fmt, &s).or_else(|e1| {
             // Try parsing as DateTime and convert to Date with zero offset
-            jiff::civil::DateTime::strptime(
+            jiff::civil::DateTime::strptime(fmt, &s)
                 .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
                 .map(|dt| dt.date())
                 .map_err(|e2| {
@@ -78,7 +78,7 @@ pub fn convert_to_date32(
             .timestamp();
 
         // Convert to epoch days
-        Ok((x.as_second()
+        Ok((x.as_second() / 86400) as i32)
     } else if value.is_kind_of(ruby.class_time()) {
        // Convert Time object to epoch days
        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
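
The repaired line converts epoch seconds to epoch days with integer division. A minimal std-only sketch of that arithmetic (`epoch_days` is a hypothetical helper, not the gem's API); note that Rust's `/` truncates toward zero, so pre-1970 instants land one day later than a flooring division would give:

```rust
// Standalone sketch of the seconds-to-days conversion used above.
const SECONDS_PER_DAY: i64 = 86_400;

fn epoch_days(epoch_seconds: i64) -> i32 {
    // `/` truncates toward zero, matching `(x.as_second() / 86400) as i32`.
    (epoch_seconds / SECONDS_PER_DAY) as i32
}

fn main() {
    assert_eq!(epoch_days(0), 0);       // 1970-01-01
    assert_eq!(epoch_days(86_400), 1);  // 1970-01-02
    // Pre-epoch instants truncate toward zero rather than flooring:
    assert_eq!(epoch_days(-1), 0);
    assert_eq!((-1_i64).div_euclid(SECONDS_PER_DAY), -1); // flooring alternative
}
```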
@@ -100,10 +100,10 @@ pub fn convert_to_timestamp_millis(
     let s = String::try_convert(value)?;
     // Parse string into Timestamp using jiff
     let timestamp = if let Some(fmt) = format {
-        jiff::Timestamp::strptime(
+        jiff::Timestamp::strptime(fmt, &s)
             .or_else(|e1| {
                 // Try parsing as DateTime and convert to Timestamp with zero offset
-                jiff::civil::DateTime::strptime(
+                jiff::civil::DateTime::strptime(fmt, &s)
                     .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
                     .map(|dt| dt.timestamp())
                     .map_err(|e2| {
@@ -150,9 +150,9 @@ pub fn convert_to_timestamp_micros(
     let s = String::try_convert(value)?;
     // Parse string into Timestamp using jiff
     let timestamp = if let Some(fmt) = format {
-        jiff::Timestamp::strptime(
+        jiff::Timestamp::strptime(fmt, &s).or_else(|e1| {
             // Try parsing as DateTime and convert to Timestamp with zero offset
-            jiff::civil::DateTime::strptime(
+            jiff::civil::DateTime::strptime(fmt, &s).and_then(|dt| {
                 dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
             })
             .map(|dt| dt.timestamp())
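
All three conversion functions now share the same fallback shape: try `strptime` against the target type, and on failure re-parse as a civil (zone-less) `DateTime` pinned to a zero offset. A minimal sketch of the pattern, assuming only the jiff calls the diff itself makes (error handling simplified):

```rust
use jiff::civil::DateTime;
use jiff::tz::{Offset, TimeZone};
use jiff::Timestamp;

// Try to parse a full instant directly; fall back to a zone-less date-time
// interpreted as UTC. Mirrors the or_else chains above with simplified errors.
fn parse_with_fallback(fmt: &str, s: &str) -> Result<Timestamp, jiff::Error> {
    Timestamp::strptime(fmt, s).or_else(|_e1| {
        DateTime::strptime(fmt, s)
            .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
            .map(|zdt| zdt.timestamp())
    })
}

fn main() -> Result<(), jiff::Error> {
    // The format carries no UTC offset, so Timestamp::strptime fails and the
    // civil-DateTime fallback succeeds.
    let ts = parse_with_fallback("%Y-%m-%d %H:%M:%S", "2024-01-02 03:04:05")?;
    println!("{ts}");
    Ok(())
}
```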
@@ -242,6 +242,7 @@ pub fn parquet_schema_type_to_arrow_data_type(
         PrimitiveType::UInt64 => DataType::UInt64,
         PrimitiveType::Float32 => DataType::Float32,
         PrimitiveType::Float64 => DataType::Float64,
+        PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
         PrimitiveType::String => DataType::Utf8,
         PrimitiveType::Binary => DataType::Binary,
         PrimitiveType::Boolean => DataType::Boolean,
@@ -364,6 +365,20 @@ fn create_arrow_builder_for_type(
         ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
             Ok(Box::new(Float64Builder::with_capacity(cap)))
         }
+        ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
+            // Create a Decimal128Builder with specific precision and scale
+            let builder = Decimal128Builder::with_capacity(cap);
+
+            // Set precision and scale for the decimal and return the new builder
+            let builder_with_precision = builder.with_precision_and_scale(*precision, *scale).map_err(|e| {
+                MagnusError::new(
+                    magnus::exception::runtime_error(),
+                    format!("Failed to set precision and scale: {}", e),
+                )
+            })?;
+
+            Ok(Box::new(builder_with_precision))
+        }
         ParquetSchemaType::Primitive(PrimitiveType::String) => {
             Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
         }
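
For context, `with_precision_and_scale` is fallible in arrow-rs (it rejects out-of-range precision/scale combinations such as a precision above Decimal128's limit of 38), which is why the new arm maps the error into a `MagnusError`. A minimal standalone sketch of the same builder API (crates `arrow_array`/`arrow_schema` assumed):

```rust
use arrow_array::builder::Decimal128Builder;
use arrow_array::Array;

fn main() -> Result<(), arrow_schema::ArrowError> {
    // Values are appended as unscaled i128 integers; precision and scale are
    // array metadata declared up front on the builder.
    let mut builder =
        Decimal128Builder::with_capacity(4).with_precision_and_scale(18, 2)?;

    builder.append_value(12_345); // represents 123.45 at scale 2
    builder.append_null();

    let array = builder.finish();
    assert_eq!(array.len(), 2);
    assert_eq!(array.value(0), 12_345);
    Ok(())
}
```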
@@ -415,7 +430,7 @@ fn create_arrow_builder_for_type(
         ParquetSchemaType::Struct(struct_field) => {
             // Check for empty struct immediately
             if struct_field.fields.is_empty() {
-
+                Err(MagnusError::new(
                     magnus::exception::runtime_error(),
                     "Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
                 ))?;
@@ -445,7 +460,7 @@ fn create_arrow_builder_for_type(
 
             // Make sure we have the right number of builders
             if child_field_builders.len() != arrow_fields.len() {
-
+                Err(MagnusError::new(
                     magnus::exception::runtime_error(),
                     format!(
                         "Number of field builders ({}) doesn't match number of arrow fields ({})",
@@ -834,6 +849,46 @@ fn fill_builder(
         }
         Ok(())
     }
+    ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_precision, scale)) => {
+        let typed_builder = builder
+            .as_any_mut()
+            .downcast_mut::<Decimal128Builder>()
+            .expect("Builder mismatch: expected Float64Builder");
+
+        for val in values {
+            match val {
+                ParquetValue::Decimal128(d) => typed_builder.append_value(*d),
+                ParquetValue::Float64(f) => {
+                    // Scale the float to the desired precision and scale
+                    let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
+                    typed_builder.append_value(scaled_value)
+                }
+                ParquetValue::Float32(flo) => {
+                    // Scale the float to the desired precision and scale
+                    let scaled_value = (*flo as f64 * 10_f64.powi(*scale as i32)) as i128;
+                    typed_builder.append_value(scaled_value)
+                }
+                ParquetValue::Int64(i) => {
+                    // Scale the integer to the desired scale
+                    let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
+                    typed_builder.append_value(scaled_value)
+                }
+                ParquetValue::Int32(i) => {
+                    // Scale the integer to the desired scale
+                    let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
+                    typed_builder.append_value(scaled_value)
+                }
+                ParquetValue::Null => typed_builder.append_null(),
+                other => {
+                    return Err(MagnusError::new(
+                        magnus::exception::type_error(),
+                        format!("Expected Float64, got {:?}", other),
+                    ))
+                }
+            }
+        }
+        Ok(())
+    }
     ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
         let typed_builder = builder
             .as_any_mut()
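
The new arm normalizes every accepted input to Arrow's unscaled `i128` representation. Two behaviors worth noting, shown in the std-only sketch below (helper names are hypothetical): the float path truncates toward zero rather than rounding, and `10_i128.pow(*scale as u32)` would misbehave for negative scales, which this arm does not attempt to handle.

```rust
fn float_to_unscaled(f: f64, scale: i8) -> i128 {
    // Same cast as the diff: truncates toward zero, no rounding.
    (f * 10_f64.powi(scale as i32)) as i128
}

fn int_to_unscaled(i: i64, scale: i8) -> i128 {
    // `pow` panics on overflow in debug builds; a negative scale would need a
    // division path instead.
    (i as i128) * 10_i128.pow(scale as u32)
}

fn main() {
    assert_eq!(int_to_unscaled(7, 2), 700); // 7 -> "7.00"
    assert_eq!(float_to_unscaled(1.239, 2), 123); // truncated, not rounded to 124
}
```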
@@ -954,7 +1009,7 @@ fn fill_builder(
             .expect("Builder mismatch: expected BinaryBuilder");
         for val in values {
             match val {
-                ParquetValue::Bytes(b) => typed_builder.append_value(
+                ParquetValue::Bytes(b) => typed_builder.append_value(b),
                 ParquetValue::Null => typed_builder.append_null(),
                 other => {
                     return Err(MagnusError::new(
@@ -1106,6 +1161,15 @@ fn fill_builder(
                     )
                 })?
                 .append_value(bytes),
+            ParquetValue::Decimal128(x) => typed_builder
+                .field_builder::<Decimal128Builder>(i)
+                .ok_or_else(|| {
+                    MagnusError::new(
+                        magnus::exception::type_error(),
+                        "Failed to coerce into Decimal128Builder",
+                    )
+                })?
+                .append_value(*x),
             ParquetValue::Date32(x) => typed_builder
                 .field_builder::<Date32Builder>(i)
                 .ok_or_else(|| {
@@ -1302,6 +1366,15 @@ fn fill_builder(
                     )
                 })?
                 .append_null(),
+            ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_, _)) => typed_builder
+                .field_builder::<Decimal128Builder>(i)
+                .ok_or_else(|| {
+                    MagnusError::new(
+                        magnus::exception::type_error(),
+                        "Failed to coerce into Decimal128Builder",
+                    )
+                })?
+                .append_null(),
             ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
                 .field_builder::<StringBuilder>(i)
                 .ok_or_else(|| {

data/ext/parquet/src/types/writer_types.rs
CHANGED
@@ -59,7 +59,7 @@ impl Write for IoLikeValue {
     }
 }
 
-impl
+impl FromStr for ParquetSchemaType<'_> {
     type Err = MagnusError;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -98,6 +98,36 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
             })));
         }
 
+        // Check if it's a decimal type with precision and scale
+        if let Some(decimal_params) = s.strip_prefix("decimal(").and_then(|s| s.strip_suffix(")")) {
+            let parts: Vec<&str> = decimal_params.split(',').collect();
+            if parts.len() != 2 {
+                return Err(MagnusError::new(
+                    magnus::exception::runtime_error(),
+                    format!(
+                        "Invalid decimal format. Expected 'decimal(precision,scale)', got '{}'",
+                        s
+                    ),
+                ));
+            }
+
+            let precision = parts[0].trim().parse::<u8>().map_err(|_| {
+                MagnusError::new(
+                    magnus::exception::runtime_error(),
+                    format!("Invalid precision value in decimal type: {}", parts[0]),
+                )
+            })?;
+
+            let scale = parts[1].trim().parse::<i8>().map_err(|_| {
+                MagnusError::new(
+                    magnus::exception::runtime_error(),
+                    format!("Invalid scale value in decimal type: {}", parts[1]),
+                )
+            })?;
+
+            return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)));
+        }
+
         // Handle primitive types
         match s {
             "int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
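
A std-only sketch of the `decimal(precision,scale)` grammar this adds to `FromStr` (error type simplified to `String`; the gem raises `MagnusError`):

```rust
fn parse_decimal(s: &str) -> Result<(u8, i8), String> {
    // Accept only the exact shell "decimal(...)".
    let params = s
        .strip_prefix("decimal(")
        .and_then(|rest| rest.strip_suffix(')'))
        .ok_or_else(|| format!("not a decimal type: '{s}'"))?;

    let parts: Vec<&str> = params.split(',').collect();
    if parts.len() != 2 {
        return Err(format!(
            "Invalid decimal format. Expected 'decimal(precision,scale)', got '{s}'"
        ));
    }

    let precision: u8 = parts[0].trim().parse()
        .map_err(|_| format!("Invalid precision value: {}", parts[0]))?;
    let scale: i8 = parts[1].trim().parse()
        .map_err(|_| format!("Invalid scale value: {}", parts[1]))?;
    Ok((precision, scale))
}

fn main() {
    assert_eq!(parse_decimal("decimal(18,2)"), Ok((18, 2)));
    assert_eq!(parse_decimal("decimal( 9 , 0 )"), Ok((9, 0))); // whitespace trimmed
    assert!(parse_decimal("decimal(38)").is_err());            // missing scale
}
```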
@@ -116,6 +146,7 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
             "date32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Date32)),
             "timestamp_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis)),
             "timestamp_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros)),
+            "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(18, 2))), // Default precision 18, scale 2
             "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
                 item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
                 format: None,
@@ -129,7 +160,7 @@ impl<'a> FromStr for ParquetSchemaType<'a> {
     }
 }
 
-impl
+impl TryConvert for ParquetSchemaType<'_> {
     fn try_convert(value: Value) -> Result<Self, MagnusError> {
         let ruby = unsafe { Ruby::get_unchecked() };
         let schema_type = parse_string_or_symbol(&ruby, value)?;
@@ -144,7 +175,7 @@ impl<'a> TryConvert for ParquetSchemaType<'a> {
 
 // We know this type is safe to move between threads because it's just an enum
 // with simple primitive types and strings
-unsafe impl
+unsafe impl Send for ParquetSchemaType<'_> {}
 
 pub enum WriterOutput {
     File(ArrowWriter<Box<dyn SendableWrite>>),
@@ -202,14 +233,12 @@ impl<'a> ColumnCollector<'a> {
     pub fn push_value(&mut self, value: Value) -> Result<(), MagnusError> {
         use crate::types::ParquetValue;
 
-        if value.is_nil() {
-
-
-
-
-
-            ));
-        }
+        if value.is_nil() && !self.nullable {
+            // For non-nullable fields, raise an error
+            return Err(MagnusError::new(
+                magnus::exception::runtime_error(),
+                "Cannot write nil value for non-nullable field",
+            ));
         }
 
         // For all other types, proceed as normal
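
The rewritten guard rejects nil only when the field is declared non-nullable, where the surviving fragments suggest the old code rejected nil unconditionally. A std-only sketch of the gate (names hypothetical):

```rust
// Nil is only an error when the column is non-nullable.
fn check_nil(is_nil: bool, nullable: bool) -> Result<(), String> {
    if is_nil && !nullable {
        return Err("Cannot write nil value for non-nullable field".to_string());
    }
    Ok(())
}

fn main() {
    assert!(check_nil(true, true).is_ok());   // nullable column accepts nil
    assert!(check_nil(true, false).is_err()); // non-nullable rejects it
    assert!(check_nil(false, false).is_ok());
}
```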

data/ext/parquet/src/utils.rs
CHANGED
@@ -13,12 +13,12 @@ pub fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String
         RString::from_value(value)
             .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
             .to_string()
-            .map(
+            .map(Some)
     } else if value.is_kind_of(ruby.class_symbol()) {
         Symbol::from_value(value)
             .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
             .funcall("to_s", ())
-            .map(
+            .map(Some)
     } else {
         Err(Error::new(
             magnus::exception::type_error(),
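
Both truncated `.map(` calls resolve to `.map(Some)`, lifting each branch's `Result<String, Error>` into the function's `Result<Option<String>, Error>` return type (the `None` case presumably covers nil input elsewhere in the function). The idiom in isolation:

```rust
// `.map(Some)` lifts Result<String, E> into Result<Option<String>, E>.
fn lift(input: Result<String, ()>) -> Result<Option<String>, ()> {
    input.map(Some)
}

fn main() {
    assert_eq!(lift(Ok("id".into())), Ok(Some("id".into())));
    assert_eq!(lift(Err(())), Err(()));
}
```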
@@ -161,11 +161,11 @@ pub fn parse_parquet_columns_args(
     };
 
     let batch_size = kwargs.optional.2.flatten();
-    if let Some(
-    if
+    if let Some(batch_size) = batch_size {
+        if batch_size == 0 {
             return Err(Error::new(
-
-
+                magnus::exception::arg_error(),
+                "Batch size must be greater than 0",
             ));
         }
     }

data/ext/parquet/src/writer/mod.rs
CHANGED
@@ -111,13 +111,13 @@ pub fn parse_parquet_write_args(
     if let Some(type_val) = type_val {
         // If it has a type: :struct, it's the new DSL format
         // Use parse_string_or_symbol to handle both String and Symbol values
-        let ttype = parse_string_or_symbol(
+        let ttype = parse_string_or_symbol(ruby, type_val)?;
         if let Some(ref type_str) = ttype {
             if type_str == "struct" {
                 // Parse using the new schema approach
-                let schema_node = crate::parse_schema_node(
+                let schema_node = crate::parse_schema_node(ruby, schema_value)?;
 
-                validate_schema_node(
+                validate_schema_node(ruby, &schema_node)?;
 
                 return Ok(ParquetWriteArgs {
                     read_from,
@@ -143,22 +143,21 @@ pub fn parse_parquet_write_args(
                 "Schema fields must be an array",
             )
         })?
-        .
-        == 0)
+        .is_empty())
     {
         // If schema is nil or an empty array, we need to peek at the first value to determine column count
         let first_value = read_from.funcall::<_, _, Value>("peek", ())?;
         // Default to nullable:true for auto-inferred fields
-        crate::infer_schema_from_first_row(
+        crate::infer_schema_from_first_row(ruby, first_value, true)?
     } else {
         // Legacy array format - use our centralized parser
-        crate::parse_legacy_schema(
+        crate::parse_legacy_schema(ruby, schema_value)?
     };
 
     // Convert the legacy schema fields to SchemaNode (DSL format)
-    let schema_node = crate::legacy_schema_to_dsl(
+    let schema_node = crate::legacy_schema_to_dsl(ruby, schema_fields)?;
 
-    validate_schema_node(
+    validate_schema_node(ruby, &schema_node)?;
 
     Ok(ParquetWriteArgs {
         read_from,
@@ -195,6 +194,9 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
     }
     DataType::Float32 => Ok(PST::Primitive(PrimitiveType::Float32)),
     DataType::Float64 => Ok(PST::Primitive(PrimitiveType::Float64)),
+    DataType::Decimal128(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal128(
+        *precision, *scale,
+    ))),
     DataType::Date32 => Ok(PST::Primitive(PrimitiveType::Date32)),
     DataType::Date64 => {
         // Our code typically uses Date32 or Timestamp for 64. But Arrow has Date64
@@ -414,15 +416,21 @@ fn create_writer(
     compression: Option<String>,
 ) -> Result<WriterOutput, ParquetGemError> {
     // Create writer properties with compression based on the option
+    let compression_setting = match compression.map(|s| s.to_lowercase()).as_deref() {
+        Some("none") | Some("uncompressed") => Ok(Compression::UNCOMPRESSED),
+        Some("snappy") => Ok(Compression::SNAPPY),
+        Some("gzip") => Ok(Compression::GZIP(GzipLevel::default())),
+        Some("lz4") => Ok(Compression::LZ4),
+        Some("zstd") => Ok(Compression::ZSTD(ZstdLevel::default())),
+        None => Ok(Compression::UNCOMPRESSED),
+        other => Err(MagnusError::new(
+            magnus::exception::arg_error(),
+            format!("Invalid compression option: {:?}", other),
+        )),
+    }?;
+
     let props = WriterProperties::builder()
-        .set_compression(
-        Some("none") | Some("uncompressed") => Compression::UNCOMPRESSED,
-        Some("snappy") => Compression::SNAPPY,
-        Some("gzip") => Compression::GZIP(GzipLevel::default()),
-        Some("lz4") => Compression::LZ4,
-        Some("zstd") => Compression::ZSTD(ZstdLevel::default()),
-        _ => Compression::UNCOMPRESSED,
-        })
+        .set_compression(compression_setting)
         .build();
 
     if write_to.is_kind_of(ruby.class_string()) {
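
The rewrite hoists the codec choice into a fallible match ahead of the builder chain, so an unrecognized name now raises instead of silently falling back to uncompressed. A std-only sketch of that behavior change (`Codec` is a local stand-in for `parquet::basic::Compression` so the example runs without dependencies):

```rust
#[derive(Debug, PartialEq)]
enum Codec { Uncompressed, Snappy, Gzip, Lz4, Zstd }

fn pick_codec(compression: Option<String>) -> Result<Codec, String> {
    // Normalizing with to_lowercase() makes "SNAPPY" and "snappy" equivalent.
    match compression.map(|s| s.to_lowercase()).as_deref() {
        Some("none") | Some("uncompressed") => Ok(Codec::Uncompressed),
        Some("snappy") => Ok(Codec::Snappy),
        Some("gzip") => Ok(Codec::Gzip),
        Some("lz4") => Ok(Codec::Lz4),
        Some("zstd") => Ok(Codec::Zstd),
        None => Ok(Codec::Uncompressed),
        other => Err(format!("Invalid compression option: {other:?}")),
    }
}

fn main() {
    assert_eq!(pick_codec(Some("ZSTD".into())), Ok(Codec::Zstd));
    assert_eq!(pick_codec(None), Ok(Codec::Uncompressed));
    assert!(pick_codec(Some("brotli".into())).is_err()); // was a silent default before
}
```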

data/ext/parquet/src/writer/write_columns.rs
CHANGED
@@ -11,12 +11,12 @@ use crate::{
 use crate::{types::PrimitiveType, SchemaNode};
 use arrow_array::{Array, RecordBatch};
 use magnus::{value::ReprValue, Error as MagnusError, RArray, Ruby, Value};
-use std::sync::Arc;
+use std::{rc::Rc, sync::Arc};
 
 #[inline]
 pub fn write_columns(args: &[Value]) -> Result<(), MagnusError> {
     let ruby = unsafe { Ruby::get_unchecked() };
-    write_columns_impl(
+    write_columns_impl(Rc::new(ruby), args).map_err(|e| {
         let z: MagnusError = e.into();
         z
     })?;
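
Swapping the handle from `Arc<Ruby>` to `Rc<Ruby>` fits magnus's threading model: the `Ruby` handle is only valid on the Ruby thread and is not `Send`, so a non-atomic reference count is both sufficient and self-documenting. A std-only sketch with a stand-in type:

```rust
use std::rc::Rc;

struct RubyHandle; // stand-in for magnus::Ruby, which must stay on one thread

fn helper(ruby: Rc<RubyHandle>) -> usize {
    // Cheap, non-atomic refcount; Rc<T> is !Send, matching the handle's rules.
    Rc::strong_count(&ruby)
}

fn main() {
    let ruby = Rc::new(RubyHandle);
    assert_eq!(helper(Rc::clone(&ruby)), 2);
}
```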
@@ -24,7 +24,7 @@ pub fn write_columns(args: &[Value]) -> Result<(), MagnusError> {
 }
 
 #[inline]
-fn write_columns_impl(ruby:
+fn write_columns_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemError> {
     let ParquetWriteArgs {
         read_from,
         write_to,
@@ -94,7 +94,7 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
     };
 
     if batch_array.len() != schema_len {
-
+        Err(MagnusError::new(
             magnus::exception::type_error(),
             format!(
                 "Batch column count ({}) does not match schema length ({}). Schema expects columns: {:?}",
@@ -118,7 +118,7 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
         ))?,
     };
     if top_fields.len() != fields.len() {
-
+        Err(MagnusError::new(
             magnus::exception::runtime_error(),
             "Mismatch top-level DSL fields vs Arrow fields",
         ))?;
@@ -140,31 +140,34 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
             parquet_type,
             // Format is handled internally now
             ..
-        } => match parquet_type {
-
-
-
-
-
-
-
-
-
+        } => match *parquet_type {
+            PrimitiveType::Int8 => PST::Primitive(PrimitiveType::Int8),
+            PrimitiveType::Int16 => PST::Primitive(PrimitiveType::Int16),
+            PrimitiveType::Int32 => PST::Primitive(PrimitiveType::Int32),
+            PrimitiveType::Int64 => PST::Primitive(PrimitiveType::Int64),
+            PrimitiveType::UInt8 => PST::Primitive(PrimitiveType::UInt8),
+            PrimitiveType::UInt16 => PST::Primitive(PrimitiveType::UInt16),
+            PrimitiveType::UInt32 => PST::Primitive(PrimitiveType::UInt32),
+            PrimitiveType::UInt64 => PST::Primitive(PrimitiveType::UInt64),
+            PrimitiveType::Float32 => {
                 PST::Primitive(PrimitiveType::Float32)
             }
-
+            PrimitiveType::Float64 => {
                 PST::Primitive(PrimitiveType::Float64)
             }
-
-
-
+            PrimitiveType::Decimal128(precision, scale) => {
+                PST::Primitive(PrimitiveType::Decimal128(precision, scale))
+            }
+            PrimitiveType::String => PST::Primitive(PrimitiveType::String),
+            PrimitiveType::Binary => PST::Primitive(PrimitiveType::Binary),
+            PrimitiveType::Boolean => {
                 PST::Primitive(PrimitiveType::Boolean)
             }
-
-
+            PrimitiveType::Date32 => PST::Primitive(PrimitiveType::Date32),
+            PrimitiveType::TimestampMillis => {
                 PST::Primitive(PrimitiveType::TimestampMillis)
             }
-
+            PrimitiveType::TimestampMicros => {
                 PST::Primitive(PrimitiveType::TimestampMicros)
             }
         },
@@ -205,12 +208,12 @@ fn write_columns_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemE
                 if e.is_kind_of(ruby.exception_stop_iteration()) {
                     break;
                 }
-
+                Err(e)?;
             }
         }
     } else {
-
+        Err(MagnusError::new(
             magnus::exception::type_error(),
             "read_from must be an Enumerator".to_string(),
         ))?;

data/ext/parquet/src/writer/write_rows.rs
CHANGED
@@ -16,14 +16,14 @@ use magnus::{
     value::ReprValue, Error as MagnusError, RArray, RHash, Ruby, Symbol, TryConvert, Value,
 };
 use rand::Rng;
-use std::sync::Arc;
+use std::{rc::Rc, sync::Arc};
 
 const MIN_SAMPLES_FOR_ESTIMATE: usize = 10;
 
 #[inline]
 pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
     let ruby = unsafe { Ruby::get_unchecked() };
-    write_rows_impl(
+    write_rows_impl(Rc::new(ruby), args).map_err(|e| {
         let z: MagnusError = e.into();
         z
     })?;
@@ -31,7 +31,7 @@ pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
 }
 
 #[inline]
-fn write_rows_impl(ruby:
+fn write_rows_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemError> {
     let ParquetWriteArgs {
         read_from,
         write_to,
@@ -83,8 +83,8 @@ fn write_rows_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemErro
         })?;
         let row_size = estimate_single_row_size(&row_array, &column_collectors)?;
         size_samples.push(row_size);
-    } else if rng.random_range(0..=total_rows) < sample_size
-        let idx = rng.random_range(0..sample_size
+    } else if rng.random_range(0..=total_rows) < sample_size {
+        let idx = rng.random_range(0..sample_size);
         let row_array = RArray::from_value(row).ok_or_else(|| {
             MagnusError::new(ruby.exception_type_error(), "Row must be an array")
         })?;
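
The repaired condition completes a reservoir-sampling scheme for row-size estimates: the first `sample_size` rows fill the buffer, and each later row replaces a random slot with probability `sample_size / (total_rows + 1)`. A sketch under the assumption of the rand 0.9 API the diff itself uses (`rand::rng()` / `random_range`):

```rust
use rand::Rng;

fn sample_sizes(row_sizes: impl Iterator<Item = usize>, sample_size: usize) -> Vec<usize> {
    let mut rng = rand::rng();
    let mut reservoir: Vec<usize> = Vec::with_capacity(sample_size);
    for (total_rows, size) in row_sizes.enumerate() {
        if reservoir.len() < sample_size {
            reservoir.push(size);
        } else if rng.random_range(0..=total_rows) < sample_size {
            // Replace a random slot so every row seen so far is equally
            // likely to be represented in the sample.
            let idx = rng.random_range(0..sample_size);
            reservoir[idx] = size;
        }
    }
    reservoir
}

fn main() {
    let sample = sample_sizes((0..1000).map(|i| 64 + i % 7), 10);
    assert_eq!(sample.len(), 10);
}
```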
@@ -115,12 +115,12 @@ fn write_rows_impl(ruby: Arc<Ruby>, args: &[Value]) -> Result<(), ParquetGemErro
                     }
                     break;
                 }
-
+                Err(e)?;
             }
         }
     }
     } else {
-
+        Err(MagnusError::new(
             magnus::exception::type_error(),
             "read_from must be an Enumerator".to_string(),
         ))?;
@@ -257,6 +257,7 @@ pub fn estimate_value_size(
         | PST::Primitive(PrimitiveType::UInt64)
         | PST::Primitive(PrimitiveType::Float64) => Ok(8),
         PST::Primitive(PrimitiveType::Boolean) => Ok(1),
+        PST::Primitive(PrimitiveType::Decimal128(_, _)) => Ok(16),
         PST::Primitive(PrimitiveType::Date32)
         | PST::Primitive(PrimitiveType::TimestampMillis)
        | PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8),
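
The new 16-byte estimate is exact for the value payload, since Arrow stores each Decimal128 as an unscaled `i128`:

```rust
fn main() {
    // Arrow's Decimal128 payload is an unscaled i128: 16 bytes per value.
    assert_eq!(std::mem::size_of::<i128>(), 16);
}
```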
@@ -429,15 +430,13 @@
                 if let Some(field_value) = hash.get(&*field.name) {
                     total_fields_size +=
                         estimate_value_size(field_value, &field.type_)?;
+                } else if field.nullable {
+                    total_fields_size += 0;
                 } else {
-
-
-
-
-                    magnus::exception::runtime_error(),
-                    format!("Missing field: {} in hash {:?}", field.name, hash),
-                    ));
-                }
+                    return Err(MagnusError::new(
+                        magnus::exception::runtime_error(),
+                        format!("Missing field: {} in hash {:?}", field.name, hash),
+                    ));
                 }
             }
         }