parquet 0.2.5 → 0.2.7
- checksums.yaml +4 -4
- data/ext/parquet/src/types/core_types.rs +10 -8
- data/ext/parquet/src/types/parquet_value.rs +7 -3
- data/ext/parquet/src/types/type_conversion.rs +86 -30
- data/ext/parquet/src/types/writer_types.rs +24 -14
- data/ext/parquet/src/utils.rs +16 -5
- data/ext/parquet/src/writer/mod.rs +40 -6
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +37 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c1ed4f490a4f03443598dbe1b0e110746052f613a4c5575f9b8e47c6e160bb40
+  data.tar.gz: 4db314d1707e633799e996c6fb777135ff0ea364a76c0a7d8fc5c429e2394d9f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b3f0a15cf467d030d3002c21bc6b64b6cd16e91e972b8de1e928abfd9bd373cfb5c4f77cdd1a6db7c620055e9657ec623866e0d8a0cb3a8e21a0c252bde3df87
+  data.tar.gz: 77f41921f5818051b597d2941688f6eca2a24d86333c58dec45d6e47e7161bfdd70e78f50a0f7ddd6cc99356c2b477451ab43adf9caa201501815c6b1a731d5c
data/ext/parquet/src/types/core_types.rs
CHANGED
@@ -40,18 +40,20 @@ impl std::fmt::Display for ParserResultType {
 }
 
 #[derive(Debug, Clone)]
-pub struct ListField {
-    pub item_type: ParquetSchemaType,
+pub struct ListField<'a> {
+    pub item_type: ParquetSchemaType<'a>,
+    pub format: Option<&'a str>,
 }
 
 #[derive(Debug, Clone)]
-pub struct MapField {
-    pub key_type: ParquetSchemaType,
-    pub value_type: ParquetSchemaType,
+pub struct MapField<'a> {
+    pub key_type: ParquetSchemaType<'a>,
+    pub value_type: ParquetSchemaType<'a>,
+    pub format: Option<&'a str>,
 }
 
 #[derive(Debug, Clone)]
-pub enum ParquetSchemaType {
+pub enum ParquetSchemaType<'a> {
     Int8,
     Int16,
     Int32,
@@ -68,6 +70,6 @@ pub enum ParquetSchemaType {
     Date32,
     TimestampMillis,
     TimestampMicros,
-    List(Box<ListField>),
-    Map(Box<MapField>),
+    List(Box<ListField<'a>>),
+    Map(Box<MapField<'a>>),
 }
data/ext/parquet/src/types/parquet_value.rs
CHANGED
@@ -157,6 +157,10 @@ impl IntoValue for ParquetValue {
 
 impl ParquetValue {
     pub fn from_value(value: Value, type_: &ParquetSchemaType) -> Result<Self, MagnusError> {
+        if value.is_nil() {
+            return Ok(ParquetValue::Null);
+        }
+
         match type_ {
             ParquetSchemaType::Int8 => {
                 let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
@@ -211,15 +215,15 @@ impl ParquetValue {
                 Ok(ParquetValue::Boolean(v))
             }
             ParquetSchemaType::Date32 => {
-                let v = convert_to_date32(value)?;
+                let v = convert_to_date32(value, None)?;
                 Ok(ParquetValue::Date32(v))
             }
             ParquetSchemaType::TimestampMillis => {
-                let v = convert_to_timestamp_millis(value)?;
+                let v = convert_to_timestamp_millis(value, None)?;
                 Ok(ParquetValue::TimestampMillis(v, None))
             }
             ParquetSchemaType::TimestampMicros => {
-                let v = convert_to_timestamp_micros(value)?;
+                let v = convert_to_timestamp_micros(value, None)?;
                 Ok(ParquetValue::TimestampMicros(v, None))
             }
             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
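
With the nil guard above, `from_value` short-circuits a Ruby nil to `ParquetValue::Null` before any per-type conversion runs (the same guard is added to `ColumnCollector` in writer_types.rs below). A minimal Ruby sketch of the behavior this enables; the column names and output path are illustrative, not from the diff:

require "parquet"

# A nil cell is stored as a Parquet null instead of being handed to the
# int32 converter, which has no sensible conversion for nil.
rows = [
  ["alice", 30],
  ["bob", nil]
].each

Parquet.write_rows(
  rows,
  schema: [{ "name" => "string" }, { "age" => "int32" }],
  write_to: "users.parquet"
)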
data/ext/parquet/src/types/type_conversion.rs
CHANGED
@@ -30,17 +30,35 @@ where
     }
 }
 
-pub fn convert_to_date32(value: Value) -> Result<i32, MagnusError> {
+pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, MagnusError> {
     let ruby = unsafe { Ruby::get_unchecked() };
     if value.is_kind_of(ruby.class_string()) {
         let s = String::try_convert(value)?;
-        // Parse string into Date
-        let date: jiff::civil::Date = s.parse().map_err(|e| {
-            MagnusError::new(
-                magnus::exception::type_error(),
-                format!("Failed to parse '{}' as date32: {}", s, e),
-            )
-        })?;
+        // Parse string into Date using jiff
+        let date = if let Some(fmt) = format {
+            jiff::civil::Date::strptime(&fmt, &s).or_else(|e1| {
+                // Try parsing as DateTime and convert to Date with zero offset
+                jiff::civil::DateTime::strptime(&fmt, &s)
+                    .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
+                    .map(|dt| dt.date())
+                    .map_err(|e2| {
+                        MagnusError::new(
+                            magnus::exception::type_error(),
+                            format!(
+                                "Failed to parse '{}' with format '{}' as date32: {} (and as datetime: {})",
+                                s, fmt, e1, e2
+                            ),
+                        )
+                    })
+            })?
+        } else {
+            s.parse().map_err(|e| {
+                MagnusError::new(
+                    magnus::exception::type_error(),
+                    format!("Failed to parse '{}' as date32: {}", s, e),
+                )
+            })?
+        };
 
         let timestamp = date.at(0, 0, 0, 0);
 
@@ -63,17 +81,36 @@ pub fn convert_to_date32(value: Value) -> Result<i32, MagnusError> {
     }
 }
 
-pub fn convert_to_timestamp_millis(value: Value) -> Result<i64, MagnusError> {
+pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
     let ruby = unsafe { Ruby::get_unchecked() };
     if value.is_kind_of(ruby.class_string()) {
         let s = String::try_convert(value)?;
         // Parse string into Timestamp using jiff
-        let timestamp: jiff::Timestamp = s.parse().map_err(|e| {
-            MagnusError::new(
-                magnus::exception::type_error(),
-                format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
-            )
-        })?;
+        let timestamp = if let Some(fmt) = format {
+            jiff::Timestamp::strptime(&fmt, &s)
+                .or_else(|e1| {
+                    // Try parsing as DateTime and convert to Timestamp with zero offset
+                    jiff::civil::DateTime::strptime(&fmt, &s)
+                        .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
+                        .map(|dt| dt.timestamp())
+                        .map_err(|e2| {
+                            MagnusError::new(
+                                magnus::exception::type_error(),
+                                format!(
+                                    "Failed to parse '{}' with format '{}' as timestamp_millis: {} (and as datetime: {})",
+                                    s, fmt, e1, e2
+                                ),
+                            )
+                        })
+                })?
+        } else {
+            s.parse().map_err(|e| {
+                MagnusError::new(
+                    magnus::exception::type_error(),
+                    format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
+                )
+            })?
+        };
         // Convert to milliseconds
         Ok(timestamp.as_millisecond())
     } else if value.is_kind_of(ruby.class_time()) {
@@ -91,17 +128,36 @@ pub fn convert_to_timestamp_millis(value: Value) -> Result<i64, MagnusError> {
     }
 }
 
-pub fn convert_to_timestamp_micros(value: Value) -> Result<i64, MagnusError> {
+pub fn convert_to_timestamp_micros(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
     let ruby = unsafe { Ruby::get_unchecked() };
     if value.is_kind_of(ruby.class_string()) {
         let s = String::try_convert(value)?;
         // Parse string into Timestamp using jiff
-        let timestamp: jiff::Timestamp = s.parse().map_err(|e| {
-            MagnusError::new(
-                magnus::exception::type_error(),
-                format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
-            )
-        })?;
+        let timestamp = if let Some(fmt) = format {
+            jiff::Timestamp::strptime(&fmt, &s).or_else(|e1| {
+                // Try parsing as DateTime and convert to Timestamp with zero offset
+                jiff::civil::DateTime::strptime(&fmt, &s).and_then(|dt| {
+                    dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
+                })
+                .map(|dt| dt.timestamp())
+                .map_err(|e2| {
+                    MagnusError::new(
+                        magnus::exception::type_error(),
+                        format!(
+                            "Failed to parse '{}' with format '{}' as timestamp_micros: {} (and as datetime: {})",
+                            s, fmt, e1, e2
+                        ),
+                    )
+                })
+            })?
+        } else {
+            s.parse().map_err(|e| {
+                MagnusError::new(
+                    magnus::exception::type_error(),
+                    format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
+                )
+            })?
+        };
         // Convert to microseconds
         Ok(timestamp.as_microsecond())
     } else if value.is_kind_of(ruby.class_time()) {
@@ -204,15 +260,15 @@ pub fn convert_to_list(
                 ParquetValue::Boolean(v)
             }
             ParquetSchemaType::Date32 => {
-                let v = convert_to_date32(item_value)?;
+                let v = convert_to_date32(item_value, list_field.format)?;
                 ParquetValue::Date32(v)
             }
             ParquetSchemaType::TimestampMillis => {
-                let v = convert_to_timestamp_millis(item_value)?;
+                let v = convert_to_timestamp_millis(item_value, list_field.format)?;
                 ParquetValue::TimestampMillis(v, None)
            }
             ParquetSchemaType::TimestampMicros => {
-                let v = convert_to_timestamp_micros(item_value)?;
+                let v = convert_to_timestamp_micros(item_value, list_field.format)?;
                 ParquetValue::TimestampMicros(v, None)
             }
             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
@@ -310,15 +366,15 @@ pub fn convert_to_map(
                 ParquetValue::Boolean(v)
             }
             ParquetSchemaType::Date32 => {
-                let v = convert_to_date32(value)?;
+                let v = convert_to_date32(value, map_field.format)?;
                 ParquetValue::Date32(v)
             }
             ParquetSchemaType::TimestampMillis => {
-                let v = convert_to_timestamp_millis(value)?;
+                let v = convert_to_timestamp_millis(value, map_field.format)?;
                 ParquetValue::TimestampMillis(v, None)
             }
             ParquetSchemaType::TimestampMicros => {
-                let v = convert_to_timestamp_micros(value)?;
+                let v = convert_to_timestamp_micros(value, map_field.format)?;
                 ParquetValue::TimestampMicros(v, None)
             }
             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
@@ -592,12 +648,12 @@ pub fn convert_parquet_values_to_arrow(
         };
 
         let mut list_builder = ListBuilder::new(value_builder);
+
         for value in values {
             match value {
                 ParquetValue::List(items) => {
-                    list_builder.append(true);
                     for item in items {
-                        match list_field.item_type {
+                        match &list_field.item_type {
                             ParquetSchemaType::Int8 => append_list_value_copy!(
                                 list_builder,
                                 ParquetSchemaType::Int8,
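
Each converter now threads an optional strptime-style format through to jiff, first trying the target type and then falling back to `jiff::civil::DateTime` pinned to a zero UTC offset, so datetime-shaped strings can still land in a date or timestamp column. A hedged Ruby sketch of the user-facing effect (column name and path are illustrative):

require "parquet"

# The "format" key is forwarded to the jiff strptime calls shown above.
rows = [["03/01/2024"], ["12/25/2024"]].each

Parquet.write_rows(
  rows,
  schema: [{ "shipped_on" => { "type" => "date32", "format" => "%m/%d/%Y" } }],
  write_to: "dates.parquet"
)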
data/ext/parquet/src/types/writer_types.rs
CHANGED
@@ -12,16 +12,17 @@ use tempfile::NamedTempFile;
 use crate::types::{ListField, MapField, ParquetSchemaType};
 
 #[derive(Debug)]
-pub struct SchemaField {
+pub struct SchemaField<'a> {
     pub name: String,
-    pub type_: ParquetSchemaType,
+    pub type_: ParquetSchemaType<'a>,
+    pub format: Option<String>,
 }
 
 #[derive(Debug)]
-pub struct ParquetWriteArgs {
+pub struct ParquetWriteArgs<'a> {
     pub read_from: Value,
     pub write_to: Value,
-    pub schema: Vec<SchemaField>,
+    pub schema: Vec<SchemaField<'a>>,
     pub batch_size: Option<usize>,
 }
 
@@ -51,7 +52,7 @@ impl Write for IoLikeValue {
     }
 }
 
-impl FromStr for ParquetSchemaType {
+impl<'a> FromStr for ParquetSchemaType<'a> {
     type Err = MagnusError;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -74,10 +75,12 @@ impl FromStr for ParquetSchemaType {
             "timestamp_micros" => Ok(ParquetSchemaType::TimestampMicros),
             "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
                 item_type: ParquetSchemaType::Int8,
+                format: None,
             }))),
             "map" => Ok(ParquetSchemaType::Map(Box::new(MapField {
                 key_type: ParquetSchemaType::String,
                 value_type: ParquetSchemaType::Int8,
+                format: None,
             }))),
             _ => Err(MagnusError::new(
                 magnus::exception::runtime_error(),
@@ -87,7 +90,7 @@ impl FromStr for ParquetSchemaType {
     }
 }
 
-impl TryConvert for ParquetSchemaType {
+impl<'a> TryConvert for ParquetSchemaType<'a> {
     fn try_convert(value: Value) -> Result<Self, MagnusError> {
         let ruby = unsafe { Ruby::get_unchecked() };
         let schema_type = parse_string_or_symbol(&ruby, value)?;
@@ -98,7 +101,7 @@ impl TryConvert for ParquetSchemaType {
 
 // We know this type is safe to move between threads because it's just an enum
 // with simple primitive types and strings
-unsafe impl Send for ParquetSchemaType {}
+unsafe impl<'a> Send for ParquetSchemaType<'a> {}
 
 fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, MagnusError> {
     if value.is_nil() {
@@ -162,17 +165,19 @@ impl From<ParquetErrorWrapper> for MagnusError {
     }
 }
 
-pub struct ColumnCollector {
+pub struct ColumnCollector<'a> {
     pub name: String,
-    pub type_: ParquetSchemaType,
+    pub type_: ParquetSchemaType<'a>,
+    pub format: Option<String>,
     pub values: Vec<crate::types::ParquetValue>,
 }
 
-impl ColumnCollector {
-    pub fn new(name: String, type_: ParquetSchemaType) -> Self {
+impl<'a> ColumnCollector<'a> {
+    pub fn new(name: String, type_: ParquetSchemaType<'a>, format: Option<String>) -> Self {
         Self {
             name,
             type_,
+            format,
             values: Vec::new(),
         }
     }
@@ -185,6 +190,11 @@ impl ColumnCollector {
             NumericConverter,
         };
 
+        if value.is_nil() {
+            self.values.push(ParquetValue::Null);
+            return Ok(());
+        }
+
         let parquet_value = match &self.type_ {
             ParquetSchemaType::Int8 => {
                 let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
@@ -239,15 +249,15 @@ impl ColumnCollector {
                 ParquetValue::Boolean(v)
             }
             ParquetSchemaType::Date32 => {
-                let v = convert_to_date32(value)?;
+                let v = convert_to_date32(value, self.format.as_deref())?;
                 ParquetValue::Date32(v)
             }
             ParquetSchemaType::TimestampMillis => {
-                let v = convert_to_timestamp_millis(value)?;
+                let v = convert_to_timestamp_millis(value, self.format.as_deref())?;
                 ParquetValue::TimestampMillis(v, None)
             }
             ParquetSchemaType::TimestampMicros => {
-                let v = convert_to_timestamp_micros(value)?;
+                let v = convert_to_timestamp_micros(value, self.format.as_deref())?;
                 ParquetValue::TimestampMicros(v, None)
             }
             ParquetSchemaType::List(list_field) => {
data/ext/parquet/src/utils.rs
CHANGED
@@ -39,7 +39,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRowsArgs, MagnusError> {
     let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
     let (to_read,) = parsed_args.required;
 
-    let kwargs = get_kwargs::<_, (), (Option<Value>, Option<Vec<String>>), ()>(
+    let kwargs = get_kwargs::<_, (), (Option<Option<Value>>, Option<Option<Vec<String>>>), ()>(
         parsed_args.keywords,
         &[],
         &["result_type", "columns"],
@@ -48,6 +48,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRowsArgs, MagnusError> {
     let result_type: ParserResultType = match kwargs
         .optional
         .0
+        .flatten()
         .map(|value| parse_string_or_symbol(ruby, value))
     {
         Some(Ok(Some(parsed))) => parsed.try_into().map_err(|e| {
@@ -75,7 +76,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRowsArgs, MagnusError> {
     Ok(ParquetRowsArgs {
         to_read,
         result_type,
-        columns: kwargs.optional.1,
+        columns: kwargs.optional.1.flatten(),
     })
 }
 
@@ -95,7 +96,16 @@ pub fn parse_parquet_columns_args(
     let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
     let (to_read,) = parsed_args.required;
 
-    let kwargs = get_kwargs::<_, (), (Option<Value>, Option<Vec<String>>, Option<usize>), ()>(
+    let kwargs = get_kwargs::<
+        _,
+        (),
+        (
+            Option<Option<Value>>,
+            Option<Option<Vec<String>>>,
+            Option<Option<usize>>,
+        ),
+        (),
+    >(
         parsed_args.keywords,
         &[],
         &["result_type", "columns", "batch_size"],
@@ -104,6 +114,7 @@ pub fn parse_parquet_columns_args(
     let result_type: ParserResultType = match kwargs
         .optional
         .0
+        .flatten()
         .map(|value| parse_string_or_symbol(ruby, value))
     {
         Some(Ok(Some(parsed))) => parsed.try_into().map_err(|e| {
@@ -131,7 +142,7 @@ pub fn parse_parquet_columns_args(
     Ok(ParquetColumnsArgs {
         to_read,
         result_type,
-        columns: kwargs.optional.1,
-        batch_size: kwargs.optional.2,
+        columns: kwargs.optional.1.flatten(),
+        batch_size: kwargs.optional.2.flatten(),
     })
 }
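
Wrapping each optional kwarg as `Option<Option<T>>` and then calling `.flatten()` matches magnus's convention for keyword arguments: the outer `Option` records whether the keyword was passed at all, the inner one whether it was nil. The practical effect, sketched in Ruby with an illustrative file name, is that an explicit nil now behaves like omitting the keyword:

require "parquet"

# These two calls should now be equivalent; previously an explicit nil
# had to convert directly to Vec<String> / usize and could fail.
Parquet.each_row("data.parquet") { |row| p row }
Parquet.each_row("data.parquet", result_type: nil, columns: nil) { |row| p row }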
data/ext/parquet/src/writer/mod.rs
CHANGED
@@ -28,7 +28,7 @@ pub fn parse_parquet_write_args(args: &[Value]) -> Result<ParquetWriteArgs, MagnusError> {
     let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
     let (read_from,) = parsed_args.required;
 
-    let kwargs = get_kwargs::<_, (Value, Value), (Option<usize>,), ()>(
+    let kwargs = get_kwargs::<_, (Value, Value), (Option<Option<usize>>,), ()>(
         parsed_args.keywords,
         &["schema", "write_to"],
         &["batch_size"],
@@ -59,18 +59,52 @@ pub fn parse_parquet_write_args(args: &[Value]) -> Result<ParquetWriteArgs, MagnusError> {
            ));
         }
 
-        let (name, type_str) = &entries[0];
+        let (name, type_value) = &entries[0];
         let name = String::try_convert(name.clone())?;
-        let type_ = ParquetSchemaType::try_convert(type_str.clone())?;
 
-        schema.push(SchemaField { name, type_ });
+        let (type_, format) = if type_value.is_kind_of(ruby.class_hash()) {
+            let type_hash: Vec<(Value, Value)> = type_value.funcall("to_a", ())?;
+            let mut type_str = None;
+            let mut format_str = None;
+
+            for (key, value) in type_hash {
+                let key = String::try_convert(key)?;
+                match key.as_str() {
+                    "type" => type_str = Some(value),
+                    "format" => format_str = Some(String::try_convert(value)?),
+                    _ => {
+                        return Err(MagnusError::new(
+                            magnus::exception::type_error(),
+                            format!("Unknown key '{}' in type definition", key),
+                        ))
+                    }
+                }
+            }
+
+            let type_str = type_str.ok_or_else(|| {
+                MagnusError::new(
+                    magnus::exception::type_error(),
+                    "Missing 'type' in type definition",
+                )
+            })?;
+
+            (ParquetSchemaType::try_convert(type_str)?, format_str)
+        } else {
+            (ParquetSchemaType::try_convert(type_value.clone())?, None)
+        };
+
+        schema.push(SchemaField {
+            name,
+            type_,
+            format,
+        });
     }
 
     Ok(ParquetWriteArgs {
         read_from,
         write_to: kwargs.required.1,
         schema,
-        batch_size: kwargs.optional.0,
+        batch_size: kwargs.optional.0.flatten(),
     })
 }
 
@@ -130,7 +164,7 @@ pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
     // Create collectors for each column
     let mut column_collectors: Vec<ColumnCollector> = schema
         .into_iter()
-        .map(|field| ColumnCollector::new(field.name, field.type_))
+        .map(|field| ColumnCollector::new(field.name, field.type_, field.format))
         .collect();
 
     let mut rows_in_batch = 0;
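
`parse_parquet_write_args` now accepts each schema entry's type as either a bare string or a hash carrying a required `type` key and an optional `format` key; any other key raises a TypeError. A sketch showing both spellings side by side (names, values, and path are illustrative):

require "parquet"

rows = [
  ["2024-01-15 10:30:00", 1],
  ["2024-01-16 11:45:00", 2]
].each

Parquet.write_rows(
  rows,
  schema: [
    # Hash form: parsed for "type" and "format"; anything else is rejected.
    { "created_at" => { "type" => "timestamp_millis", "format" => "%Y-%m-%d %H:%M:%S" } },
    # String form: unchanged, format stays unset on the Rust side.
    { "count" => "int8" }
  ],
  write_to: "events.parquet"
)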
data/lib/parquet/version.rb
CHANGED
data/lib/parquet.rbi
CHANGED
@@ -1,4 +1,4 @@
-# typed:
+# typed: true
 
 module Parquet
   # Options:
@@ -7,13 +7,20 @@ module Parquet
   #   ("hash" or "array" or :hash or :array)
   # - `columns`: When present, only the specified columns will be included in the output.
   #   This is useful for reducing how much data is read and improving performance.
+  sig do
+    params(
+      input: T.any(String, File, StringIO, IO),
+      result_type: T.nilable(T.any(String, Symbol)),
+      columns: T.nilable(T::Array[String])
+    ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
+  end
   sig do
     params(
       input: T.any(String, File, StringIO, IO),
       result_type: T.nilable(T.any(String, Symbol)),
       columns: T.nilable(T::Array[String]),
       blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
-    ).returns(
+    ).returns(NilClass)
   end
   def self.each_row(input, result_type: nil, columns: nil, &blk)
   end
@@ -24,6 +31,14 @@ module Parquet
   #   ("hash" or "array" or :hash or :array)
   # - `columns`: When present, only the specified columns will be included in the output.
   # - `batch_size`: When present, specifies the number of rows per batch
+  sig do
+    params(
+      input: T.any(String, File, StringIO, IO),
+      result_type: T.nilable(T.any(String, Symbol)),
+      columns: T.nilable(T::Array[String]),
+      batch_size: T.nilable(Integer)
+    ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
+  end
   sig do
     params(
       input: T.any(String, File, StringIO, IO),
@@ -32,14 +47,22 @@ module Parquet
       batch_size: T.nilable(Integer),
       blk:
         T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
-    ).returns(
+    ).returns(NilClass)
   end
   def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
   end
 
   # Options:
   # - `read_from`: An Enumerator yielding arrays of values representing each row
-  # - `schema`: Array of hashes specifying column names and types
+  # - `schema`: Array of hashes specifying column names and types. Supported types:
+  #   - `int8`, `int16`, `int32`, `int64`
+  #   - `uint8`, `uint16`, `uint32`, `uint64`
+  #   - `float`, `double`
+  #   - `string`
+  #   - `binary`
+  #   - `boolean`
+  #   - `date32`
+  #   - `timestamp_millis`, `timestamp_micros`
   # - `write_to`: String path or IO object to write the parquet file to
   # - `batch_size`: Optional batch size for writing (defaults to 1000)
   sig do
@@ -55,7 +78,16 @@ module Parquet
 
   # Options:
   # - `read_from`: An Enumerator yielding arrays of column batches
-  # - `schema`: Array of hashes specifying column names and types
+  # - `schema`: Array of hashes specifying column names and types. Supported types:
+  #   - `int8`, `int16`, `int32`, `int64`
+  #   - `uint8`, `uint16`, `uint32`, `uint64`
+  #   - `float`, `double`
+  #   - `string`
+  #   - `binary`
+  #   - `boolean`
+  #   - `date32`
+  #   - `timestamp_millis`, `timestamp_micros`
+  #   - Looks like [{"column_name" => {"type" => "date32", "format" => "%Y-%m-%d"}}, {"column_name" => "int8"}]
   # - `write_to`: String path or IO object to write the parquet file to
   sig do
     params(