parquet 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,14 +26,14 @@ fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, E
26
26
  }
27
27
 
28
28
  #[derive(Debug)]
29
- pub struct ParquetArgs {
29
+ pub struct ParquetRowsArgs {
30
30
  pub to_read: Value,
31
31
  pub result_type: String,
32
32
  pub columns: Option<Vec<String>>,
33
33
  }
34
34
 
35
35
  /// Parse common arguments for Parquet parsing
36
- pub fn parse_parquet_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetArgs, Error> {
36
+ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRowsArgs, Error> {
37
37
  let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
38
38
  let (to_read,) = parsed_args.required;
39
39
 
@@ -67,9 +67,63 @@ pub fn parse_parquet_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetArgs, Er
67
67
  None => String::from("hash"),
68
68
  };
69
69
 
70
- Ok(ParquetArgs {
70
+ Ok(ParquetRowsArgs {
71
71
  to_read,
72
72
  result_type,
73
73
  columns: kwargs.optional.1,
74
74
  })
75
75
  }
76
+
77
+ #[derive(Debug)]
78
+ pub struct ParquetColumnsArgs {
79
+ pub to_read: Value,
80
+ pub result_type: String,
81
+ pub columns: Option<Vec<String>>,
82
+ pub batch_size: Option<usize>,
83
+ }
84
+
85
+ /// Parse common arguments for Parquet parsing
86
+ pub fn parse_parquet_columns_args(
87
+ ruby: &Ruby,
88
+ args: &[Value],
89
+ ) -> Result<ParquetColumnsArgs, Error> {
90
+ let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
91
+ let (to_read,) = parsed_args.required;
92
+
93
+ let kwargs = get_kwargs::<_, (), (Option<Value>, Option<Vec<String>>, Option<usize>), ()>(
94
+ parsed_args.keywords,
95
+ &[],
96
+ &["result_type", "columns", "batch_size"],
97
+ )?;
98
+
99
+ let result_type = match kwargs
100
+ .optional
101
+ .0
102
+ .map(|value| parse_string_or_symbol(ruby, value))
103
+ {
104
+ Some(Ok(Some(parsed))) => match parsed.as_str() {
105
+ "hash" | "array" => parsed,
106
+ _ => {
107
+ return Err(Error::new(
108
+ magnus::exception::runtime_error(),
109
+ "result_type must be either 'hash' or 'array'",
110
+ ))
111
+ }
112
+ },
113
+ Some(Ok(None)) => String::from("hash"),
114
+ Some(Err(_)) => {
115
+ return Err(Error::new(
116
+ magnus::exception::type_error(),
117
+ "result_type must be a String or Symbol",
118
+ ))
119
+ }
120
+ None => String::from("hash"),
121
+ };
122
+
123
+ Ok(ParquetColumnsArgs {
124
+ to_read,
125
+ result_type,
126
+ columns: kwargs.optional.1,
127
+ batch_size: kwargs.optional.2,
128
+ })
129
+ }
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/parquet.rbi CHANGED
@@ -1,19 +1,38 @@
1
1
  # typed: strict
2
2
  module Parquet
3
3
  # Options:
4
- # - `input`: String specifying the input file
4
+ # - `input`: String, File, or IO object containing parquet data
5
5
  # - `result_type`: String or Symbol specifying the output format
6
6
  # ("hash" or "array" or :hash or :array)
7
7
  # - `columns`: When present, only the specified columns will be included in the output.
8
8
  # This is useful for reducing how much data is read and improving performance.
9
9
  sig do
10
10
  params(
11
- input: T.any(String, IO),
11
+ input: T.any(String, File, StringIO, IO),
12
12
  result_type: T.nilable(T.any(String, Symbol)),
13
13
  columns: T.nilable(T::Array[String]),
14
14
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
15
- ).returns(T.any(Enumerator, T.untyped))
15
+ ).returns(T.any(Enumerator, NilClass))
16
16
  end
17
17
  def self.each_row(input, result_type: nil, columns: nil, &blk)
18
18
  end
19
+
20
+ # Options:
21
+ # - `input`: String, File, or IO object containing parquet data
22
+ # - `result_type`: String or Symbol specifying the output format
23
+ # ("hash" or "array" or :hash or :array)
24
+ # - `columns`: When present, only the specified columns will be included in the output.
25
+ # - `batch_size`: When present, specifies the number of rows per batch
26
+ sig do
27
+ params(
28
+ input: T.any(String, File, StringIO, IO),
29
+ result_type: T.nilable(T.any(String, Symbol)),
30
+ columns: T.nilable(T::Array[String]),
31
+ batch_size: T.nilable(Integer),
32
+ blk:
33
+ T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
34
+ ).returns(T.any(Enumerator, NilClass))
35
+ end
36
+ def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
37
+ end
19
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-03 00:00:00.000000000 Z
11
+ date: 2025-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -68,11 +68,11 @@ files:
68
68
  - lib/parquet.rb
69
69
  - lib/parquet.rbi
70
70
  - lib/parquet/version.rb
71
- homepage: https://github.com/njaremko/parquet
71
+ homepage: https://github.com/njaremko/parquet-ruby
72
72
  licenses:
73
73
  - MIT
74
74
  metadata:
75
- homepage_uri: https://github.com/njaremko/parquet
75
+ homepage_uri: https://github.com/njaremko/parquet-ruby
76
76
  source_code_uri: https://github.com/njaremko/parquet-ruby
77
77
  readme_uri: https://github.com/njaremko/parquet-ruby/blob/main/README.md
78
78
  changelog_uri: https://github.com/njaremko/parquet-ruby/blob/main/CHANGELOG.md