parquet 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -26,14 +26,14 @@ fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, E
26
26
  }
27
27
 
28
28
  #[derive(Debug)]
29
- pub struct ParquetArgs {
29
+ pub struct ParquetRowsArgs {
30
30
  pub to_read: Value,
31
31
  pub result_type: String,
32
32
  pub columns: Option<Vec<String>>,
33
33
  }
34
34
 
35
35
  /// Parse common arguments for Parquet row-based reading (note: comment previously said "CSV" — copy-paste error)
36
- pub fn parse_parquet_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetArgs, Error> {
36
+ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRowsArgs, Error> {
37
37
  let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
38
38
  let (to_read,) = parsed_args.required;
39
39
 
@@ -67,9 +67,63 @@ pub fn parse_parquet_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetArgs, Er
67
67
  None => String::from("hash"),
68
68
  };
69
69
 
70
- Ok(ParquetArgs {
70
+ Ok(ParquetRowsArgs {
71
71
  to_read,
72
72
  result_type,
73
73
  columns: kwargs.optional.1,
74
74
  })
75
75
  }
76
+
77
+ #[derive(Debug)]
78
+ pub struct ParquetColumnsArgs {
79
+ pub to_read: Value,
80
+ pub result_type: String,
81
+ pub columns: Option<Vec<String>>,
82
+ pub batch_size: Option<usize>,
83
+ }
84
+
85
+ /// Parse common arguments for Parquet column-based (batched) reading
86
+ pub fn parse_parquet_columns_args(
87
+ ruby: &Ruby,
88
+ args: &[Value],
89
+ ) -> Result<ParquetColumnsArgs, Error> {
90
+ let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
91
+ let (to_read,) = parsed_args.required;
92
+
93
+ let kwargs = get_kwargs::<_, (), (Option<Value>, Option<Vec<String>>, Option<usize>), ()>(
94
+ parsed_args.keywords,
95
+ &[],
96
+ &["result_type", "columns", "batch_size"],
97
+ )?;
98
+
99
+ let result_type = match kwargs
100
+ .optional
101
+ .0
102
+ .map(|value| parse_string_or_symbol(ruby, value))
103
+ {
104
+ Some(Ok(Some(parsed))) => match parsed.as_str() {
105
+ "hash" | "array" => parsed,
106
+ _ => {
107
+ return Err(Error::new(
108
+ magnus::exception::runtime_error(),
109
+ "result_type must be either 'hash' or 'array'",
110
+ ))
111
+ }
112
+ },
113
+ Some(Ok(None)) => String::from("hash"),
114
+ Some(Err(_)) => {
115
+ return Err(Error::new(
116
+ magnus::exception::type_error(),
117
+ "result_type must be a String or Symbol",
118
+ ))
119
+ }
120
+ None => String::from("hash"),
121
+ };
122
+
123
+ Ok(ParquetColumnsArgs {
124
+ to_read,
125
+ result_type,
126
+ columns: kwargs.optional.1,
127
+ batch_size: kwargs.optional.2,
128
+ })
129
+ }
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/parquet.rbi CHANGED
@@ -1,19 +1,38 @@
1
1
  # typed: strict
2
2
  module Parquet
3
3
  # Options:
4
- # - `input`: String specifying the input file
4
+ # - `input`: String, File, or IO object containing parquet data
5
5
  # - `result_type`: String specifying the output format
6
6
  # ("hash" or "array" or :hash or :array)
7
7
  # - `columns`: When present, only the specified columns will be included in the output.
8
8
  # This is useful for reducing how much data is read and improving performance.
9
9
  sig do
10
10
  params(
11
- input: T.any(String, IO),
11
+ input: T.any(String, File, StringIO, IO),
12
12
  result_type: T.nilable(T.any(String, Symbol)),
13
13
  columns: T.nilable(T::Array[String]),
14
14
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
15
- ).returns(T.any(Enumerator, T.untyped))
15
+ ).returns(T.any(Enumerator, NilClass))
16
16
  end
17
17
  def self.each_row(input, result_type: nil, columns: nil, &blk)
18
18
  end
19
+
20
+ # Options:
21
+ # - `input`: String, File, or IO object containing parquet data
22
+ # - `result_type`: String specifying the output format
23
+ # ("hash" or "array" or :hash or :array)
24
+ # - `columns`: When present, only the specified columns will be included in the output.
25
+ # - `batch_size`: When present, specifies the number of rows per batch
26
+ sig do
27
+ params(
28
+ input: T.any(String, File, StringIO, IO),
29
+ result_type: T.nilable(T.any(String, Symbol)),
30
+ columns: T.nilable(T::Array[String]),
31
+ batch_size: T.nilable(Integer),
32
+ blk:
33
+ T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
34
+ ).returns(T.any(Enumerator, NilClass))
35
+ end
36
+ def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
37
+ end
19
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-03 00:00:00.000000000 Z
11
+ date: 2025-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -68,11 +68,11 @@ files:
68
68
  - lib/parquet.rb
69
69
  - lib/parquet.rbi
70
70
  - lib/parquet/version.rb
71
- homepage: https://github.com/njaremko/parquet
71
+ homepage: https://github.com/njaremko/parquet-ruby
72
72
  licenses:
73
73
  - MIT
74
74
  metadata:
75
- homepage_uri: https://github.com/njaremko/parquet
75
+ homepage_uri: https://github.com/njaremko/parquet-ruby
76
76
  source_code_uri: https://github.com/njaremko/parquet-ruby
77
77
  readme_uri: https://github.com/njaremko/parquet-ruby/blob/main/README.md
78
78
  changelog_uri: https://github.com/njaremko/parquet-ruby/blob/main/CHANGELOG.md