parquet 0.3.3 → 0.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b44b6f3ca56a0f4318c361b309c1af213186dcf09e09acbe54561a2dcc920042
4
- data.tar.gz: 68aa0636e1467e008ec29bbdb43737904b8f0a26f2d91a5169005f896950687f
3
+ metadata.gz: 2b5b56cca903ed731d6981d3113e3833a6e6a6a0ffcd301040b32ab0c72bf9c1
4
+ data.tar.gz: 0e4486e2a67a051852166ac81754c9bfb2807c2ffa51eeda5edb41050432e930
5
5
  SHA512:
6
- metadata.gz: 30966ae0335b9caa4458a79e5d627b920f2368605785a978690490a7ed05beb1f7ab25eeeed7349cd3c05b49573dc68405cb8c52af5aad5240a42ce4a56b184b
7
- data.tar.gz: 5ea6021844baac3bb31acf41f0fdc3795710f9eff21e9c3556692e70f10398249d802b6f3bb22f414d282f2ff33a2ea951fe9d3040625002752d86311eeae160
6
+ metadata.gz: affe353c972f130973b309ca1ee928928278254830fa39bee8e4bed5452b5b381e2d27f6986067a0f9b27769a78f195887d371dc4825f61096414af92e9edb94
7
+ data.tar.gz: 9c823757be4b81d3ccafb57571c1d98b530a0d10696712083a0447b4871bf90720ba588d8c48777926b3f7a7f2ffede0c2ed7c9a076420b4dea183b7290ba47e
@@ -184,37 +184,53 @@ impl Length for RubyReader {
184
184
  }
185
185
  RubyReader::RubyIoLike { inner } => {
186
186
  let unwrapped_inner = ruby.get_inner(*inner);
187
- let current_pos = unwrapped_inner.funcall::<_, _, u64>("seek", (0, 1));
188
187
 
189
- if let Err(e) = current_pos {
188
+ // Get current position
189
+ let current_pos = match unwrapped_inner.funcall::<_, _, u64>("pos", ()) {
190
+ Ok(pos) => pos,
191
+ Err(e) => {
192
+ eprintln!("Error seeking: {}", e);
193
+ return 0;
194
+ }
195
+ };
196
+
197
+ // Seek to end
198
+ if let Err(e) = unwrapped_inner.funcall::<_, _, u64>("seek", (0, 2)) {
190
199
  eprintln!("Error seeking: {}", e);
191
200
  return 0;
192
201
  }
193
202
 
194
- if let Err(e) = unwrapped_inner.funcall::<_, _, u64>("seek", (0, 2)) {
203
+ // Offset at the end of the file is the length of the file
204
+ let size = match unwrapped_inner.funcall::<_, _, u64>("pos", ()) {
205
+ Ok(pos) => pos,
206
+ Err(e) => {
207
+ eprintln!("Error seeking: {}", e);
208
+ return 0;
209
+ }
210
+ };
211
+
212
+ // Restore original position
213
+ if let Err(e) = unwrapped_inner.funcall::<_, _, u64>("seek", (current_pos, 0)) {
195
214
  eprintln!("Error seeking: {}", e);
196
215
  return 0;
197
216
  }
198
217
 
199
- let size = unwrapped_inner.funcall::<_, _, u64>("pos", ());
200
-
201
- match size {
202
- Ok(size) => {
203
- // Restore original position
204
- if let Err(e) = unwrapped_inner.funcall::<_, _, u64>(
205
- "seek",
206
- (current_pos.expect("Current position is not set!"), 0),
207
- ) {
208
- eprintln!("Error seeking: {}", e);
209
- return 0;
210
- }
211
- size
212
- }
218
+ let final_pos = match unwrapped_inner.funcall::<_, _, u64>("pos", ()) {
219
+ Ok(pos) => pos,
213
220
  Err(e) => {
214
221
  eprintln!("Error seeking: {}", e);
215
222
  return 0;
216
223
  }
217
- }
224
+ };
225
+
226
+ assert_eq!(
227
+ current_pos, final_pos,
228
+ "Failed to restore original position in seekable IO object. Started at position {}, but ended at {}",
229
+ current_pos,
230
+ final_pos
231
+ );
232
+
233
+ size
218
234
  }
219
235
  }
220
236
  }
@@ -136,7 +136,8 @@ impl TryIntoValue for ParquetField {
136
136
  .map_err(|e| ReaderError::Utf8Error(e))
137
137
  .and_then(|s| Ok(s.into_value_with(handle)))?)
138
138
  } else {
139
- Ok(handle.str_from_slice(s.as_bytes()).as_value())
139
+ let s = String::from_utf8_lossy(s.as_bytes());
140
+ Ok(s.into_value_with(handle))
140
141
  }
141
142
  }
142
143
  Field::Byte(b) => Ok(b.into_value_with(handle)),
@@ -83,7 +83,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRow
83
83
  None => ParserResultType::Hash,
84
84
  };
85
85
 
86
- let strict = kwargs.optional.2.flatten().unwrap_or(false);
86
+ let strict = kwargs.optional.2.flatten().unwrap_or(true);
87
87
 
88
88
  Ok(ParquetRowsArgs {
89
89
  to_read,
@@ -159,6 +159,6 @@ pub fn parse_parquet_columns_args(
159
159
  result_type,
160
160
  columns: kwargs.optional.1.flatten(),
161
161
  batch_size: kwargs.optional.2.flatten(),
162
- strict: kwargs.optional.3.flatten().unwrap_or(false),
162
+ strict: kwargs.optional.3.flatten().unwrap_or(true),
163
163
  })
164
164
  }
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.1"
3
3
  end
data/lib/parquet.rbi CHANGED
@@ -11,7 +11,8 @@ module Parquet
11
11
  params(
12
12
  input: T.any(String, File, StringIO, IO),
13
13
  result_type: T.nilable(T.any(String, Symbol)),
14
- columns: T.nilable(T::Array[String])
14
+ columns: T.nilable(T::Array[String]),
15
+ strict: T.nilable(T::Boolean)
15
16
  ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
16
17
  end
17
18
  sig do
@@ -19,10 +20,11 @@ module Parquet
19
20
  input: T.any(String, File, StringIO, IO),
20
21
  result_type: T.nilable(T.any(String, Symbol)),
21
22
  columns: T.nilable(T::Array[String]),
23
+ strict: T.nilable(T::Boolean),
22
24
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
23
25
  ).returns(NilClass)
24
26
  end
25
- def self.each_row(input, result_type: nil, columns: nil, &blk)
27
+ def self.each_row(input, result_type: nil, columns: nil, strict: nil, &blk)
26
28
  end
27
29
 
28
30
  # Options:
@@ -36,7 +38,8 @@ module Parquet
36
38
  input: T.any(String, File, StringIO, IO),
37
39
  result_type: T.nilable(T.any(String, Symbol)),
38
40
  columns: T.nilable(T::Array[String]),
39
- batch_size: T.nilable(Integer)
41
+ batch_size: T.nilable(Integer),
42
+ strict: T.nilable(T::Boolean)
40
43
  ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
41
44
  end
42
45
  sig do
@@ -45,11 +48,12 @@ module Parquet
45
48
  result_type: T.nilable(T.any(String, Symbol)),
46
49
  columns: T.nilable(T::Array[String]),
47
50
  batch_size: T.nilable(Integer),
51
+ strict: T.nilable(T::Boolean),
48
52
  blk:
49
53
  T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
50
54
  ).returns(NilClass)
51
55
  end
52
- def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
56
+ def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, strict: nil, &blk)
53
57
  end
54
58
 
55
59
  # Options:
@@ -80,7 +84,15 @@ module Parquet
80
84
  sample_size: T.nilable(Integer)
81
85
  ).void
82
86
  end
83
- def self.write_rows(read_from, schema:, write_to:, batch_size: nil, flush_threshold: nil, compression: nil, sample_size: nil)
87
+ def self.write_rows(
88
+ read_from,
89
+ schema:,
90
+ write_to:,
91
+ batch_size: nil,
92
+ flush_threshold: nil,
93
+ compression: nil,
94
+ sample_size: nil
95
+ )
84
96
  end
85
97
 
86
98
  # Options:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-19 00:00:00.000000000 Z
11
+ date: 2025-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys