parquet 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/parquet/src/ruby_reader.rs +34 -18
- data/ext/parquet/src/types/record_types.rs +2 -1
- data/ext/parquet/src/utils.rs +2 -2
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +17 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b5b56cca903ed731d6981d3113e3833a6e6a6a0ffcd301040b32ab0c72bf9c1
|
4
|
+
data.tar.gz: 0e4486e2a67a051852166ac81754c9bfb2807c2ffa51eeda5edb41050432e930
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: affe353c972f130973b309ca1ee928928278254830fa39bee8e4bed5452b5b381e2d27f6986067a0f9b27769a78f195887d371dc4825f61096414af92e9edb94
|
7
|
+
data.tar.gz: 9c823757be4b81d3ccafb57571c1d98b530a0d10696712083a0447b4871bf90720ba588d8c48777926b3f7a7f2ffede0c2ed7c9a076420b4dea183b7290ba47e
|
data/ext/parquet/src/ruby_reader.rs
CHANGED
@@ -184,37 +184,53 @@ impl Length for RubyReader {
|
|
184
184
|
}
|
185
185
|
RubyReader::RubyIoLike { inner } => {
|
186
186
|
let unwrapped_inner = ruby.get_inner(*inner);
|
187
|
-
let current_pos = unwrapped_inner.funcall::<_, _, u64>("seek", (0, 1));
|
188
187
|
|
189
|
-
|
188
|
+
// Get current position
|
189
|
+
let current_pos = match unwrapped_inner.funcall::<_, _, u64>("pos", ()) {
|
190
|
+
Ok(pos) => pos,
|
191
|
+
Err(e) => {
|
192
|
+
eprintln!("Error seeking: {}", e);
|
193
|
+
return 0;
|
194
|
+
}
|
195
|
+
};
|
196
|
+
|
197
|
+
// Seek to end
|
198
|
+
if let Err(e) = unwrapped_inner.funcall::<_, _, u64>("seek", (0, 2)) {
|
190
199
|
eprintln!("Error seeking: {}", e);
|
191
200
|
return 0;
|
192
201
|
}
|
193
202
|
|
194
|
-
|
203
|
+
// Offset at the end of the file is the length of the file
|
204
|
+
let size = match unwrapped_inner.funcall::<_, _, u64>("pos", ()) {
|
205
|
+
Ok(pos) => pos,
|
206
|
+
Err(e) => {
|
207
|
+
eprintln!("Error seeking: {}", e);
|
208
|
+
return 0;
|
209
|
+
}
|
210
|
+
};
|
211
|
+
|
212
|
+
// Restore original position
|
213
|
+
if let Err(e) = unwrapped_inner.funcall::<_, _, u64>("seek", (current_pos, 0)) {
|
195
214
|
eprintln!("Error seeking: {}", e);
|
196
215
|
return 0;
|
197
216
|
}
|
198
217
|
|
199
|
-
let
|
200
|
-
|
201
|
-
match size {
|
202
|
-
Ok(size) => {
|
203
|
-
// Restore original position
|
204
|
-
if let Err(e) = unwrapped_inner.funcall::<_, _, u64>(
|
205
|
-
"seek",
|
206
|
-
(current_pos.expect("Current position is not set!"), 0),
|
207
|
-
) {
|
208
|
-
eprintln!("Error seeking: {}", e);
|
209
|
-
return 0;
|
210
|
-
}
|
211
|
-
size
|
212
|
-
}
|
218
|
+
let final_pos = match unwrapped_inner.funcall::<_, _, u64>("pos", ()) {
|
219
|
+
Ok(pos) => pos,
|
213
220
|
Err(e) => {
|
214
221
|
eprintln!("Error seeking: {}", e);
|
215
222
|
return 0;
|
216
223
|
}
|
217
|
-
}
|
224
|
+
};
|
225
|
+
|
226
|
+
assert_eq!(
|
227
|
+
current_pos, final_pos,
|
228
|
+
"Failed to restore original position in seekable IO object. Started at position {}, but ended at {}",
|
229
|
+
current_pos,
|
230
|
+
final_pos
|
231
|
+
);
|
232
|
+
|
233
|
+
size
|
218
234
|
}
|
219
235
|
}
|
220
236
|
}
|
data/ext/parquet/src/types/record_types.rs
CHANGED
@@ -136,7 +136,8 @@ impl TryIntoValue for ParquetField {
|
|
136
136
|
.map_err(|e| ReaderError::Utf8Error(e))
|
137
137
|
.and_then(|s| Ok(s.into_value_with(handle)))?)
|
138
138
|
} else {
|
139
|
-
|
139
|
+
let s = String::from_utf8_lossy(s.as_bytes());
|
140
|
+
Ok(s.into_value_with(handle))
|
140
141
|
}
|
141
142
|
}
|
142
143
|
Field::Byte(b) => Ok(b.into_value_with(handle)),
|
data/ext/parquet/src/utils.rs
CHANGED
@@ -83,7 +83,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRow
|
|
83
83
|
None => ParserResultType::Hash,
|
84
84
|
};
|
85
85
|
|
86
|
-
let strict = kwargs.optional.2.flatten().unwrap_or(
|
86
|
+
let strict = kwargs.optional.2.flatten().unwrap_or(true);
|
87
87
|
|
88
88
|
Ok(ParquetRowsArgs {
|
89
89
|
to_read,
|
@@ -159,6 +159,6 @@ pub fn parse_parquet_columns_args(
|
|
159
159
|
result_type,
|
160
160
|
columns: kwargs.optional.1.flatten(),
|
161
161
|
batch_size: kwargs.optional.2.flatten(),
|
162
|
-
strict: kwargs.optional.3.flatten().unwrap_or(
|
162
|
+
strict: kwargs.optional.3.flatten().unwrap_or(true),
|
163
163
|
})
|
164
164
|
}
|
data/lib/parquet/version.rb
CHANGED
data/lib/parquet.rbi
CHANGED
@@ -11,7 +11,8 @@ module Parquet
|
|
11
11
|
params(
|
12
12
|
input: T.any(String, File, StringIO, IO),
|
13
13
|
result_type: T.nilable(T.any(String, Symbol)),
|
14
|
-
columns: T.nilable(T::Array[String])
|
14
|
+
columns: T.nilable(T::Array[String]),
|
15
|
+
strict: T.nilable(T::Boolean)
|
15
16
|
).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
|
16
17
|
end
|
17
18
|
sig do
|
@@ -19,10 +20,11 @@ module Parquet
|
|
19
20
|
input: T.any(String, File, StringIO, IO),
|
20
21
|
result_type: T.nilable(T.any(String, Symbol)),
|
21
22
|
columns: T.nilable(T::Array[String]),
|
23
|
+
strict: T.nilable(T::Boolean),
|
22
24
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
|
23
25
|
).returns(NilClass)
|
24
26
|
end
|
25
|
-
def self.each_row(input, result_type: nil, columns: nil, &blk)
|
27
|
+
def self.each_row(input, result_type: nil, columns: nil, strict: nil, &blk)
|
26
28
|
end
|
27
29
|
|
28
30
|
# Options:
|
@@ -36,7 +38,8 @@ module Parquet
|
|
36
38
|
input: T.any(String, File, StringIO, IO),
|
37
39
|
result_type: T.nilable(T.any(String, Symbol)),
|
38
40
|
columns: T.nilable(T::Array[String]),
|
39
|
-
batch_size: T.nilable(Integer)
|
41
|
+
batch_size: T.nilable(Integer),
|
42
|
+
strict: T.nilable(T::Boolean)
|
40
43
|
).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
|
41
44
|
end
|
42
45
|
sig do
|
@@ -45,11 +48,12 @@ module Parquet
|
|
45
48
|
result_type: T.nilable(T.any(String, Symbol)),
|
46
49
|
columns: T.nilable(T::Array[String]),
|
47
50
|
batch_size: T.nilable(Integer),
|
51
|
+
strict: T.nilable(T::Boolean),
|
48
52
|
blk:
|
49
53
|
T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
|
50
54
|
).returns(NilClass)
|
51
55
|
end
|
52
|
-
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
|
56
|
+
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, strict: nil, &blk)
|
53
57
|
end
|
54
58
|
|
55
59
|
# Options:
|
@@ -80,7 +84,15 @@ module Parquet
|
|
80
84
|
sample_size: T.nilable(Integer)
|
81
85
|
).void
|
82
86
|
end
|
83
|
-
def self.write_rows(
|
87
|
+
def self.write_rows(
|
88
|
+
read_from,
|
89
|
+
schema:,
|
90
|
+
write_to:,
|
91
|
+
batch_size: nil,
|
92
|
+
flush_threshold: nil,
|
93
|
+
compression: nil,
|
94
|
+
sample_size: nil
|
95
|
+
)
|
84
96
|
end
|
85
97
|
|
86
98
|
# Options:
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|