parquet 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/parquet/src/ruby_reader.rs +17 -8
- data/ext/parquet/src/types/record_types.rs +2 -1
- data/ext/parquet/src/utils.rs +2 -2
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +17 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb28e49c647fcf9dddba5d18806b366e73ecaa6fb3bf8a3534eb7df4749710f0
|
4
|
+
data.tar.gz: 44953fd9a1f3fc89f24754a6cb555f4d32a90d4af4bb4aa3c14fc373a1c389df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaa9767d45deab2c3ba3fb0fc4c7aeb222b4446494262ff01083bdc488cca3164feee0f2f07bff644a3196479da0db4049dee1395b8b89f87f5aeba24a97b60b
|
7
|
+
data.tar.gz: 805a9cad6698f6ade3a61b40eb514a948ed6b07682db0bd3e724aad533468a114053b9589ec6261c31e8fe6b244c4808457f9e801a6991dffa86a36326736ab0
|
data/ext/parquet/src/ruby_reader.rs
CHANGED
@@ -34,9 +34,10 @@ impl RubyReader {
|
|
34
34
|
}
|
35
35
|
|
36
36
|
// For now, don't use this. Having to use seek in length is scary.
|
37
|
-
fn is_seekable_io_like(
|
38
|
-
|
39
|
-
|
37
|
+
fn is_seekable_io_like(value: &Value) -> bool {
|
38
|
+
Self::is_io_like(value)
|
39
|
+
&& value.respond_to("seek", false).unwrap_or(false)
|
40
|
+
&& value.respond_to("pos", false).unwrap_or(false)
|
40
41
|
}
|
41
42
|
}
|
42
43
|
|
@@ -92,10 +93,14 @@ impl Seek for RubyReader {
|
|
92
93
|
let unwrapped_inner = ruby.get_inner(*inner);
|
93
94
|
|
94
95
|
let new_offset = match pos {
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
SeekFrom::Start(off) => off as usize,
|
97
|
+
SeekFrom::Current(off) => {
|
98
|
+
let signed = *original_offset as i64 + off;
|
99
|
+
signed.max(0) as usize
|
100
|
+
}
|
101
|
+
SeekFrom::End(off) => {
|
102
|
+
let signed = unwrapped_inner.len() as i64 + off;
|
103
|
+
signed.max(0) as usize
|
99
104
|
}
|
100
105
|
};
|
101
106
|
|
@@ -111,8 +116,12 @@ impl Seek for RubyReader {
|
|
111
116
|
SeekFrom::End(i) => (2, i),
|
112
117
|
};
|
113
118
|
|
119
|
+
unwrapped_inner
|
120
|
+
.funcall::<_, _, u64>("seek", (ruby_offset, whence))
|
121
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
122
|
+
|
114
123
|
let new_position = unwrapped_inner
|
115
|
-
.funcall("
|
124
|
+
.funcall::<_, _, u64>("pos", ())
|
116
125
|
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
117
126
|
|
118
127
|
Ok(new_position)
|
data/ext/parquet/src/types/record_types.rs
CHANGED
@@ -136,7 +136,8 @@ impl TryIntoValue for ParquetField {
|
|
136
136
|
.map_err(|e| ReaderError::Utf8Error(e))
|
137
137
|
.and_then(|s| Ok(s.into_value_with(handle)))?)
|
138
138
|
} else {
|
139
|
-
|
139
|
+
let s = String::from_utf8_lossy(s.as_bytes());
|
140
|
+
Ok(s.into_value_with(handle))
|
140
141
|
}
|
141
142
|
}
|
142
143
|
Field::Byte(b) => Ok(b.into_value_with(handle)),
|
data/ext/parquet/src/utils.rs
CHANGED
@@ -83,7 +83,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRow
|
|
83
83
|
None => ParserResultType::Hash,
|
84
84
|
};
|
85
85
|
|
86
|
-
let strict = kwargs.optional.2.flatten().unwrap_or(
|
86
|
+
let strict = kwargs.optional.2.flatten().unwrap_or(true);
|
87
87
|
|
88
88
|
Ok(ParquetRowsArgs {
|
89
89
|
to_read,
|
@@ -159,6 +159,6 @@ pub fn parse_parquet_columns_args(
|
|
159
159
|
result_type,
|
160
160
|
columns: kwargs.optional.1.flatten(),
|
161
161
|
batch_size: kwargs.optional.2.flatten(),
|
162
|
-
strict: kwargs.optional.3.flatten().unwrap_or(
|
162
|
+
strict: kwargs.optional.3.flatten().unwrap_or(true),
|
163
163
|
})
|
164
164
|
}
|
data/lib/parquet/version.rb
CHANGED
data/lib/parquet.rbi
CHANGED
@@ -11,7 +11,8 @@ module Parquet
|
|
11
11
|
params(
|
12
12
|
input: T.any(String, File, StringIO, IO),
|
13
13
|
result_type: T.nilable(T.any(String, Symbol)),
|
14
|
-
columns: T.nilable(T::Array[String])
|
14
|
+
columns: T.nilable(T::Array[String]),
|
15
|
+
strict: T.nilable(T::Boolean)
|
15
16
|
).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
|
16
17
|
end
|
17
18
|
sig do
|
@@ -19,10 +20,11 @@ module Parquet
|
|
19
20
|
input: T.any(String, File, StringIO, IO),
|
20
21
|
result_type: T.nilable(T.any(String, Symbol)),
|
21
22
|
columns: T.nilable(T::Array[String]),
|
23
|
+
strict: T.nilable(T::Boolean),
|
22
24
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
|
23
25
|
).returns(NilClass)
|
24
26
|
end
|
25
|
-
def self.each_row(input, result_type: nil, columns: nil, &blk)
|
27
|
+
def self.each_row(input, result_type: nil, columns: nil, strict: nil, &blk)
|
26
28
|
end
|
27
29
|
|
28
30
|
# Options:
|
@@ -36,7 +38,8 @@ module Parquet
|
|
36
38
|
input: T.any(String, File, StringIO, IO),
|
37
39
|
result_type: T.nilable(T.any(String, Symbol)),
|
38
40
|
columns: T.nilable(T::Array[String]),
|
39
|
-
batch_size: T.nilable(Integer)
|
41
|
+
batch_size: T.nilable(Integer),
|
42
|
+
strict: T.nilable(T::Boolean)
|
40
43
|
).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
|
41
44
|
end
|
42
45
|
sig do
|
@@ -45,11 +48,12 @@ module Parquet
|
|
45
48
|
result_type: T.nilable(T.any(String, Symbol)),
|
46
49
|
columns: T.nilable(T::Array[String]),
|
47
50
|
batch_size: T.nilable(Integer),
|
51
|
+
strict: T.nilable(T::Boolean),
|
48
52
|
blk:
|
49
53
|
T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
|
50
54
|
).returns(NilClass)
|
51
55
|
end
|
52
|
-
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
|
56
|
+
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, strict: nil, &blk)
|
53
57
|
end
|
54
58
|
|
55
59
|
# Options:
|
@@ -80,7 +84,15 @@ module Parquet
|
|
80
84
|
sample_size: T.nilable(Integer)
|
81
85
|
).void
|
82
86
|
end
|
83
|
-
def self.write_rows(
|
87
|
+
def self.write_rows(
|
88
|
+
read_from,
|
89
|
+
schema:,
|
90
|
+
write_to:,
|
91
|
+
batch_size: nil,
|
92
|
+
flush_threshold: nil,
|
93
|
+
compression: nil,
|
94
|
+
sample_size: nil
|
95
|
+
)
|
84
96
|
end
|
85
97
|
|
86
98
|
# Options:
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|