parquet 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/parquet/src/types/record_types.rs +2 -1
- data/ext/parquet/src/utils.rs +2 -2
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +17 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb28e49c647fcf9dddba5d18806b366e73ecaa6fb3bf8a3534eb7df4749710f0
|
4
|
+
data.tar.gz: 44953fd9a1f3fc89f24754a6cb555f4d32a90d4af4bb4aa3c14fc373a1c389df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaa9767d45deab2c3ba3fb0fc4c7aeb222b4446494262ff01083bdc488cca3164feee0f2f07bff644a3196479da0db4049dee1395b8b89f87f5aeba24a97b60b
|
7
|
+
data.tar.gz: 805a9cad6698f6ade3a61b40eb514a948ed6b07682db0bd3e724aad533468a114053b9589ec6261c31e8fe6b244c4808457f9e801a6991dffa86a36326736ab0
|
data/ext/parquet/src/types/record_types.rs
CHANGED
@@ -136,7 +136,8 @@ impl TryIntoValue for ParquetField {
|
|
136
136
|
.map_err(|e| ReaderError::Utf8Error(e))
|
137
137
|
.and_then(|s| Ok(s.into_value_with(handle)))?)
|
138
138
|
} else {
|
139
|
-
|
139
|
+
let s = String::from_utf8_lossy(s.as_bytes());
|
140
|
+
Ok(s.into_value_with(handle))
|
140
141
|
}
|
141
142
|
}
|
142
143
|
Field::Byte(b) => Ok(b.into_value_with(handle)),
|
data/ext/parquet/src/utils.rs
CHANGED
@@ -83,7 +83,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRow
|
|
83
83
|
None => ParserResultType::Hash,
|
84
84
|
};
|
85
85
|
|
86
|
-
let strict = kwargs.optional.2.flatten().unwrap_or(
|
86
|
+
let strict = kwargs.optional.2.flatten().unwrap_or(true);
|
87
87
|
|
88
88
|
Ok(ParquetRowsArgs {
|
89
89
|
to_read,
|
@@ -159,6 +159,6 @@ pub fn parse_parquet_columns_args(
|
|
159
159
|
result_type,
|
160
160
|
columns: kwargs.optional.1.flatten(),
|
161
161
|
batch_size: kwargs.optional.2.flatten(),
|
162
|
-
strict: kwargs.optional.3.flatten().unwrap_or(
|
162
|
+
strict: kwargs.optional.3.flatten().unwrap_or(true),
|
163
163
|
})
|
164
164
|
}
|
data/lib/parquet/version.rb
CHANGED
data/lib/parquet.rbi
CHANGED
@@ -11,7 +11,8 @@ module Parquet
|
|
11
11
|
params(
|
12
12
|
input: T.any(String, File, StringIO, IO),
|
13
13
|
result_type: T.nilable(T.any(String, Symbol)),
|
14
|
-
columns: T.nilable(T::Array[String])
|
14
|
+
columns: T.nilable(T::Array[String]),
|
15
|
+
strict: T.nilable(T::Boolean)
|
15
16
|
).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
|
16
17
|
end
|
17
18
|
sig do
|
@@ -19,10 +20,11 @@ module Parquet
|
|
19
20
|
input: T.any(String, File, StringIO, IO),
|
20
21
|
result_type: T.nilable(T.any(String, Symbol)),
|
21
22
|
columns: T.nilable(T::Array[String]),
|
23
|
+
strict: T.nilable(T::Boolean),
|
22
24
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
|
23
25
|
).returns(NilClass)
|
24
26
|
end
|
25
|
-
def self.each_row(input, result_type: nil, columns: nil, &blk)
|
27
|
+
def self.each_row(input, result_type: nil, columns: nil, strict: nil, &blk)
|
26
28
|
end
|
27
29
|
|
28
30
|
# Options:
|
@@ -36,7 +38,8 @@ module Parquet
|
|
36
38
|
input: T.any(String, File, StringIO, IO),
|
37
39
|
result_type: T.nilable(T.any(String, Symbol)),
|
38
40
|
columns: T.nilable(T::Array[String]),
|
39
|
-
batch_size: T.nilable(Integer)
|
41
|
+
batch_size: T.nilable(Integer),
|
42
|
+
strict: T.nilable(T::Boolean)
|
40
43
|
).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
|
41
44
|
end
|
42
45
|
sig do
|
@@ -45,11 +48,12 @@ module Parquet
|
|
45
48
|
result_type: T.nilable(T.any(String, Symbol)),
|
46
49
|
columns: T.nilable(T::Array[String]),
|
47
50
|
batch_size: T.nilable(Integer),
|
51
|
+
strict: T.nilable(T::Boolean),
|
48
52
|
blk:
|
49
53
|
T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
|
50
54
|
).returns(NilClass)
|
51
55
|
end
|
52
|
-
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
|
56
|
+
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, strict: nil, &blk)
|
53
57
|
end
|
54
58
|
|
55
59
|
# Options:
|
@@ -80,7 +84,15 @@ module Parquet
|
|
80
84
|
sample_size: T.nilable(Integer)
|
81
85
|
).void
|
82
86
|
end
|
83
|
-
def self.write_rows(
|
87
|
+
def self.write_rows(
|
88
|
+
read_from,
|
89
|
+
schema:,
|
90
|
+
write_to:,
|
91
|
+
batch_size: nil,
|
92
|
+
flush_threshold: nil,
|
93
|
+
compression: nil,
|
94
|
+
sample_size: nil
|
95
|
+
)
|
84
96
|
end
|
85
97
|
|
86
98
|
# Options:
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|