parquet 0.0.5 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +50 -0
- data/README.md +92 -2
- data/ext/parquet/Cargo.toml +1 -0
- data/ext/parquet/src/lib.rs +5 -3
- data/ext/parquet/src/{reader.rs → reader/mod.rs} +5 -2
- data/ext/parquet/src/types/core_types.rs +73 -0
- data/ext/parquet/src/types/mod.rs +30 -0
- data/ext/parquet/src/types/parquet_value.rs +458 -0
- data/ext/parquet/src/types/record_types.rs +204 -0
- data/ext/parquet/src/types/timestamp.rs +85 -0
- data/ext/parquet/src/types/type_conversion.rs +753 -0
- data/ext/parquet/src/types/writer_types.rs +270 -0
- data/ext/parquet/src/writer/mod.rs +403 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rbi +33 -2
- metadata +13 -6
- data/ext/parquet/src/types.rs +0 -763
- data/ext/parquet/src/{parquet_column_reader.rs → reader/parquet_column_reader.rs} +0 -0
- data/ext/parquet/src/{parquet_row_reader.rs → reader/parquet_row_reader.rs} +0 -0
data/lib/parquet.rbi
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# typed: strict
|
2
|
+
|
2
3
|
module Parquet
|
3
4
|
# Options:
|
4
5
|
# - `input`: String, File, or IO object containing parquet data
|
@@ -12,7 +13,7 @@ module Parquet
|
|
12
13
|
result_type: T.nilable(T.any(String, Symbol)),
|
13
14
|
columns: T.nilable(T::Array[String]),
|
14
15
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
|
15
|
-
).returns(T.any(Enumerator, NilClass))
|
16
|
+
).returns(T.any(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])], NilClass))
|
16
17
|
end
|
17
18
|
def self.each_row(input, result_type: nil, columns: nil, &blk)
|
18
19
|
end
|
@@ -31,8 +32,38 @@ module Parquet
|
|
31
32
|
batch_size: T.nilable(Integer),
|
32
33
|
blk:
|
33
34
|
T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
|
34
|
-
).returns(T.any(Enumerator, NilClass))
|
35
|
+
).returns(T.any(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])], NilClass))
|
35
36
|
end
|
36
37
|
def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
|
37
38
|
end
|
39
|
+
|
40
|
+
# Options:
|
41
|
+
# - `read_from`: An Enumerator yielding arrays of values representing each row
|
42
|
+
# - `schema`: Array of hashes specifying column names and types
|
43
|
+
# - `write_to`: String path or IO object to write the parquet file to
|
44
|
+
# - `batch_size`: Optional batch size for writing (defaults to 1000)
|
45
|
+
sig do
|
46
|
+
params(
|
47
|
+
read_from: T::Enumerator[T::Array[T.untyped]],
|
48
|
+
schema: T::Array[T::Hash[String, String]],
|
49
|
+
write_to: T.any(String, IO),
|
50
|
+
batch_size: T.nilable(Integer)
|
51
|
+
).void
|
52
|
+
end
|
53
|
+
def self.write_rows(read_from, schema:, write_to:, batch_size: nil)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Options:
|
57
|
+
# - `read_from`: An Enumerator yielding arrays of column batches
|
58
|
+
# - `schema`: Array of hashes specifying column names and types
|
59
|
+
# - `write_to`: String path or IO object to write the parquet file to
|
60
|
+
sig do
|
61
|
+
params(
|
62
|
+
read_from: T::Enumerator[T::Array[T::Array[T.untyped]]],
|
63
|
+
schema: T::Array[T::Hash[String, String]],
|
64
|
+
write_to: T.any(String, IO)
|
65
|
+
).void
|
66
|
+
end
|
67
|
+
def self.write_columns(read_from, schema:, write_to:)
|
68
|
+
end
|
38
69
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -60,13 +60,20 @@ files:
|
|
60
60
|
- ext/parquet/src/enumerator.rs
|
61
61
|
- ext/parquet/src/header_cache.rs
|
62
62
|
- ext/parquet/src/lib.rs
|
63
|
-
- ext/parquet/src/parquet_column_reader.rs
|
64
|
-
- ext/parquet/src/parquet_row_reader.rs
|
65
|
-
- ext/parquet/src/reader.rs
|
63
|
+
- ext/parquet/src/reader/mod.rs
|
64
|
+
- ext/parquet/src/reader/parquet_column_reader.rs
|
65
|
+
- ext/parquet/src/reader/parquet_row_reader.rs
|
66
66
|
- ext/parquet/src/ruby_integration.rs
|
67
67
|
- ext/parquet/src/ruby_reader.rs
|
68
|
-
- ext/parquet/src/types.rs
|
68
|
+
- ext/parquet/src/types/core_types.rs
|
69
|
+
- ext/parquet/src/types/mod.rs
|
70
|
+
- ext/parquet/src/types/parquet_value.rs
|
71
|
+
- ext/parquet/src/types/record_types.rs
|
72
|
+
- ext/parquet/src/types/timestamp.rs
|
73
|
+
- ext/parquet/src/types/type_conversion.rs
|
74
|
+
- ext/parquet/src/types/writer_types.rs
|
69
75
|
- ext/parquet/src/utils.rs
|
76
|
+
- ext/parquet/src/writer/mod.rs
|
70
77
|
- lib/parquet.rb
|
71
78
|
- lib/parquet.rbi
|
72
79
|
- lib/parquet/version.rb
|