red-parquet 12.0.1 → 13.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/parquet/arrow-file-reader.rb +28 -0
- data/lib/parquet/loader.rb +1 -0
- data/lib/parquet/version.rb +1 -1
- data/test/test-arrow-file-reader.rb +66 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c93ce2b0904caac902ca0439acb097d44dc6081109c954108e6d7c5ce661cb7
|
4
|
+
data.tar.gz: 99107b1b1fc9d5fac8861fc5326fe05b036891e9e454e3f67a2456a87c80d17e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 715881ca396263186ea896798e33388cd1dd7d43f7e60c71ffeaaa61d4f7f9f91388494f1fe8ad9f6db68f9e8f51baedac4f6fd183b50bddf8923bc02e5af5f8
|
7
|
+
data.tar.gz: 7268ab3d6871063c265fd6342d9a16a5ef70aa0d2e92fbe5f2fd1f06a1f3138ebbccc22ff30042c342155e0f86ec95bd0b7022deb87cd7bd8e249d2598b1c724
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Parquet
|
19
|
+
class ArrowFileReader
|
20
|
+
def each_row_group
|
21
|
+
return to_enum(__method__) {n_row_groups} unless block_given?
|
22
|
+
|
23
|
+
n_row_groups.times do |i|
|
24
|
+
yield(read_row_group(i))
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/parquet/loader.rb
CHANGED
data/lib/parquet/version.rb
CHANGED
@@ -0,0 +1,66 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class TestArrowFileReader < Test::Unit::TestCase
|
19
|
+
def setup
|
20
|
+
@schema = Arrow::Schema.new(visible: :boolean)
|
21
|
+
table = Arrow::Table.new(@schema, [[true], [false]])
|
22
|
+
Tempfile.create(["red-parquet", ".parquet"]) do |file|
|
23
|
+
@file = file
|
24
|
+
Parquet::ArrowFileWriter.open(table.schema, @file.path) do |writer|
|
25
|
+
chunk_size = 1
|
26
|
+
writer.write_table(table, chunk_size)
|
27
|
+
end
|
28
|
+
yield
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
sub_test_case("#each_row_group") do
|
33
|
+
test("block") do
|
34
|
+
Arrow::FileInputStream.open(@file.path) do |input|
|
35
|
+
reader = Parquet::ArrowFileReader.new(input)
|
36
|
+
row_groups = []
|
37
|
+
reader.each_row_group do |row_group|
|
38
|
+
row_groups << row_group
|
39
|
+
end
|
40
|
+
assert_equal([
|
41
|
+
Arrow::Table.new(@schema, [[true]]),
|
42
|
+
Arrow::Table.new(@schema, [[false]])
|
43
|
+
],
|
44
|
+
row_groups)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
test("without block") do
|
49
|
+
Arrow::FileInputStream.open(@file.path) do |input|
|
50
|
+
reader = Parquet::ArrowFileReader.new(input)
|
51
|
+
each_row_group = reader.each_row_group
|
52
|
+
assert_equal({
|
53
|
+
size: 2,
|
54
|
+
to_a: [
|
55
|
+
Arrow::Table.new(@schema, [[true]]),
|
56
|
+
Arrow::Table.new(@schema, [[false]])
|
57
|
+
],
|
58
|
+
},
|
59
|
+
{
|
60
|
+
size: each_row_group.size,
|
61
|
+
to_a: each_row_group.to_a,
|
62
|
+
})
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 13.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 13.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 13.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,6 +81,7 @@ files:
|
|
81
81
|
- Rakefile
|
82
82
|
- dependency-check/Rakefile
|
83
83
|
- lib/parquet.rb
|
84
|
+
- lib/parquet/arrow-file-reader.rb
|
84
85
|
- lib/parquet/arrow-table-loadable.rb
|
85
86
|
- lib/parquet/arrow-table-savable.rb
|
86
87
|
- lib/parquet/loader.rb
|
@@ -89,6 +90,7 @@ files:
|
|
89
90
|
- red-parquet.gemspec
|
90
91
|
- test/helper.rb
|
91
92
|
- test/run-test.rb
|
93
|
+
- test/test-arrow-file-reader.rb
|
92
94
|
- test/test-arrow-table.rb
|
93
95
|
- test/test-boolean-statistics.rb
|
94
96
|
homepage: https://arrow.apache.org/
|
@@ -117,5 +119,6 @@ summary: Red Parquet is the Ruby bindings of Apache Parquet
|
|
117
119
|
test_files:
|
118
120
|
- test/helper.rb
|
119
121
|
- test/run-test.rb
|
122
|
+
- test/test-arrow-file-reader.rb
|
120
123
|
- test/test-arrow-table.rb
|
121
124
|
- test/test-boolean-statistics.rb
|