red-parquet 0.17.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/parquet/arrow-table-savable.rb +18 -2
- data/lib/parquet/loader.rb +1 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet/writer-properties.rb +28 -0
- data/test/test-arrow-table.rb +48 -5
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe7b0bb0ec8f4a2420a2ab1001f2b296e7ceb7946e06aa3abe523163f67c7349
|
4
|
+
data.tar.gz: 3714403cabdb00f02dcd815a7db91141882de6192bd3b4b1cd12c213ed31c08c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a071b5bed3343f0500f42fc242327d08634f08d3443a167c6b46d65b0b2645b63c24b21c4272d082bf0548bb4249250c27854370d672b66d6822d89f37db323
|
7
|
+
data.tar.gz: 4745b76798fbeab130a36235a14ba8d694597c789a3b284cc8cb71108d86c4e1c1ae2a9c45f994aa44624bc8b16bdd62606aa837fe95214d4bd4d8d6f06de2e4
|
data/lib/parquet/arrow-table-savable.rb
CHANGED
@@ -19,9 +19,25 @@ module Parquet
|
|
19
19
|
module ArrowTableSavable
|
20
20
|
private
|
21
21
|
def save_as_parquet
|
22
|
+
properties = WriterProperties.new
|
23
|
+
@options.each do |key, value|
|
24
|
+
next if value.nil?
|
25
|
+
set_method_name = "set_#{key}"
|
26
|
+
next unless properties.respond_to?(set_method_name)
|
27
|
+
case value
|
28
|
+
when ::Array, ::Hash
|
29
|
+
value.each do |path, v|
|
30
|
+
properties.__send__(set_method_name, v, path)
|
31
|
+
end
|
32
|
+
else
|
33
|
+
properties.__send__(set_method_name, value)
|
34
|
+
end
|
35
|
+
end
|
22
36
|
chunk_size = @options[:chunk_size] || 1024 # TODO
|
23
|
-
|
24
|
-
|
37
|
+
open_raw_output_stream do |output|
|
38
|
+
ArrowFileWriter.open(@table.schema,
|
39
|
+
output,
|
40
|
+
properties) do |writer|
|
25
41
|
writer.write_table(@table, chunk_size)
|
26
42
|
end
|
27
43
|
end
|
data/lib/parquet/loader.rb
CHANGED
data/lib/parquet/version.rb
CHANGED
data/lib/parquet/writer-properties.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Parquet
|
19
|
+
class WriterProperties
|
20
|
+
def set_dictionary(enable, path=nil)
|
21
|
+
if enable
|
22
|
+
enable_dictionary(path)
|
23
|
+
else
|
24
|
+
disable_dictionary(path)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/test/test-arrow-table.rb
CHANGED
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
19
19
|
def setup
|
20
20
|
@count_field = Arrow::Field.new("count", :uint8)
|
21
21
|
@visible_field = Arrow::Field.new("visible", :boolean)
|
22
|
-
|
22
|
+
@label_field = Arrow::Field.new("label", :string)
|
23
|
+
schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
|
23
24
|
count_arrays = [
|
24
25
|
Arrow::UInt8Array.new([1, 2]),
|
25
26
|
Arrow::UInt8Array.new([4, 8, 16]),
|
@@ -33,16 +34,30 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
33
34
|
Arrow::BooleanArray.new([nil]),
|
34
35
|
Arrow::BooleanArray.new([nil]),
|
35
36
|
]
|
37
|
+
label_arrays = [
|
38
|
+
Arrow::StringArray.new(["a"]),
|
39
|
+
Arrow::StringArray.new(["b", "c"]),
|
40
|
+
Arrow::StringArray.new(["d", nil, nil]),
|
41
|
+
Arrow::StringArray.new(["e", "f"]),
|
42
|
+
]
|
36
43
|
@count_array = Arrow::ChunkedArray.new(count_arrays)
|
37
44
|
@visible_array = Arrow::ChunkedArray.new(visible_arrays)
|
38
|
-
@table = Arrow::Table.new(schema, [@count_array, @visible_array])
|
45
|
+
@label_array = Arrow::ChunkedArray.new(label_arrays)
|
46
|
+
@table = Arrow::Table.new(schema,
|
47
|
+
[@count_array, @visible_array, @label_array])
|
48
|
+
|
49
|
+
@output = Tempfile.open(["red-parquet", ".parquet"])
|
50
|
+
begin
|
51
|
+
yield(@output)
|
52
|
+
ensure
|
53
|
+
@output.close!
|
54
|
+
end
|
39
55
|
end
|
40
56
|
|
41
57
|
def test_save_load_path
|
42
|
-
|
43
|
-
@table.save(tempfile.path)
|
58
|
+
@table.save(@output.path)
|
44
59
|
assert do
|
45
|
-
@table.equal_metadata(Arrow::Table.load(tempfile.path), false)
|
60
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
46
61
|
end
|
47
62
|
end
|
48
63
|
|
@@ -53,4 +68,32 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
53
68
|
@table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
|
54
69
|
end
|
55
70
|
end
|
71
|
+
|
72
|
+
def test_save_load_compression
|
73
|
+
@table.save(@output.path, compression: :zstd)
|
74
|
+
assert do
|
75
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_save_load_compression_path
|
80
|
+
@table.save(@output.path, compression: {"count" => :zstd})
|
81
|
+
assert do
|
82
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_save_load_dictionary
|
87
|
+
@table.save(@output.path, dictionary: false)
|
88
|
+
assert do
|
89
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_save_load_dictionary_path
|
94
|
+
@table.save(@output.path, dictionary: [["label", false]])
|
95
|
+
assert do
|
96
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
97
|
+
end
|
98
|
+
end
|
56
99
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.17.1
|
19
|
+
version: 1.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.17.1
|
26
|
+
version: 1.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/parquet/arrow-table-savable.rb
|
86
86
|
- lib/parquet/loader.rb
|
87
87
|
- lib/parquet/version.rb
|
88
|
+
- lib/parquet/writer-properties.rb
|
88
89
|
- red-parquet.gemspec
|
89
90
|
- test/helper.rb
|
90
91
|
- test/run-test.rb
|
@@ -93,7 +94,7 @@ homepage: https://arrow.apache.org/
|
|
93
94
|
licenses:
|
94
95
|
- Apache-2.0
|
95
96
|
metadata: {}
|
96
|
-
post_install_message:
|
97
|
+
post_install_message:
|
97
98
|
rdoc_options: []
|
98
99
|
require_paths:
|
99
100
|
- lib
|
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
110
|
version: '0'
|
110
111
|
requirements: []
|
111
112
|
rubygems_version: 3.1.2
|
112
|
-
signing_key:
|
113
|
+
signing_key:
|
113
114
|
specification_version: 4
|
114
115
|
summary: Red Parquet is the Ruby bindings of Apache Parquet
|
115
116
|
test_files:
|