red-parquet 0.17.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/dependency-check/Rakefile +5 -1
- data/lib/parquet/arrow-table-savable.rb +19 -3
- data/lib/parquet/loader.rb +1 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet/writer-properties.rb +28 -0
- data/test/test-arrow-table.rb +48 -5
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6b91c2b8aab70448c3aba923900385ebd9218d69760f8baf5a01dd1ad9977ef
|
4
|
+
data.tar.gz: 90a3980cada0d03897167cb53b1f9197bea059b72431c60ace09f33105d5f61e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 746f209269f4e2d273636d72de17b0c5a284f9a0adb244af8f94a558e0e5d93f77f743a2b8df8d05d313a02f4aa3110cf0c83b27e5176d5213cef7abae7cccd8
|
7
|
+
data.tar.gz: f46ffe92f4c692f46e4577345453e89a0063e897f7a3c669f1e6d83d785136cbab02eb8aaee069a18fd5cb6c8bddf8251e053b2765af9bd72dc598f235fb6afc
|
data/dependency-check/Rakefile
CHANGED
@@ -19,6 +19,7 @@
|
|
19
19
|
|
20
20
|
require "pkg-config"
|
21
21
|
require "native-package-installer"
|
22
|
+
require_relative "../lib/parquet/version"
|
22
23
|
|
23
24
|
case RUBY_PLATFORM
|
24
25
|
when /mingw|mswin/
|
@@ -33,7 +34,10 @@ end
|
|
33
34
|
namespace :dependency do
|
34
35
|
desc "Check dependency"
|
35
36
|
task :check do
|
36
|
-
unless PKGConfig.check_version?("parquet-glib"
|
37
|
+
unless PKGConfig.check_version?("parquet-glib",
|
38
|
+
Parquet::Version::MAJOR,
|
39
|
+
Parquet::Version::MINOR,
|
40
|
+
Parquet::Version::MICRO)
|
37
41
|
unless NativePackageInstaller.install(:debian => "libparquet-glib-dev",
|
38
42
|
:redhat => "parquet-glib-devel")
|
39
43
|
exit(false)
|
data/lib/parquet/arrow-table-savable.rb
CHANGED
@@ -19,9 +19,25 @@ module Parquet
|
|
19
19
|
module ArrowTableSavable
|
20
20
|
private
|
21
21
|
def save_as_parquet
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
properties = WriterProperties.new
|
23
|
+
@options.each do |key, value|
|
24
|
+
next if value.nil?
|
25
|
+
set_method_name = "set_#{key}"
|
26
|
+
next unless properties.respond_to?(set_method_name)
|
27
|
+
case value
|
28
|
+
when ::Array, ::Hash
|
29
|
+
value.each do |path, v|
|
30
|
+
properties.__send__(set_method_name, v, path)
|
31
|
+
end
|
32
|
+
else
|
33
|
+
properties.__send__(set_method_name, value)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
chunk_size = @options[:chunk_size] || @table.n_rows
|
37
|
+
open_raw_output_stream do |output|
|
38
|
+
ArrowFileWriter.open(@table.schema,
|
39
|
+
output,
|
40
|
+
properties) do |writer|
|
25
41
|
writer.write_table(@table, chunk_size)
|
26
42
|
end
|
27
43
|
end
|
data/lib/parquet/loader.rb
CHANGED
data/lib/parquet/version.rb
CHANGED
data/lib/parquet/writer-properties.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Parquet
|
19
|
+
class WriterProperties
|
20
|
+
def set_dictionary(enable, path=nil)
|
21
|
+
if enable
|
22
|
+
enable_dictionary(path)
|
23
|
+
else
|
24
|
+
disable_dictionary(path)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/test/test-arrow-table.rb
CHANGED
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
19
19
|
def setup
|
20
20
|
@count_field = Arrow::Field.new("count", :uint8)
|
21
21
|
@visible_field = Arrow::Field.new("visible", :boolean)
|
22
|
-
|
22
|
+
@label_field = Arrow::Field.new("label", :string)
|
23
|
+
schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
|
23
24
|
count_arrays = [
|
24
25
|
Arrow::UInt8Array.new([1, 2]),
|
25
26
|
Arrow::UInt8Array.new([4, 8, 16]),
|
@@ -33,16 +34,30 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
33
34
|
Arrow::BooleanArray.new([nil]),
|
34
35
|
Arrow::BooleanArray.new([nil]),
|
35
36
|
]
|
37
|
+
label_arrays = [
|
38
|
+
Arrow::StringArray.new(["a"]),
|
39
|
+
Arrow::StringArray.new(["b", "c"]),
|
40
|
+
Arrow::StringArray.new(["d", nil, nil]),
|
41
|
+
Arrow::StringArray.new(["e", "f"]),
|
42
|
+
]
|
36
43
|
@count_array = Arrow::ChunkedArray.new(count_arrays)
|
37
44
|
@visible_array = Arrow::ChunkedArray.new(visible_arrays)
|
38
|
-
@
|
45
|
+
@label_array = Arrow::ChunkedArray.new(label_arrays)
|
46
|
+
@table = Arrow::Table.new(schema,
|
47
|
+
[@count_array, @visible_array, @label_array])
|
48
|
+
|
49
|
+
@output = Tempfile.open(["red-parquet", ".parquet"])
|
50
|
+
begin
|
51
|
+
yield(@output)
|
52
|
+
ensure
|
53
|
+
@output.close!
|
54
|
+
end
|
39
55
|
end
|
40
56
|
|
41
57
|
def test_save_load_path
|
42
|
-
|
43
|
-
@table.save(tempfile.path)
|
58
|
+
@table.save(@output.path)
|
44
59
|
assert do
|
45
|
-
@table.equal_metadata(Arrow::Table.load(tempfile.path), false)
|
60
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
46
61
|
end
|
47
62
|
end
|
48
63
|
|
@@ -53,4 +68,32 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
53
68
|
@table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
|
54
69
|
end
|
55
70
|
end
|
71
|
+
|
72
|
+
def test_save_load_compression
|
73
|
+
@table.save(@output.path, compression: :zstd)
|
74
|
+
assert do
|
75
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_save_load_compression_path
|
80
|
+
@table.save(@output.path, compression: {"count" => :zstd})
|
81
|
+
assert do
|
82
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_save_load_dictionary
|
87
|
+
@table.save(@output.path, dictionary: false)
|
88
|
+
assert do
|
89
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_save_load_dictionary_path
|
94
|
+
@table.save(@output.path, dictionary: [["label", false]])
|
95
|
+
assert do
|
96
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
97
|
+
end
|
98
|
+
end
|
56
99
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.17.1
|
19
|
+
version: 4.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.17.1
|
26
|
+
version: 4.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/parquet/arrow-table-savable.rb
|
86
86
|
- lib/parquet/loader.rb
|
87
87
|
- lib/parquet/version.rb
|
88
|
+
- lib/parquet/writer-properties.rb
|
88
89
|
- red-parquet.gemspec
|
89
90
|
- test/helper.rb
|
90
91
|
- test/run-test.rb
|
@@ -93,7 +94,7 @@ homepage: https://arrow.apache.org/
|
|
93
94
|
licenses:
|
94
95
|
- Apache-2.0
|
95
96
|
metadata: {}
|
96
|
-
post_install_message:
|
97
|
+
post_install_message:
|
97
98
|
rdoc_options: []
|
98
99
|
require_paths:
|
99
100
|
- lib
|
@@ -108,11 +109,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
109
|
- !ruby/object:Gem::Version
|
109
110
|
version: '0'
|
110
111
|
requirements: []
|
111
|
-
rubygems_version: 3.
|
112
|
-
signing_key:
|
112
|
+
rubygems_version: 3.2.15
|
113
|
+
signing_key:
|
113
114
|
specification_version: 4
|
114
115
|
summary: Red Parquet is the Ruby bindings of Apache Parquet
|
115
116
|
test_files:
|
116
|
-
- test/run-test.rb
|
117
117
|
- test/helper.rb
|
118
|
+
- test/run-test.rb
|
118
119
|
- test/test-arrow-table.rb
|