red-parquet 0.16.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +5 -3
- data/dependency-check/Rakefile +5 -1
- data/lib/parquet/arrow-table-savable.rb +19 -3
- data/lib/parquet/loader.rb +1 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet/writer-properties.rb +28 -0
- data/test/run-test.rb +8 -1
- data/test/test-arrow-table.rb +53 -6
- metadata +10 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be8752047a8d28fdc406e270e00bed959dee407eaca6a1d9926e9d62ee80ae5b
|
4
|
+
data.tar.gz: b09324d81dcbf726b9115f411e881b8706339d4e7b0755ca47fb02ecc5af4758
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5249d476253ff922f4552032ab87fe251b428698314a1a45160601b9f5ccac92af6337934509c5cfd8a9ee1c463fb1b8824215692c216aa5c65cb0a40d4d4cde
|
7
|
+
data.tar.gz: 7c5009575fb4c0039813fdf6d0b3ce53eeb32c2760415cc397391bd74bf89589cf45421c060261b097eca7173ee69cb4dc73be97fbcf9b8a712addf2e37155f8
|
data/Rakefile
CHANGED
@@ -30,10 +30,12 @@ release_task.prerequisites.replace(["build", "release:rubygem_push"])
|
|
30
30
|
|
31
31
|
desc "Run tests"
|
32
32
|
task :test do
|
33
|
-
cd(
|
34
|
-
|
33
|
+
cd(base_dir) do
|
34
|
+
cd("dependency-check") do
|
35
|
+
ruby("-S", "rake")
|
36
|
+
end
|
37
|
+
ruby("test/run-test.rb")
|
35
38
|
end
|
36
|
-
ruby("test/run-test.rb")
|
37
39
|
end
|
38
40
|
|
39
41
|
task default: :test
|
data/dependency-check/Rakefile
CHANGED
@@ -19,6 +19,7 @@
|
|
19
19
|
|
20
20
|
require "pkg-config"
|
21
21
|
require "native-package-installer"
|
22
|
+
require_relative "../lib/parquet/version"
|
22
23
|
|
23
24
|
case RUBY_PLATFORM
|
24
25
|
when /mingw|mswin/
|
@@ -33,7 +34,10 @@ end
|
|
33
34
|
namespace :dependency do
|
34
35
|
desc "Check dependency"
|
35
36
|
task :check do
|
36
|
-
unless PKGConfig.check_version?("parquet-glib"
|
37
|
+
unless PKGConfig.check_version?("parquet-glib",
|
38
|
+
Parquet::Version::MAJOR,
|
39
|
+
Parquet::Version::MINOR,
|
40
|
+
Parquet::Version::MICRO)
|
37
41
|
unless NativePackageInstaller.install(:debian => "libparquet-glib-dev",
|
38
42
|
:redhat => "parquet-glib-devel")
|
39
43
|
exit(false)
|
data/lib/parquet/arrow-table-savable.rb
CHANGED
@@ -19,9 +19,25 @@ module Parquet
|
|
19
19
|
module ArrowTableSavable
|
20
20
|
private
|
21
21
|
def save_as_parquet
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
properties = WriterProperties.new
|
23
|
+
@options.each do |key, value|
|
24
|
+
next if value.nil?
|
25
|
+
set_method_name = "set_#{key}"
|
26
|
+
next unless properties.respond_to?(set_method_name)
|
27
|
+
case value
|
28
|
+
when ::Array, ::Hash
|
29
|
+
value.each do |path, v|
|
30
|
+
properties.__send__(set_method_name, v, path)
|
31
|
+
end
|
32
|
+
else
|
33
|
+
properties.__send__(set_method_name, value)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
chunk_size = @options[:chunk_size] || @table.n_rows
|
37
|
+
open_raw_output_stream do |output|
|
38
|
+
ArrowFileWriter.open(@table.schema,
|
39
|
+
output,
|
40
|
+
properties) do |writer|
|
25
41
|
writer.write_table(@table, chunk_size)
|
26
42
|
end
|
27
43
|
end
|
data/lib/parquet/loader.rb
CHANGED
data/lib/parquet/version.rb
CHANGED
data/lib/parquet/writer-properties.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Parquet
|
19
|
+
class WriterProperties
|
20
|
+
def set_dictionary(enable, path=nil)
|
21
|
+
if enable
|
22
|
+
enable_dictionary(path)
|
23
|
+
else
|
24
|
+
disable_dictionary(path)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/test/run-test.rb
CHANGED
@@ -34,7 +34,14 @@ test_dir = base_dir + "test"
|
|
34
34
|
arrow_lib_dir = arrow_base_dir + "lib"
|
35
35
|
arrow_ext_dir = arrow_base_dir + "ext" + "arrow"
|
36
36
|
|
37
|
-
|
37
|
+
build_dir = ENV["BUILD_DIR"]
|
38
|
+
if build_dir
|
39
|
+
arrow_build_dir = Pathname.new(build_dir) + "red-arrow"
|
40
|
+
else
|
41
|
+
arrow_build_dir = arrow_ext_dir
|
42
|
+
end
|
43
|
+
|
44
|
+
$LOAD_PATH.unshift(arrow_build_dir.to_s)
|
38
45
|
$LOAD_PATH.unshift(arrow_lib_dir.to_s)
|
39
46
|
$LOAD_PATH.unshift(lib_dir.to_s)
|
40
47
|
|
data/test/test-arrow-table.rb
CHANGED
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
19
19
|
def setup
|
20
20
|
@count_field = Arrow::Field.new("count", :uint8)
|
21
21
|
@visible_field = Arrow::Field.new("visible", :boolean)
|
22
|
-
|
22
|
+
@label_field = Arrow::Field.new("label", :string)
|
23
|
+
schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
|
23
24
|
count_arrays = [
|
24
25
|
Arrow::UInt8Array.new([1, 2]),
|
25
26
|
Arrow::UInt8Array.new([4, 8, 16]),
|
@@ -33,20 +34,66 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
33
34
|
Arrow::BooleanArray.new([nil]),
|
34
35
|
Arrow::BooleanArray.new([nil]),
|
35
36
|
]
|
37
|
+
label_arrays = [
|
38
|
+
Arrow::StringArray.new(["a"]),
|
39
|
+
Arrow::StringArray.new(["b", "c"]),
|
40
|
+
Arrow::StringArray.new(["d", nil, nil]),
|
41
|
+
Arrow::StringArray.new(["e", "f"]),
|
42
|
+
]
|
36
43
|
@count_array = Arrow::ChunkedArray.new(count_arrays)
|
37
44
|
@visible_array = Arrow::ChunkedArray.new(visible_arrays)
|
38
|
-
@
|
45
|
+
@label_array = Arrow::ChunkedArray.new(label_arrays)
|
46
|
+
@table = Arrow::Table.new(schema,
|
47
|
+
[@count_array, @visible_array, @label_array])
|
48
|
+
|
49
|
+
@output = Tempfile.open(["red-parquet", ".parquet"])
|
50
|
+
begin
|
51
|
+
yield(@output)
|
52
|
+
ensure
|
53
|
+
@output.close!
|
54
|
+
end
|
39
55
|
end
|
40
56
|
|
41
57
|
def test_save_load_path
|
42
|
-
|
43
|
-
|
44
|
-
|
58
|
+
@table.save(@output.path)
|
59
|
+
assert do
|
60
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
61
|
+
end
|
45
62
|
end
|
46
63
|
|
47
64
|
def test_save_load_buffer
|
48
65
|
buffer = Arrow::ResizableBuffer.new(1024)
|
49
66
|
@table.save(buffer, format: :parquet)
|
50
|
-
|
67
|
+
assert do
|
68
|
+
@table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_save_load_compression
|
73
|
+
@table.save(@output.path, compression: :zstd)
|
74
|
+
assert do
|
75
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_save_load_compression_path
|
80
|
+
@table.save(@output.path, compression: {"count" => :zstd})
|
81
|
+
assert do
|
82
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_save_load_dictionary
|
87
|
+
@table.save(@output.path, dictionary: false)
|
88
|
+
assert do
|
89
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_save_load_dictionary_path
|
94
|
+
@table.save(@output.path, dictionary: [["label", false]])
|
95
|
+
assert do
|
96
|
+
@table.equal_metadata(Arrow::Table.load(@output.path), false)
|
97
|
+
end
|
51
98
|
end
|
52
99
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.0
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.16.0
|
19
|
+
version: 2.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.16.0
|
26
|
+
version: 2.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/parquet/arrow-table-savable.rb
|
86
86
|
- lib/parquet/loader.rb
|
87
87
|
- lib/parquet/version.rb
|
88
|
+
- lib/parquet/writer-properties.rb
|
88
89
|
- red-parquet.gemspec
|
89
90
|
- test/helper.rb
|
90
91
|
- test/run-test.rb
|
@@ -93,7 +94,7 @@ homepage: https://arrow.apache.org/
|
|
93
94
|
licenses:
|
94
95
|
- Apache-2.0
|
95
96
|
metadata: {}
|
96
|
-
post_install_message:
|
97
|
+
post_install_message:
|
97
98
|
rdoc_options: []
|
98
99
|
require_paths:
|
99
100
|
- lib
|
@@ -108,12 +109,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
109
|
- !ruby/object:Gem::Version
|
109
110
|
version: '0'
|
110
111
|
requirements: []
|
111
|
-
|
112
|
-
|
113
|
-
signing_key:
|
112
|
+
rubygems_version: 3.1.2
|
113
|
+
signing_key:
|
114
114
|
specification_version: 4
|
115
115
|
summary: Red Parquet is the Ruby bindings of Apache Parquet
|
116
116
|
test_files:
|
117
|
-
- test/helper.rb
|
118
117
|
- test/run-test.rb
|
118
|
+
- test/helper.rb
|
119
119
|
- test/test-arrow-table.rb
|