red-parquet 0.17.0 → 3.0.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f70b51b41d1f3270363f9492b9f797d9eb2c3bb43ec507766874390b0e8970a8
4
- data.tar.gz: bb36f2b4640b2a6e1746e08f362b3fc84af6fd76d4806ed99ee7cbf67a7f5dc9
3
+ metadata.gz: 6bc3ea63383a2a988aac1fd3596007c44427e226447df1610985deb4752b2142
4
+ data.tar.gz: 2f36454279a9f304df19279e429d87f2e2fe76314fb8523c1917ba443d23dc61
5
5
  SHA512:
6
- metadata.gz: 99b3b1008d9f1c43732f25d04e3b3e2633e4cd4b9f0034573d1b5e399bf6680eff3b645b40c60bce92c6bd9ae7ac60bfd4b394d17f2c0011804da32bef10350b
7
- data.tar.gz: 51f70aa5c9c36f336c7c4f14d02b61e12e6a31937a0c22aa5251083b22f6f8eee14246c983a5a8ca9031c22d4ef0e4156ef0a970505e1f7c9b298efa6d11b50f
6
+ metadata.gz: 8d2f7f3fd3e9034fbcac06f37377b14093bda97e073b3ce7d302e78e88814c5496a3eb68f7a6e942ded966266badc89608ac38867cedb6e000bd4c8167184d38
7
+ data.tar.gz: 72ca6746ff2aa8435e5511e6b423ba365b1b2f007f21d5760895117203cab9f0d68a3e95d56f051f207c90b6875c7637813c67a76f9610f3ef00187f403b642f
@@ -19,6 +19,7 @@
19
19
 
20
20
  require "pkg-config"
21
21
  require "native-package-installer"
22
+ require_relative "../lib/parquet/version"
22
23
 
23
24
  case RUBY_PLATFORM
24
25
  when /mingw|mswin/
@@ -33,7 +34,10 @@ end
33
34
  namespace :dependency do
34
35
  desc "Check dependency"
35
36
  task :check do
36
- unless PKGConfig.check_version?("parquet-glib")
37
+ unless PKGConfig.check_version?("parquet-glib",
38
+ Parquet::Version::MAJOR,
39
+ Parquet::Version::MINOR,
40
+ Parquet::Version::MICRO)
37
41
  unless NativePackageInstaller.install(:debian => "libparquet-glib-dev",
38
42
  :redhat => "parquet-glib-devel")
39
43
  exit(false)
@@ -19,9 +19,25 @@ module Parquet
19
19
  module ArrowTableSavable
20
20
  private
21
21
  def save_as_parquet
22
- chunk_size = @options[:chunk_size] || 1024 # TODO
23
- open_output_stream do |output|
24
- Parquet::ArrowFileWriter.open(@table.schema, output) do |writer|
22
+ properties = WriterProperties.new
23
+ @options.each do |key, value|
24
+ next if value.nil?
25
+ set_method_name = "set_#{key}"
26
+ next unless properties.respond_to?(set_method_name)
27
+ case value
28
+ when ::Array, ::Hash
29
+ value.each do |path, v|
30
+ properties.__send__(set_method_name, v, path)
31
+ end
32
+ else
33
+ properties.__send__(set_method_name, value)
34
+ end
35
+ end
36
+ chunk_size = @options[:chunk_size] || @table.n_rows
37
+ open_raw_output_stream do |output|
38
+ ArrowFileWriter.open(@table.schema,
39
+ output,
40
+ properties) do |writer|
25
41
  writer.write_table(@table, chunk_size)
26
42
  end
27
43
  end
@@ -31,6 +31,7 @@ module Parquet
31
31
  def require_libraries
32
32
  require "parquet/arrow-table-loadable"
33
33
  require "parquet/arrow-table-savable"
34
+ require "parquet/writer-properties"
34
35
  end
35
36
 
36
37
  def load_object_info(info)
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "0.17.0"
19
+ VERSION = "3.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class WriterProperties
20
+ def set_dictionary(enable, path=nil)
21
+ if enable
22
+ enable_dictionary(path)
23
+ else
24
+ disable_dictionary(path)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
19
19
  def setup
20
20
  @count_field = Arrow::Field.new("count", :uint8)
21
21
  @visible_field = Arrow::Field.new("visible", :boolean)
22
- schema = Arrow::Schema.new([@count_field, @visible_field])
22
+ @label_field = Arrow::Field.new("label", :string)
23
+ schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
23
24
  count_arrays = [
24
25
  Arrow::UInt8Array.new([1, 2]),
25
26
  Arrow::UInt8Array.new([4, 8, 16]),
@@ -33,16 +34,30 @@ class TestArrowTableReader < Test::Unit::TestCase
33
34
  Arrow::BooleanArray.new([nil]),
34
35
  Arrow::BooleanArray.new([nil]),
35
36
  ]
37
+ label_arrays = [
38
+ Arrow::StringArray.new(["a"]),
39
+ Arrow::StringArray.new(["b", "c"]),
40
+ Arrow::StringArray.new(["d", nil, nil]),
41
+ Arrow::StringArray.new(["e", "f"]),
42
+ ]
36
43
  @count_array = Arrow::ChunkedArray.new(count_arrays)
37
44
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
38
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
45
+ @label_array = Arrow::ChunkedArray.new(label_arrays)
46
+ @table = Arrow::Table.new(schema,
47
+ [@count_array, @visible_array, @label_array])
48
+
49
+ @output = Tempfile.open(["red-parquet", ".parquet"])
50
+ begin
51
+ yield(@output)
52
+ ensure
53
+ @output.close!
54
+ end
39
55
  end
40
56
 
41
57
  def test_save_load_path
42
- tempfile = Tempfile.open(["red-parquet", ".parquet"])
43
- @table.save(tempfile.path)
58
+ @table.save(@output.path)
44
59
  assert do
45
- @table.equal_metadata(Arrow::Table.load(tempfile.path), false)
60
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
46
61
  end
47
62
  end
48
63
 
@@ -53,4 +68,32 @@ class TestArrowTableReader < Test::Unit::TestCase
53
68
  @table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
54
69
  end
55
70
  end
71
+
72
+ def test_save_load_compression
73
+ @table.save(@output.path, compression: :zstd)
74
+ assert do
75
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
76
+ end
77
+ end
78
+
79
+ def test_save_load_compression_path
80
+ @table.save(@output.path, compression: {"count" => :zstd})
81
+ assert do
82
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
83
+ end
84
+ end
85
+
86
+ def test_save_load_dictionary
87
+ @table.save(@output.path, dictionary: false)
88
+ assert do
89
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
90
+ end
91
+ end
92
+
93
+ def test_save_load_dictionary_path
94
+ @table.save(@output.path, dictionary: [["label", false]])
95
+ assert do
96
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
97
+ end
98
+ end
56
99
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-20 00:00:00.000000000 Z
11
+ date: 2021-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.0
19
+ version: 3.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.17.0
26
+ version: 3.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/parquet/arrow-table-savable.rb
86
86
  - lib/parquet/loader.rb
87
87
  - lib/parquet/version.rb
88
+ - lib/parquet/writer-properties.rb
88
89
  - red-parquet.gemspec
89
90
  - test/helper.rb
90
91
  - test/run-test.rb
@@ -108,11 +109,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
109
  - !ruby/object:Gem::Version
109
110
  version: '0'
110
111
  requirements: []
111
- rubygems_version: 3.1.2
112
+ rubygems_version: 3.2.5
112
113
  signing_key:
113
114
  specification_version: 4
114
115
  summary: Red Parquet is the Ruby bindings of Apache Parquet
115
116
  test_files:
116
- - test/run-test.rb
117
117
  - test/helper.rb
118
+ - test/run-test.rb
118
119
  - test/test-arrow-table.rb