red-parquet 0.17.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f70b51b41d1f3270363f9492b9f797d9eb2c3bb43ec507766874390b0e8970a8
4
- data.tar.gz: bb36f2b4640b2a6e1746e08f362b3fc84af6fd76d4806ed99ee7cbf67a7f5dc9
3
+ metadata.gz: 6bc3ea63383a2a988aac1fd3596007c44427e226447df1610985deb4752b2142
4
+ data.tar.gz: 2f36454279a9f304df19279e429d87f2e2fe76314fb8523c1917ba443d23dc61
5
5
  SHA512:
6
- metadata.gz: 99b3b1008d9f1c43732f25d04e3b3e2633e4cd4b9f0034573d1b5e399bf6680eff3b645b40c60bce92c6bd9ae7ac60bfd4b394d17f2c0011804da32bef10350b
7
- data.tar.gz: 51f70aa5c9c36f336c7c4f14d02b61e12e6a31937a0c22aa5251083b22f6f8eee14246c983a5a8ca9031c22d4ef0e4156ef0a970505e1f7c9b298efa6d11b50f
6
+ metadata.gz: 8d2f7f3fd3e9034fbcac06f37377b14093bda97e073b3ce7d302e78e88814c5496a3eb68f7a6e942ded966266badc89608ac38867cedb6e000bd4c8167184d38
7
+ data.tar.gz: 72ca6746ff2aa8435e5511e6b423ba365b1b2f007f21d5760895117203cab9f0d68a3e95d56f051f207c90b6875c7637813c67a76f9610f3ef00187f403b642f
@@ -19,6 +19,7 @@
19
19
 
20
20
  require "pkg-config"
21
21
  require "native-package-installer"
22
+ require_relative "../lib/parquet/version"
22
23
 
23
24
  case RUBY_PLATFORM
24
25
  when /mingw|mswin/
@@ -33,7 +34,10 @@ end
33
34
  namespace :dependency do
34
35
  desc "Check dependency"
35
36
  task :check do
36
- unless PKGConfig.check_version?("parquet-glib")
37
+ unless PKGConfig.check_version?("parquet-glib",
38
+ Parquet::Version::MAJOR,
39
+ Parquet::Version::MINOR,
40
+ Parquet::Version::MICRO)
37
41
  unless NativePackageInstaller.install(:debian => "libparquet-glib-dev",
38
42
  :redhat => "parquet-glib-devel")
39
43
  exit(false)
@@ -19,9 +19,25 @@ module Parquet
19
19
  module ArrowTableSavable
20
20
  private
21
21
  def save_as_parquet
22
- chunk_size = @options[:chunk_size] || 1024 # TODO
23
- open_output_stream do |output|
24
- Parquet::ArrowFileWriter.open(@table.schema, output) do |writer|
22
+ properties = WriterProperties.new
23
+ @options.each do |key, value|
24
+ next if value.nil?
25
+ set_method_name = "set_#{key}"
26
+ next unless properties.respond_to?(set_method_name)
27
+ case value
28
+ when ::Array, ::Hash
29
+ value.each do |path, v|
30
+ properties.__send__(set_method_name, v, path)
31
+ end
32
+ else
33
+ properties.__send__(set_method_name, value)
34
+ end
35
+ end
36
+ chunk_size = @options[:chunk_size] || @table.n_rows
37
+ open_raw_output_stream do |output|
38
+ ArrowFileWriter.open(@table.schema,
39
+ output,
40
+ properties) do |writer|
25
41
  writer.write_table(@table, chunk_size)
26
42
  end
27
43
  end
@@ -31,6 +31,7 @@ module Parquet
31
31
  def require_libraries
32
32
  require "parquet/arrow-table-loadable"
33
33
  require "parquet/arrow-table-savable"
34
+ require "parquet/writer-properties"
34
35
  end
35
36
 
36
37
  def load_object_info(info)
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "0.17.0"
19
+ VERSION = "3.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class WriterProperties
20
+ def set_dictionary(enable, path=nil)
21
+ if enable
22
+ enable_dictionary(path)
23
+ else
24
+ disable_dictionary(path)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
19
19
  def setup
20
20
  @count_field = Arrow::Field.new("count", :uint8)
21
21
  @visible_field = Arrow::Field.new("visible", :boolean)
22
- schema = Arrow::Schema.new([@count_field, @visible_field])
22
+ @label_field = Arrow::Field.new("label", :string)
23
+ schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
23
24
  count_arrays = [
24
25
  Arrow::UInt8Array.new([1, 2]),
25
26
  Arrow::UInt8Array.new([4, 8, 16]),
@@ -33,16 +34,30 @@ class TestArrowTableReader < Test::Unit::TestCase
33
34
  Arrow::BooleanArray.new([nil]),
34
35
  Arrow::BooleanArray.new([nil]),
35
36
  ]
37
+ label_arrays = [
38
+ Arrow::StringArray.new(["a"]),
39
+ Arrow::StringArray.new(["b", "c"]),
40
+ Arrow::StringArray.new(["d", nil, nil]),
41
+ Arrow::StringArray.new(["e", "f"]),
42
+ ]
36
43
  @count_array = Arrow::ChunkedArray.new(count_arrays)
37
44
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
38
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
45
+ @label_array = Arrow::ChunkedArray.new(label_arrays)
46
+ @table = Arrow::Table.new(schema,
47
+ [@count_array, @visible_array, @label_array])
48
+
49
+ @output = Tempfile.open(["red-parquet", ".parquet"])
50
+ begin
51
+ yield(@output)
52
+ ensure
53
+ @output.close!
54
+ end
39
55
  end
40
56
 
41
57
  def test_save_load_path
42
- tempfile = Tempfile.open(["red-parquet", ".parquet"])
43
- @table.save(tempfile.path)
58
+ @table.save(@output.path)
44
59
  assert do
45
- @table.equal_metadata(Arrow::Table.load(tempfile.path), false)
60
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
46
61
  end
47
62
  end
48
63
 
@@ -53,4 +68,32 @@ class TestArrowTableReader < Test::Unit::TestCase
53
68
  @table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
54
69
  end
55
70
  end
71
+
72
+ def test_save_load_compression
73
+ @table.save(@output.path, compression: :zstd)
74
+ assert do
75
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
76
+ end
77
+ end
78
+
79
+ def test_save_load_compression_path
80
+ @table.save(@output.path, compression: {"count" => :zstd})
81
+ assert do
82
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
83
+ end
84
+ end
85
+
86
+ def test_save_load_dictionary
87
+ @table.save(@output.path, dictionary: false)
88
+ assert do
89
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
90
+ end
91
+ end
92
+
93
+ def test_save_load_dictionary_path
94
+ @table.save(@output.path, dictionary: [["label", false]])
95
+ assert do
96
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
97
+ end
98
+ end
56
99
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-20 00:00:00.000000000 Z
11
+ date: 2021-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.0
19
+ version: 3.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.17.0
26
+ version: 3.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/parquet/arrow-table-savable.rb
86
86
  - lib/parquet/loader.rb
87
87
  - lib/parquet/version.rb
88
+ - lib/parquet/writer-properties.rb
88
89
  - red-parquet.gemspec
89
90
  - test/helper.rb
90
91
  - test/run-test.rb
@@ -108,11 +109,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
109
  - !ruby/object:Gem::Version
109
110
  version: '0'
110
111
  requirements: []
111
- rubygems_version: 3.1.2
112
+ rubygems_version: 3.2.5
112
113
  signing_key:
113
114
  specification_version: 4
114
115
  summary: Red Parquet is the Ruby bindings of Apache Parquet
115
116
  test_files:
116
- - test/run-test.rb
117
117
  - test/helper.rb
118
+ - test/run-test.rb
118
119
  - test/test-arrow-table.rb