red-parquet 0.15.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 699be2bb00f42f7dc5d021fa0526fff161c53bbe54439a2d9e437bedb3b4bde9
4
- data.tar.gz: 1d8bd764218b5a761ad95d1847a73d987d76ae8c1dc887a9aa9011d54b96d89b
3
+ metadata.gz: 9a50764732fd4ddaf7563956b624d09ef09a72ea4fa64c9388ce64033831f223
4
+ data.tar.gz: edee5db1aa5a76071d1128384dcbeb1af6755fcaf3004cd2d24474f25b9193ad
5
5
  SHA512:
6
- metadata.gz: 42492ad141078327bd03a484dd164a7a07e3d3c1ab54e842ac046e6d9fd0e206a4f0c926730076f4ff55c5c5227db29181b0861f45a6d25c48e58b077b6df75a
7
- data.tar.gz: 87a62a96ed642ef474495553459e6d405fd1d8e483709c19a43df0c5d845ce6ce236405b36f27dd30575616199e3e52a70a270d2af99e7b402ce6c01257f7f2c
6
+ metadata.gz: 6f9f51ff3af67261bbf97c36d954995e97daa6f4df7413f85106878d13091515f49831d81b4c2fe366495e05e0eedf5dd0d62fec46cab206c7420c128ac56576
7
+ data.tar.gz: 513ece4c28ec862f7694349a0f4740c1a86ba3cb39fbb5d262aa188cf471dbeb40fdb30aa1ac1edf80a1efa6bd058ccd99e0b9ad3b33f99ebda6af402b68bc40
data/README.md CHANGED
@@ -27,7 +27,7 @@ Red Parquet is the Ruby bindings of Apache Parquet. Red Parquet is based on GObj
27
27
 
28
28
  Red Parquet uses [Apache Parquet GLib](https://github.com/apache/arrow/tree/master/c_glib/parquet-glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Parquet.
29
29
 
30
- Apache Parquet GLib is a C wrapper for [Apache Parquet C++](https://github.com/apache/arrow/tree/master/cpp/parquet). GObject Introspection can't use Apache Parquet C++ directly. Apache Parquet GLib is a bridge between Apache Parquet C++ and GObject Introspection.
30
+ Apache Parquet GLib is a C wrapper for [Apache Parquet C++](https://github.com/apache/arrow/tree/master/cpp/src/parquet). GObject Introspection can't use Apache Parquet C++ directly. Apache Parquet GLib is a bridge between Apache Parquet C++ and GObject Introspection.
31
31
 
32
32
  gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Parquet uses GObject Introspection via gobject-introspection gem.
33
33
 
data/Rakefile CHANGED
@@ -30,10 +30,12 @@ release_task.prerequisites.replace(["build", "release:rubygem_push"])
30
30
 
31
31
  desc "Run tests"
32
32
  task :test do
33
- cd("dependency-check") do
34
- ruby("-S", "rake")
33
+ cd(base_dir) do
34
+ cd("dependency-check") do
35
+ ruby("-S", "rake")
36
+ end
37
+ ruby("test/run-test.rb")
35
38
  end
36
- ruby("test/run-test.rb")
37
39
  end
38
40
 
39
41
  task default: :test
@@ -19,9 +19,25 @@ module Parquet
19
19
  module ArrowTableSavable
20
20
  private
21
21
  def save_as_parquet
22
+ properties = WriterProperties.new
23
+ @options.each do |key, value|
24
+ next if value.nil?
25
+ set_method_name = "set_#{key}"
26
+ next unless properties.respond_to?(set_method_name)
27
+ case value
28
+ when ::Array, ::Hash
29
+ value.each do |path, v|
30
+ properties.__send__(set_method_name, v, path)
31
+ end
32
+ else
33
+ properties.__send__(set_method_name, value)
34
+ end
35
+ end
22
36
  chunk_size = @options[:chunk_size] || 1024 # TODO
23
- open_output_stream do |output|
24
- Parquet::ArrowFileWriter.open(@table.schema, output) do |writer|
37
+ open_raw_output_stream do |output|
38
+ ArrowFileWriter.open(@table.schema,
39
+ output,
40
+ properties) do |writer|
25
41
  writer.write_table(@table, chunk_size)
26
42
  end
27
43
  end
@@ -31,6 +31,7 @@ module Parquet
31
31
  def require_libraries
32
32
  require "parquet/arrow-table-loadable"
33
33
  require "parquet/arrow-table-savable"
34
+ require "parquet/writer-properties"
34
35
  end
35
36
 
36
37
  def load_object_info(info)
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "0.15.1"
19
+ VERSION = "1.0.1"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class WriterProperties
20
+ def set_dictionary(enable, path=nil)
21
+ if enable
22
+ enable_dictionary(path)
23
+ else
24
+ disable_dictionary(path)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -34,7 +34,14 @@ test_dir = base_dir + "test"
34
34
  arrow_lib_dir = arrow_base_dir + "lib"
35
35
  arrow_ext_dir = arrow_base_dir + "ext" + "arrow"
36
36
 
37
- $LOAD_PATH.unshift(arrow_ext_dir.to_s)
37
+ build_dir = ENV["BUILD_DIR"]
38
+ if build_dir
39
+ arrow_build_dir = Pathname.new(build_dir) + "red-arrow"
40
+ else
41
+ arrow_build_dir = arrow_ext_dir
42
+ end
43
+
44
+ $LOAD_PATH.unshift(arrow_build_dir.to_s)
38
45
  $LOAD_PATH.unshift(arrow_lib_dir.to_s)
39
46
  $LOAD_PATH.unshift(lib_dir.to_s)
40
47
 
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
19
19
  def setup
20
20
  @count_field = Arrow::Field.new("count", :uint8)
21
21
  @visible_field = Arrow::Field.new("visible", :boolean)
22
- schema = Arrow::Schema.new([@count_field, @visible_field])
22
+ @label_field = Arrow::Field.new("label", :string)
23
+ schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
23
24
  count_arrays = [
24
25
  Arrow::UInt8Array.new([1, 2]),
25
26
  Arrow::UInt8Array.new([4, 8, 16]),
@@ -33,20 +34,66 @@ class TestArrowTableReader < Test::Unit::TestCase
33
34
  Arrow::BooleanArray.new([nil]),
34
35
  Arrow::BooleanArray.new([nil]),
35
36
  ]
37
+ label_arrays = [
38
+ Arrow::StringArray.new(["a"]),
39
+ Arrow::StringArray.new(["b", "c"]),
40
+ Arrow::StringArray.new(["d", nil, nil]),
41
+ Arrow::StringArray.new(["e", "f"]),
42
+ ]
36
43
  @count_array = Arrow::ChunkedArray.new(count_arrays)
37
44
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
38
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
45
+ @label_array = Arrow::ChunkedArray.new(label_arrays)
46
+ @table = Arrow::Table.new(schema,
47
+ [@count_array, @visible_array, @label_array])
48
+
49
+ @output = Tempfile.open(["red-parquet", ".parquet"])
50
+ begin
51
+ yield(@output)
52
+ ensure
53
+ @output.close!
54
+ end
39
55
  end
40
56
 
41
57
  def test_save_load_path
42
- tempfile = Tempfile.open(["red-parquet", ".parquet"])
43
- @table.save(tempfile.path)
44
- assert_equal(@table, Arrow::Table.load(tempfile.path))
58
+ @table.save(@output.path)
59
+ assert do
60
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
61
+ end
45
62
  end
46
63
 
47
64
  def test_save_load_buffer
48
65
  buffer = Arrow::ResizableBuffer.new(1024)
49
66
  @table.save(buffer, format: :parquet)
50
- assert_equal(@table, Arrow::Table.load(buffer, format: :parquet))
67
+ assert do
68
+ @table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
69
+ end
70
+ end
71
+
72
+ def test_save_load_compression
73
+ @table.save(@output.path, compression: :zstd)
74
+ assert do
75
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
76
+ end
77
+ end
78
+
79
+ def test_save_load_compression_path
80
+ @table.save(@output.path, compression: {"count" => :zstd})
81
+ assert do
82
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
83
+ end
84
+ end
85
+
86
+ def test_save_load_dictionary
87
+ @table.save(@output.path, dictionary: false)
88
+ assert do
89
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
90
+ end
91
+ end
92
+
93
+ def test_save_load_dictionary_path
94
+ @table.save(@output.path, dictionary: [["label", false]])
95
+ assert do
96
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
97
+ end
51
98
  end
52
99
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.1
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-01 00:00:00.000000000 Z
11
+ date: 2020-08-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.15.1
19
+ version: 1.0.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.15.1
26
+ version: 1.0.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/parquet/arrow-table-savable.rb
86
86
  - lib/parquet/loader.rb
87
87
  - lib/parquet/version.rb
88
+ - lib/parquet/writer-properties.rb
88
89
  - red-parquet.gemspec
89
90
  - test/helper.rb
90
91
  - test/run-test.rb
@@ -93,7 +94,7 @@ homepage: https://arrow.apache.org/
93
94
  licenses:
94
95
  - Apache-2.0
95
96
  metadata: {}
96
- post_install_message:
97
+ post_install_message:
97
98
  rdoc_options: []
98
99
  require_paths:
99
100
  - lib
@@ -108,8 +109,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
109
  - !ruby/object:Gem::Version
109
110
  version: '0'
110
111
  requirements: []
111
- rubygems_version: 3.0.6
112
- signing_key:
112
+ rubygems_version: 3.1.2
113
+ signing_key:
113
114
  specification_version: 4
114
115
  summary: Red Parquet is the Ruby bindings of Apache Parquet
115
116
  test_files: