red-parquet 0.17.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11d3c62ada89745e9665423932e1161866dfe8cc01adbee2623b22bdbcd52a72
4
- data.tar.gz: a7818ef72443c282b14fedb5d2fd39118ed06227ffd270fa8758fb1ae232e28d
3
+ metadata.gz: fe7b0bb0ec8f4a2420a2ab1001f2b296e7ceb7946e06aa3abe523163f67c7349
4
+ data.tar.gz: 3714403cabdb00f02dcd815a7db91141882de6192bd3b4b1cd12c213ed31c08c
5
5
  SHA512:
6
- metadata.gz: c401ddccc2252fb4521ba99969bea4f6534797ac5f682084837c72b3ed7ee5a46d386512153f9f0c12ed60de27e7c32530eb448e7ed046fdbe3e92f7538819e1
7
- data.tar.gz: e682d30bf304aead6172dd984dd786b62b3e85916ff6f75ef419f1ccf1a870d5540f1be95f46b6f5ca196ffe0eda44fe754d24e7888ee3b30b2d56e2708150a0
6
+ metadata.gz: 6a071b5bed3343f0500f42fc242327d08634f08d3443a167c6b46d65b0b2645b63c24b21c4272d082bf0548bb4249250c27854370d672b66d6822d89f37db323
7
+ data.tar.gz: 4745b76798fbeab130a36235a14ba8d694597c789a3b284cc8cb71108d86c4e1c1ae2a9c45f994aa44624bc8b16bdd62606aa837fe95214d4bd4d8d6f06de2e4
@@ -19,9 +19,25 @@ module Parquet
19
19
  module ArrowTableSavable
20
20
  private
21
21
  def save_as_parquet
22
+ properties = WriterProperties.new
23
+ @options.each do |key, value|
24
+ next if value.nil?
25
+ set_method_name = "set_#{key}"
26
+ next unless properties.respond_to?(set_method_name)
27
+ case value
28
+ when ::Array, ::Hash
29
+ value.each do |path, v|
30
+ properties.__send__(set_method_name, v, path)
31
+ end
32
+ else
33
+ properties.__send__(set_method_name, value)
34
+ end
35
+ end
22
36
  chunk_size = @options[:chunk_size] || 1024 # TODO
23
- open_output_stream do |output|
24
- Parquet::ArrowFileWriter.open(@table.schema, output) do |writer|
37
+ open_raw_output_stream do |output|
38
+ ArrowFileWriter.open(@table.schema,
39
+ output,
40
+ properties) do |writer|
25
41
  writer.write_table(@table, chunk_size)
26
42
  end
27
43
  end
@@ -31,6 +31,7 @@ module Parquet
31
31
  def require_libraries
32
32
  require "parquet/arrow-table-loadable"
33
33
  require "parquet/arrow-table-savable"
34
+ require "parquet/writer-properties"
34
35
  end
35
36
 
36
37
  def load_object_info(info)
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "0.17.1"
19
+ VERSION = "1.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class WriterProperties
20
+ def set_dictionary(enable, path=nil)
21
+ if enable
22
+ enable_dictionary(path)
23
+ else
24
+ disable_dictionary(path)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
19
19
  def setup
20
20
  @count_field = Arrow::Field.new("count", :uint8)
21
21
  @visible_field = Arrow::Field.new("visible", :boolean)
22
- schema = Arrow::Schema.new([@count_field, @visible_field])
22
+ @label_field = Arrow::Field.new("label", :string)
23
+ schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
23
24
  count_arrays = [
24
25
  Arrow::UInt8Array.new([1, 2]),
25
26
  Arrow::UInt8Array.new([4, 8, 16]),
@@ -33,16 +34,30 @@ class TestArrowTableReader < Test::Unit::TestCase
33
34
  Arrow::BooleanArray.new([nil]),
34
35
  Arrow::BooleanArray.new([nil]),
35
36
  ]
37
+ label_arrays = [
38
+ Arrow::StringArray.new(["a"]),
39
+ Arrow::StringArray.new(["b", "c"]),
40
+ Arrow::StringArray.new(["d", nil, nil]),
41
+ Arrow::StringArray.new(["e", "f"]),
42
+ ]
36
43
  @count_array = Arrow::ChunkedArray.new(count_arrays)
37
44
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
38
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
45
+ @label_array = Arrow::ChunkedArray.new(label_arrays)
46
+ @table = Arrow::Table.new(schema,
47
+ [@count_array, @visible_array, @label_array])
48
+
49
+ @output = Tempfile.open(["red-parquet", ".parquet"])
50
+ begin
51
+ yield(@output)
52
+ ensure
53
+ @output.close!
54
+ end
39
55
  end
40
56
 
41
57
  def test_save_load_path
42
- tempfile = Tempfile.open(["red-parquet", ".parquet"])
43
- @table.save(tempfile.path)
58
+ @table.save(@output.path)
44
59
  assert do
45
- @table.equal_metadata(Arrow::Table.load(tempfile.path), false)
60
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
46
61
  end
47
62
  end
48
63
 
@@ -53,4 +68,32 @@ class TestArrowTableReader < Test::Unit::TestCase
53
68
  @table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
54
69
  end
55
70
  end
71
+
72
+ def test_save_load_compression
73
+ @table.save(@output.path, compression: :zstd)
74
+ assert do
75
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
76
+ end
77
+ end
78
+
79
+ def test_save_load_compression_path
80
+ @table.save(@output.path, compression: {"count" => :zstd})
81
+ assert do
82
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
83
+ end
84
+ end
85
+
86
+ def test_save_load_dictionary
87
+ @table.save(@output.path, dictionary: false)
88
+ assert do
89
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
90
+ end
91
+ end
92
+
93
+ def test_save_load_dictionary_path
94
+ @table.save(@output.path, dictionary: [["label", false]])
95
+ assert do
96
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
97
+ end
98
+ end
56
99
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-18 00:00:00.000000000 Z
11
+ date: 2020-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.1
19
+ version: 1.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.17.1
26
+ version: 1.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/parquet/arrow-table-savable.rb
86
86
  - lib/parquet/loader.rb
87
87
  - lib/parquet/version.rb
88
+ - lib/parquet/writer-properties.rb
88
89
  - red-parquet.gemspec
89
90
  - test/helper.rb
90
91
  - test/run-test.rb
@@ -93,7 +94,7 @@ homepage: https://arrow.apache.org/
93
94
  licenses:
94
95
  - Apache-2.0
95
96
  metadata: {}
96
- post_install_message:
97
+ post_install_message:
97
98
  rdoc_options: []
98
99
  require_paths:
99
100
  - lib
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
110
  version: '0'
110
111
  requirements: []
111
112
  rubygems_version: 3.1.2
112
- signing_key:
113
+ signing_key:
113
114
  specification_version: 4
114
115
  summary: Red Parquet is the Ruby bindings of Apache Parquet
115
116
  test_files: