red-parquet 0.16.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8d253cc5855c8c6a474a8258ffbb07ba1a8d1a232cdb15930d0022b9ff0fc94
4
- data.tar.gz: d44a86bf31748f8f19e68a3eabab7945ab331769b30c72dd5d804bc5b638b0c4
3
+ metadata.gz: be8752047a8d28fdc406e270e00bed959dee407eaca6a1d9926e9d62ee80ae5b
4
+ data.tar.gz: b09324d81dcbf726b9115f411e881b8706339d4e7b0755ca47fb02ecc5af4758
5
5
  SHA512:
6
- metadata.gz: 907010ccfd3a11245e668b447db3df753e31f91e31576d7cf125cb31907ae14fad5bfea75c2b3220092a971b88dba08383a13ae5040267100abfc53865993cf8
7
- data.tar.gz: 75891ebbbc5605851e167333550896abfeef809c76b5dc440e32fbe755f94c2a985fb81226511a1cb400ca3b6822c600f12ea419f08f2dde2d77f319ef0b70ea
6
+ metadata.gz: 5249d476253ff922f4552032ab87fe251b428698314a1a45160601b9f5ccac92af6337934509c5cfd8a9ee1c463fb1b8824215692c216aa5c65cb0a40d4d4cde
7
+ data.tar.gz: 7c5009575fb4c0039813fdf6d0b3ce53eeb32c2760415cc397391bd74bf89589cf45421c060261b097eca7173ee69cb4dc73be97fbcf9b8a712addf2e37155f8
data/Rakefile CHANGED
@@ -30,10 +30,12 @@ release_task.prerequisites.replace(["build", "release:rubygem_push"])
30
30
 
31
31
  desc "Run tests"
32
32
  task :test do
33
- cd("dependency-check") do
34
- ruby("-S", "rake")
33
+ cd(base_dir) do
34
+ cd("dependency-check") do
35
+ ruby("-S", "rake")
36
+ end
37
+ ruby("test/run-test.rb")
35
38
  end
36
- ruby("test/run-test.rb")
37
39
  end
38
40
 
39
41
  task default: :test
@@ -19,6 +19,7 @@
19
19
 
20
20
  require "pkg-config"
21
21
  require "native-package-installer"
22
+ require_relative "../lib/parquet/version"
22
23
 
23
24
  case RUBY_PLATFORM
24
25
  when /mingw|mswin/
@@ -33,7 +34,10 @@ end
33
34
  namespace :dependency do
34
35
  desc "Check dependency"
35
36
  task :check do
36
- unless PKGConfig.check_version?("parquet-glib")
37
+ unless PKGConfig.check_version?("parquet-glib",
38
+ Parquet::Version::MAJOR,
39
+ Parquet::Version::MINOR,
40
+ Parquet::Version::MICRO)
37
41
  unless NativePackageInstaller.install(:debian => "libparquet-glib-dev",
38
42
  :redhat => "parquet-glib-devel")
39
43
  exit(false)
@@ -19,9 +19,25 @@ module Parquet
19
19
  module ArrowTableSavable
20
20
  private
21
21
  def save_as_parquet
22
- chunk_size = @options[:chunk_size] || 1024 # TODO
23
- open_output_stream do |output|
24
- Parquet::ArrowFileWriter.open(@table.schema, output) do |writer|
22
+ properties = WriterProperties.new
23
+ @options.each do |key, value|
24
+ next if value.nil?
25
+ set_method_name = "set_#{key}"
26
+ next unless properties.respond_to?(set_method_name)
27
+ case value
28
+ when ::Array, ::Hash
29
+ value.each do |path, v|
30
+ properties.__send__(set_method_name, v, path)
31
+ end
32
+ else
33
+ properties.__send__(set_method_name, value)
34
+ end
35
+ end
36
+ chunk_size = @options[:chunk_size] || @table.n_rows
37
+ open_raw_output_stream do |output|
38
+ ArrowFileWriter.open(@table.schema,
39
+ output,
40
+ properties) do |writer|
25
41
  writer.write_table(@table, chunk_size)
26
42
  end
27
43
  end
@@ -31,6 +31,7 @@ module Parquet
31
31
  def require_libraries
32
32
  require "parquet/arrow-table-loadable"
33
33
  require "parquet/arrow-table-savable"
34
+ require "parquet/writer-properties"
34
35
  end
35
36
 
36
37
  def load_object_info(info)
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "0.16.0"
19
+ VERSION = "2.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class WriterProperties
20
+ def set_dictionary(enable, path=nil)
21
+ if enable
22
+ enable_dictionary(path)
23
+ else
24
+ disable_dictionary(path)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -34,7 +34,14 @@ test_dir = base_dir + "test"
34
34
  arrow_lib_dir = arrow_base_dir + "lib"
35
35
  arrow_ext_dir = arrow_base_dir + "ext" + "arrow"
36
36
 
37
- $LOAD_PATH.unshift(arrow_ext_dir.to_s)
37
+ build_dir = ENV["BUILD_DIR"]
38
+ if build_dir
39
+ arrow_build_dir = Pathname.new(build_dir) + "red-arrow"
40
+ else
41
+ arrow_build_dir = arrow_ext_dir
42
+ end
43
+
44
+ $LOAD_PATH.unshift(arrow_build_dir.to_s)
38
45
  $LOAD_PATH.unshift(arrow_lib_dir.to_s)
39
46
  $LOAD_PATH.unshift(lib_dir.to_s)
40
47
 
@@ -19,7 +19,8 @@ class TestArrowTableReader < Test::Unit::TestCase
19
19
  def setup
20
20
  @count_field = Arrow::Field.new("count", :uint8)
21
21
  @visible_field = Arrow::Field.new("visible", :boolean)
22
- schema = Arrow::Schema.new([@count_field, @visible_field])
22
+ @label_field = Arrow::Field.new("label", :string)
23
+ schema = Arrow::Schema.new([@count_field, @visible_field, @label_field])
23
24
  count_arrays = [
24
25
  Arrow::UInt8Array.new([1, 2]),
25
26
  Arrow::UInt8Array.new([4, 8, 16]),
@@ -33,20 +34,66 @@ class TestArrowTableReader < Test::Unit::TestCase
33
34
  Arrow::BooleanArray.new([nil]),
34
35
  Arrow::BooleanArray.new([nil]),
35
36
  ]
37
+ label_arrays = [
38
+ Arrow::StringArray.new(["a"]),
39
+ Arrow::StringArray.new(["b", "c"]),
40
+ Arrow::StringArray.new(["d", nil, nil]),
41
+ Arrow::StringArray.new(["e", "f"]),
42
+ ]
36
43
  @count_array = Arrow::ChunkedArray.new(count_arrays)
37
44
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
38
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
45
+ @label_array = Arrow::ChunkedArray.new(label_arrays)
46
+ @table = Arrow::Table.new(schema,
47
+ [@count_array, @visible_array, @label_array])
48
+
49
+ @output = Tempfile.open(["red-parquet", ".parquet"])
50
+ begin
51
+ yield(@output)
52
+ ensure
53
+ @output.close!
54
+ end
39
55
  end
40
56
 
41
57
  def test_save_load_path
42
- tempfile = Tempfile.open(["red-parquet", ".parquet"])
43
- @table.save(tempfile.path)
44
- assert_equal(@table, Arrow::Table.load(tempfile.path))
58
+ @table.save(@output.path)
59
+ assert do
60
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
61
+ end
45
62
  end
46
63
 
47
64
  def test_save_load_buffer
48
65
  buffer = Arrow::ResizableBuffer.new(1024)
49
66
  @table.save(buffer, format: :parquet)
50
- assert_equal(@table, Arrow::Table.load(buffer, format: :parquet))
67
+ assert do
68
+ @table.equal_metadata(Arrow::Table.load(buffer, format: :parquet), false)
69
+ end
70
+ end
71
+
72
+ def test_save_load_compression
73
+ @table.save(@output.path, compression: :zstd)
74
+ assert do
75
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
76
+ end
77
+ end
78
+
79
+ def test_save_load_compression_path
80
+ @table.save(@output.path, compression: {"count" => :zstd})
81
+ assert do
82
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
83
+ end
84
+ end
85
+
86
+ def test_save_load_dictionary
87
+ @table.save(@output.path, dictionary: false)
88
+ assert do
89
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
90
+ end
91
+ end
92
+
93
+ def test_save_load_dictionary_path
94
+ @table.save(@output.path, dictionary: [["label", false]])
95
+ assert do
96
+ @table.equal_metadata(Arrow::Table.load(@output.path), false)
97
+ end
51
98
  end
52
99
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-07 00:00:00.000000000 Z
11
+ date: 2020-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.16.0
19
+ version: 2.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.16.0
26
+ version: 2.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/parquet/arrow-table-savable.rb
86
86
  - lib/parquet/loader.rb
87
87
  - lib/parquet/version.rb
88
+ - lib/parquet/writer-properties.rb
88
89
  - red-parquet.gemspec
89
90
  - test/helper.rb
90
91
  - test/run-test.rb
@@ -93,7 +94,7 @@ homepage: https://arrow.apache.org/
93
94
  licenses:
94
95
  - Apache-2.0
95
96
  metadata: {}
96
- post_install_message:
97
+ post_install_message:
97
98
  rdoc_options: []
98
99
  require_paths:
99
100
  - lib
@@ -108,12 +109,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
109
  - !ruby/object:Gem::Version
109
110
  version: '0'
110
111
  requirements: []
111
- rubyforge_project:
112
- rubygems_version: 2.7.6.2
113
- signing_key:
112
+ rubygems_version: 3.1.2
113
+ signing_key:
114
114
  specification_version: 4
115
115
  summary: Red Parquet is the Ruby bindings of Apache Parquet
116
116
  test_files:
117
- - test/helper.rb
118
117
  - test/run-test.rb
118
+ - test/helper.rb
119
119
  - test/test-arrow-table.rb