red-parquet 21.0.0 → 23.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dea271e0c9cad54df53e7505a5a15e4d5366684db1a9fc550c859a104a46c94d
4
- data.tar.gz: d82ac2ab36beef09fc8cef1f6ff37e149ac6c5ef41298954514694903ff617b7
3
+ metadata.gz: 323fa3b4a88ef118dd4580d50a34c8f30db87b93338e3b43d142b17d19ee6efa
4
+ data.tar.gz: 0a0d9fb8fced5f7d4ac62d57138782d1a45a476030d25e40666d50c9fe0290cc
5
5
  SHA512:
6
- metadata.gz: a79b25fd5fe7b80e61ea1dabdd9f542266c83202c620aeb54280f74ab4f7bf7cdb50fffcb46f17434484930188241849b1aa5ffa789ffaacea9ef16265db01f7
7
- data.tar.gz: ab025f30c30cea3371c1091ddf9ccd3562499028a5f7f72cf70d94fe16ec059203ddf4fffb9eb7981e8dc60260d891fa950875e466de75cbc2fce7b95ed970f8
6
+ metadata.gz: 1e45caca717bdefade59a70d8273c8f20a20050651d256db56d47a814567626627e6bb5385f645a4b6ae97a56aa025fa0b48cddcf29ded4b81750e8713a65c76
7
+ data.tar.gz: 634879c4e7919e28094f963e5d6e44511e4a2ce48c0ef622fc1178c0c297779f4a020588835d4ed778188661f927c9709186bbfdda44356ba8cd38cde363045e
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Parquet
19
19
  class ArrowFileReader
20
+ include Arrow::BlockClosable
21
+
20
22
  def each_row_group
21
23
  return to_enum(__method__) {n_row_groups} unless block_given?
22
24
 
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Parquet
19
19
  class ArrowFileWriter
20
+ include Arrow::BlockClosable
21
+
20
22
  # Write data to Apache Parquet.
21
23
  #
22
24
  # @return [void]
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "21.0.0"
19
+ VERSION = "23.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-parquet.gemspec CHANGED
@@ -29,7 +29,7 @@ Gem::Specification.new do |spec|
29
29
  ]
30
30
  spec.version = version_components.compact.join(".")
31
31
  spec.homepage = "https://arrow.apache.org/"
32
- spec.authors = ["Apache Arrow Developers"]
32
+ spec.authors = ["The Apache Software Foundation"]
33
33
  spec.email = ["dev@arrow.apache.org"]
34
34
 
35
35
  spec.summary = "Red Parquet is the Ruby bindings of Apache Parquet"
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestArrayStatistics < Test::Unit::TestCase
19
+ def setup
20
+ data = Tempfile.create(["red-parquet", ".parquet"]) do |file|
21
+ table = Arrow::Table.new(int64: [nil, -(2 ** 32), 2 ** 32])
22
+ table.save(file)
23
+ File.read(file, mode: "rb")
24
+ end
25
+ loaded_table = Arrow::Table.load(Arrow::Buffer.new(data),
26
+ format: :parquet)
27
+ @statistics = loaded_table[:int64].data.chunks[0].statistics
28
+ end
29
+
30
+ def test_null_count
31
+ assert do
32
+ @statistics.has_null_count?
33
+ end
34
+ assert do
35
+ @statistics.null_count_exact?
36
+ end
37
+ assert_equal(1, @statistics.null_count)
38
+ end
39
+
40
+ def test_distinct_count
41
+ assert do
42
+ not @statistics.has_distinct_count?
43
+ end
44
+ assert do
45
+ not @statistics.distinct_count_exact?
46
+ end
47
+ assert_nil(@statistics.distinct_count)
48
+ end
49
+ end
@@ -50,6 +50,7 @@ class TestArrowTable < Test::Unit::TestCase
50
50
  begin
51
51
  yield(@output)
52
52
  ensure
53
+ GC.start # Ensure freeing the Arrow::Table that refers to @output.path.
53
54
  @output.close!
54
55
  end
55
56
  end
@@ -17,15 +17,22 @@
17
17
 
18
18
  class TestBooleanStatistics < Test::Unit::TestCase
19
19
  def setup
20
- file = Tempfile.open(["data", ".parquet"])
21
- array = Arrow::BooleanArray.new([nil, false, true])
22
- table = Arrow::Table.new("boolean" => array)
23
- writer = Parquet::ArrowFileWriter.new(table.schema, file.path)
24
- chunk_size = 1024
25
- writer.write_table(table, chunk_size)
26
- writer.close
27
- reader = Parquet::ArrowFileReader.new(file.path)
28
- @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics
20
+ Tempfile.create(["data", ".parquet"]) do |file|
21
+ array = Arrow::BooleanArray.new([nil, false, true])
22
+ table = Arrow::Table.new("boolean" => array)
23
+ Parquet::ArrowFileWriter.open(table.schema, file.path) do |writer|
24
+ chunk_size = 1024
25
+ writer.write_table(table, chunk_size)
26
+ writer.close
27
+ end
28
+ Parquet::ArrowFileReader.open(file.path) do |reader|
29
+ @statistics =
30
+ reader.metadata.get_row_group(0).get_column_chunk(0).statistics
31
+ yield
32
+ @statistics = nil
33
+ end
34
+ GC.start # Ensure freeing @statistics that refers to file.path.
35
+ end
29
36
  end
30
37
 
31
38
  def test_min
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 21.0.0
4
+ version: 23.0.0
5
5
  platform: ruby
6
6
  authors:
7
- - Apache Arrow Developers
7
+ - The Apache Software Foundation
8
8
  bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - '='
17
17
  - !ruby/object:Gem::Version
18
- version: 21.0.0
18
+ version: 23.0.0
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - '='
24
24
  - !ruby/object:Gem::Version
25
- version: 21.0.0
25
+ version: 23.0.0
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: bundler
28
28
  requirement: !ruby/object:Gem::Requirement
@@ -90,6 +90,7 @@ files:
90
90
  - red-parquet.gemspec
91
91
  - test/helper.rb
92
92
  - test/run-test.rb
93
+ - test/test-array-statistics.rb
93
94
  - test/test-arrow-file-reader.rb
94
95
  - test/test-arrow-file-writer.rb
95
96
  - test/test-arrow-table.rb
@@ -112,12 +113,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
113
  - !ruby/object:Gem::Version
113
114
  version: '0'
114
115
  requirements: []
115
- rubygems_version: 3.7.1
116
+ rubygems_version: 3.6.7
116
117
  specification_version: 4
117
118
  summary: Red Parquet is the Ruby bindings of Apache Parquet
118
119
  test_files:
119
120
  - test/helper.rb
120
121
  - test/run-test.rb
122
+ - test/test-array-statistics.rb
121
123
  - test/test-arrow-file-reader.rb
122
124
  - test/test-arrow-file-writer.rb
123
125
  - test/test-arrow-table.rb