red-parquet 17.0.0 → 19.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b7cae9089a2332476bfd0b64f3be6608981b04da2d8c9e4981acd75905a152c
4
- data.tar.gz: 324ea3d7f7e73be78774e9fd054d7c8cff1227094933456c4a7cc4477192b3f0
3
+ metadata.gz: cc39b64f08309e9d7598e8ee408ae4af553860d1f69d393f873cd4e4d437fa93
4
+ data.tar.gz: 81de0f80f47979e0026b8fd41a8fc7eda9637bf783b8ded1fd0380a1a0700d4f
5
5
  SHA512:
6
- metadata.gz: 847e684deaf1bf3b47f99e8c5400412e965310fc6e7fb13a270e2714f81acf30f9ee0e196896f2ea31ef2e92a89fb21f04ac231cb9230e2abee7e0e4341e3b7b
7
- data.tar.gz: 8af0aecff57bbeae1cb40d76387f0136440803c173152bcf5cdfeaad970c3c4ee798ebc046838c9d30e6ad0d739119a69902fd7673fcf7b21d59d78e8aee2374
6
+ metadata.gz: b7521afbb76eff54ac7c18c819bcb77708b16ec38910d46ff50f4b64588c0731d6d3813fd4f56a302283fa107d4a0bf41579e456d05c46ebb997462a8737ad7b
7
+ data.tar.gz: f4cb1da74abb5a881a59ade11aff1d39de0768a1675a00778d496a616e4aab2ec80a9d1154b81bb27c045e0d58074d0c83fb0d560bfa1cac410ac80b4c4281f9
@@ -0,0 +1,98 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class ArrowFileWriter
20
+ # Write data to Apache Parquet.
21
+ #
22
+ # @return [void]
23
+ #
24
+ # @overload write(record_batch)
25
+ #
26
+ # @param record_batch [Arrow::RecordBatch] The record batch to
27
+ # be written.
28
+ #
29
+ # @example Write a record batch
30
+ # record_batch = Arrow::RecordBatch.new(enabled: [true, false])
31
+ # schema = record_batch.schema
32
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
33
+ # writer.write(record_batch)
34
+ # end
35
+ #
36
+ # @overload write(table, chunk_size: nil)
37
+ #
38
+ # @param table [Arrow::Table] The table to be written.
39
+ #
40
+ # @param chunk_size [nil, Integer] (nil) The maximum number of
41
+ # rows to write per row group.
42
+ #
43
+ # If this is `nil`, the default value (`1024 * 1024`) is used.
44
+ #
45
+ # @example Write a table with the default chunk size
46
+ # table = Arrow::Table.new(enabled: [true, false])
47
+ # schema = table.schema
48
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
49
+ # writer.write(table)
50
+ # end
51
+ #
52
+ # @example Write a table with the specified chunk size
53
+ # table = Arrow::Table.new(enabled: [true, false])
54
+ # schema = table.schema
55
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
56
+ # writer.write(table, chunk_size: 1)
57
+ # end
58
+ #
59
+ # @overload write(raw_records)
60
+ #
61
+ # @param raw_records [Array<Hash>, Array<Array>] The data to be written
62
+ # as primitive Ruby objects.
63
+ #
64
+ # @example Write a record batch with Array<Array> based data
65
+ # schema = Arrow::Schema.new(enabled: :boolean)
66
+ # raw_records = [
67
+ # [true],
68
+ # [false],
69
+ # ]
70
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
71
+ # writer.write(raw_records)
72
+ # end
73
+ #
74
+ # @example Write a record batch with Array<Hash> based data
75
+ # schema = Arrow::Schema.new(enabled: :boolean)
76
+ # raw_columns = [
77
+ # enabled: [true, false],
78
+ # ]
79
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
80
+ # writer.write(raw_columns)
81
+ # end
82
+ #
83
+ # @since 18.0.0
84
+ def write(target, chunk_size: nil)
85
+ case target
86
+ when Arrow::RecordBatch
87
+ write_record_batch(target)
88
+ when Arrow::Table
89
+ # Same as parquet::DEFAULT_MAX_ROW_GROUP_LENGTH in C++
90
+ chunk_size ||= 1024 * 1024
91
+ write_table(target, chunk_size)
92
+ else
93
+ record_batch = Arrow::RecordBatch.new(schema, target)
94
+ write_record_batch(record_batch)
95
+ end
96
+ end
97
+ end
98
+ end
@@ -30,6 +30,7 @@ module Parquet
30
30
 
31
31
  def require_libraries
32
32
  require "parquet/arrow-file-reader"
33
+ require "parquet/arrow-file-writer"
33
34
  require "parquet/arrow-table-loadable"
34
35
  require "parquet/arrow-table-savable"
35
36
  require "parquet/writer-properties"
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "17.0.0"
19
+ VERSION = "19.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,76 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestArrowFileWriter < Test::Unit::TestCase
19
+ def open_buffer_output_stream
20
+ buffer = Arrow::ResizableBuffer.new(4096)
21
+ Arrow::BufferOutputStream.open(buffer) do |output|
22
+ yield(output)
23
+ end
24
+ buffer
25
+ end
26
+
27
+ sub_test_case("#write") do
28
+ test("RecordBatch") do
29
+ schema = Arrow::Schema.new(visible: :boolean)
30
+ record_batch = Arrow::RecordBatch.new(schema, [[true], [false]])
31
+ buffer = open_buffer_output_stream do |output|
32
+ Parquet::ArrowFileWriter.open(record_batch.schema, output) do |writer|
33
+ writer.write(record_batch)
34
+ end
35
+ end
36
+ assert_equal(record_batch.to_table,
37
+ Arrow::Table.load(buffer, format: :parquet))
38
+ end
39
+
40
+ test("Table") do
41
+ schema = Arrow::Schema.new(visible: :boolean)
42
+ table = Arrow::Table.new(schema, [[true], [false]])
43
+ buffer = open_buffer_output_stream do |output|
44
+ Parquet::ArrowFileWriter.open(table.schema, output) do |writer|
45
+ writer.write(table)
46
+ end
47
+ end
48
+ assert_equal(table,
49
+ Arrow::Table.load(buffer, format: :parquet))
50
+ end
51
+
52
+ test("[[]]") do
53
+ schema = Arrow::Schema.new(visible: :boolean)
54
+ raw_records = [[true], [false]]
55
+ buffer = open_buffer_output_stream do |output|
56
+ Parquet::ArrowFileWriter.open(schema, output) do |writer|
57
+ writer.write(raw_records)
58
+ end
59
+ end
60
+ assert_equal(Arrow::RecordBatch.new(schema, raw_records).to_table,
61
+ Arrow::Table.load(buffer, format: :parquet))
62
+ end
63
+
64
+ test("[{}]") do
65
+ schema = Arrow::Schema.new(visible: :boolean)
66
+ raw_columns = [visible: [true, false]]
67
+ buffer = open_buffer_output_stream do |output|
68
+ Parquet::ArrowFileWriter.open(schema, output) do |writer|
69
+ writer.write(raw_columns)
70
+ end
71
+ end
72
+ assert_equal(Arrow::RecordBatch.new(schema, raw_columns).to_table,
73
+ Arrow::Table.load(buffer, format: :parquet))
74
+ end
75
+ end
76
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 17.0.0
4
+ version: 19.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-19 00:00:00.000000000 Z
11
+ date: 2025-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 17.0.0
19
+ version: 19.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 17.0.0
26
+ version: 19.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -82,6 +82,7 @@ files:
82
82
  - dependency-check/Rakefile
83
83
  - lib/parquet.rb
84
84
  - lib/parquet/arrow-file-reader.rb
85
+ - lib/parquet/arrow-file-writer.rb
85
86
  - lib/parquet/arrow-table-loadable.rb
86
87
  - lib/parquet/arrow-table-savable.rb
87
88
  - lib/parquet/loader.rb
@@ -91,13 +92,14 @@ files:
91
92
  - test/helper.rb
92
93
  - test/run-test.rb
93
94
  - test/test-arrow-file-reader.rb
95
+ - test/test-arrow-file-writer.rb
94
96
  - test/test-arrow-table.rb
95
97
  - test/test-boolean-statistics.rb
96
98
  homepage: https://arrow.apache.org/
97
99
  licenses:
98
100
  - Apache-2.0
99
101
  metadata: {}
100
- post_install_message:
102
+ post_install_message:
101
103
  rdoc_options: []
102
104
  require_paths:
103
105
  - lib
@@ -112,13 +114,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
114
  - !ruby/object:Gem::Version
113
115
  version: '0'
114
116
  requirements: []
115
- rubygems_version: 3.3.5
116
- signing_key:
117
+ rubygems_version: 3.5.16
118
+ signing_key:
117
119
  specification_version: 4
118
120
  summary: Red Parquet is the Ruby bindings of Apache Parquet
119
121
  test_files:
120
122
  - test/helper.rb
121
123
  - test/run-test.rb
122
124
  - test/test-arrow-file-reader.rb
125
+ - test/test-arrow-file-writer.rb
123
126
  - test/test-arrow-table.rb
124
127
  - test/test-boolean-statistics.rb