red-parquet 17.0.0 → 18.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b7cae9089a2332476bfd0b64f3be6608981b04da2d8c9e4981acd75905a152c
4
- data.tar.gz: 324ea3d7f7e73be78774e9fd054d7c8cff1227094933456c4a7cc4477192b3f0
3
+ metadata.gz: cecbea49f27646046853df48ab3cbee973a252a949e0307611555222c2d8d726
4
+ data.tar.gz: c9d0cd4b119c142ef03ce6c41d4c82bc3ad645d80fede4a13ac2f54bc6559af4
5
5
  SHA512:
6
- metadata.gz: 847e684deaf1bf3b47f99e8c5400412e965310fc6e7fb13a270e2714f81acf30f9ee0e196896f2ea31ef2e92a89fb21f04ac231cb9230e2abee7e0e4341e3b7b
7
- data.tar.gz: 8af0aecff57bbeae1cb40d76387f0136440803c173152bcf5cdfeaad970c3c4ee798ebc046838c9d30e6ad0d739119a69902fd7673fcf7b21d59d78e8aee2374
6
+ metadata.gz: 3c5ef482cf2b632964849e84a4ed00feb59895c234270814ef812690a81cf46ba1311d53a4473f7903d7508a41199f82d3f58b2d01f3f4ae1efedfb55ed3a003
7
+ data.tar.gz: 1309ee59d3088712864bbf6783ddd07a13dbf7014091d2c523f81c05ec69e166d9c5449bdde6ffc3f84308c3abedc4e7e558d58135c5d51efd9daa0d0f9a0aad
@@ -0,0 +1,98 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Parquet
19
+ class ArrowFileWriter
20
+ # Write data to Apache Parquet.
21
+ #
22
+ # @return [void]
23
+ #
24
+ # @overload write(record_batch)
25
+ #
26
+ # @param record_batch [Arrow::RecordBatch] The record batch to
27
+ # be written.
28
+ #
29
+ # @example Write a record batch
30
+ # record_batch = Arrow::RecordBatch.new(enabled: [true, false])
31
+ # schema = record_batch.schema
32
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
33
+ # writer.write(record_batch)
34
+ # end
35
+ #
36
+ # @overload write(table, chunk_size: nil)
37
+ #
38
+ # @param table [Arrow::Table] The table to be written.
39
+ #
40
+ # @param chunk_size [nil, Integer] (nil) The maximum number of
41
+ # rows to write per row group.
42
+ #
43
+ # If this is `nil`, the default value (`1024 * 1024`) is used.
44
+ #
45
+ # @example Write a table with the default chunk size
46
+ # table = Arrow::Table.new(enabled: [true, false])
47
+ # schema = table.schema
48
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
49
+ # writer.write(table)
50
+ # end
51
+ #
52
+ # @example Write a table with the specified chunk size
53
+ # table = Arrow::Table.new(enabled: [true, false])
54
+ # schema = table.schema
55
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
56
+ # writer.write(table, chunk_size: 1)
57
+ # end
58
+ #
59
+ # @overload write(raw_records)
60
+ #
61
+ # @param raw_records [Array<Hash>, Array<Array>] The data to be written
62
+ # as primitive Ruby objects.
63
+ #
64
+ # @example Write a record batch with Array<Array> based data
65
+ # schema = Arrow::Schema.new(enabled: :boolean)
66
+ # raw_records = [
67
+ # [true],
68
+ # [false],
69
+ # ]
70
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
71
+ # writer.write(raw_records)
72
+ # end
73
+ #
74
+ # @example Write a record batch with Array<Hash> based data
75
+ # schema = Arrow::Schema.new(enabled: :boolean)
76
+ # raw_columns = [
77
+ # enabled: [true, false],
78
+ # ]
79
+ # Parquet::ArrowFileWriter.open(schema, "data.parquet") do |writer|
80
+ # writer.write(raw_columns)
81
+ # end
82
+ #
83
+ # @since 18.0.0
84
+ def write(target, chunk_size: nil)
85
+ case target
86
+ when Arrow::RecordBatch
87
+ write_record_batch(target)
88
+ when Arrow::Table
89
+ # Same as parquet::DEFAULT_MAX_ROW_GROUP_LENGTH in C++
90
+ chunk_size ||= 1024 * 1024
91
+ write_table(target, chunk_size)
92
+ else
93
+ record_batch = Arrow::RecordBatch.new(schema, target)
94
+ write_record_batch(record_batch)
95
+ end
96
+ end
97
+ end
98
+ end
@@ -30,6 +30,7 @@ module Parquet
30
30
 
31
31
  def require_libraries
32
32
  require "parquet/arrow-file-reader"
33
+ require "parquet/arrow-file-writer"
33
34
  require "parquet/arrow-table-loadable"
34
35
  require "parquet/arrow-table-savable"
35
36
  require "parquet/writer-properties"
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Parquet
19
- VERSION = "17.0.0"
19
+ VERSION = "18.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -0,0 +1,76 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestArrowFileWriter < Test::Unit::TestCase
19
+ def open_buffer_output_stream
20
+ buffer = Arrow::ResizableBuffer.new(4096)
21
+ Arrow::BufferOutputStream.open(buffer) do |output|
22
+ yield(output)
23
+ end
24
+ buffer
25
+ end
26
+
27
+ sub_test_case("#write") do
28
+ test("RecordBatch") do
29
+ schema = Arrow::Schema.new(visible: :boolean)
30
+ record_batch = Arrow::RecordBatch.new(schema, [[true], [false]])
31
+ buffer = open_buffer_output_stream do |output|
32
+ Parquet::ArrowFileWriter.open(record_batch.schema, output) do |writer|
33
+ writer.write(record_batch)
34
+ end
35
+ end
36
+ assert_equal(record_batch.to_table,
37
+ Arrow::Table.load(buffer, format: :parquet))
38
+ end
39
+
40
+ test("Table") do
41
+ schema = Arrow::Schema.new(visible: :boolean)
42
+ table = Arrow::Table.new(schema, [[true], [false]])
43
+ buffer = open_buffer_output_stream do |output|
44
+ Parquet::ArrowFileWriter.open(table.schema, output) do |writer|
45
+ writer.write(table)
46
+ end
47
+ end
48
+ assert_equal(table,
49
+ Arrow::Table.load(buffer, format: :parquet))
50
+ end
51
+
52
+ test("[[]]") do
53
+ schema = Arrow::Schema.new(visible: :boolean)
54
+ raw_records = [[true], [false]]
55
+ buffer = open_buffer_output_stream do |output|
56
+ Parquet::ArrowFileWriter.open(schema, output) do |writer|
57
+ writer.write(raw_records)
58
+ end
59
+ end
60
+ assert_equal(Arrow::RecordBatch.new(schema, raw_records).to_table,
61
+ Arrow::Table.load(buffer, format: :parquet))
62
+ end
63
+
64
+ test("[{}]") do
65
+ schema = Arrow::Schema.new(visible: :boolean)
66
+ raw_columns = [visible: [true, false]]
67
+ buffer = open_buffer_output_stream do |output|
68
+ Parquet::ArrowFileWriter.open(schema, output) do |writer|
69
+ writer.write(raw_columns)
70
+ end
71
+ end
72
+ assert_equal(Arrow::RecordBatch.new(schema, raw_columns).to_table,
73
+ Arrow::Table.load(buffer, format: :parquet))
74
+ end
75
+ end
76
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 17.0.0
4
+ version: 18.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-19 00:00:00.000000000 Z
11
+ date: 2024-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 17.0.0
19
+ version: 18.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 17.0.0
26
+ version: 18.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -82,6 +82,7 @@ files:
82
82
  - dependency-check/Rakefile
83
83
  - lib/parquet.rb
84
84
  - lib/parquet/arrow-file-reader.rb
85
+ - lib/parquet/arrow-file-writer.rb
85
86
  - lib/parquet/arrow-table-loadable.rb
86
87
  - lib/parquet/arrow-table-savable.rb
87
88
  - lib/parquet/loader.rb
@@ -91,13 +92,14 @@ files:
91
92
  - test/helper.rb
92
93
  - test/run-test.rb
93
94
  - test/test-arrow-file-reader.rb
95
+ - test/test-arrow-file-writer.rb
94
96
  - test/test-arrow-table.rb
95
97
  - test/test-boolean-statistics.rb
96
98
  homepage: https://arrow.apache.org/
97
99
  licenses:
98
100
  - Apache-2.0
99
101
  metadata: {}
100
- post_install_message:
102
+ post_install_message:
101
103
  rdoc_options: []
102
104
  require_paths:
103
105
  - lib
@@ -112,13 +114,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
114
  - !ruby/object:Gem::Version
113
115
  version: '0'
114
116
  requirements: []
115
- rubygems_version: 3.3.5
116
- signing_key:
117
+ rubygems_version: 3.4.20
118
+ signing_key:
117
119
  specification_version: 4
118
120
  summary: Red Parquet is the Ruby bindings of Apache Parquet
119
121
  test_files:
120
122
  - test/helper.rb
121
123
  - test/run-test.rb
122
124
  - test/test-arrow-file-reader.rb
125
+ - test/test-arrow-file-writer.rb
123
126
  - test/test-arrow-table.rb
124
127
  - test/test-boolean-statistics.rb