red-arrow-dataset 4.0.1 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 00cf96f680d62ad003d931b3628313f83dd351b34222a59e5d1f85018ea20d7e
4
- data.tar.gz: 399966d45ae4bb31868c9ae21df1b5ee6e6738297597b8536718db398f2a927b
3
+ metadata.gz: bd2475450c901efd8a4a2e311459285098b42a714ada39e41959e8c7d7f477c8
4
+ data.tar.gz: 7d696272844899dbdedf1d1e4ba379a54c91a1187da33c97a6d5eddb89c5f588
5
5
  SHA512:
6
- metadata.gz: c61e5f5ecd8ed9b027091a55ca9213e206c71c7d13c6f11714ec4ad9cdf58c45b4811a46315e267a9fcbf7d5d7a893af2b2dd935dce23c6c898d0c4ae37f4a83
7
- data.tar.gz: 0acea5814073860de6780405ff597227dd9fc0971b1312f2e51af2e43c710650968b301160ba4542498189728fb87f2a66cc9344293d80ea71efaba8fe25166c
6
+ metadata.gz: 61868f12b9d4b607ebf3e408a81e576c0684a433b9ffbb2245abc34dd5a4f797a78ed05544c74a0bdd0a8caa7da01cc80e9885f32f90c63c788f7b1380d04df1
7
+ data.tar.gz: dd3022730a0d70182217dc80a7364db74eaba3bab783bb673b0776e1c5b562c8200ec6c10d81fda61627aca3ce458d287141f0c97e2b1e706ddda752deab2b35
@@ -16,21 +16,13 @@
16
16
  # under the License.
17
17
 
18
18
  module ArrowDataset
19
- class ScanOptions
19
+ class Dataset
20
20
  class << self
21
- def try_convert(value)
22
- case value
23
- when Hash
24
- return nil unless value.key?(:schema)
25
- options = new(value[:schema])
26
- value.each do |name, value|
27
- next if name == :schema
28
- options.__send__("#{name}=", value)
29
- end
30
- options
31
- else
32
- nil
33
- end
21
+ def build(*args)
22
+ factory_class = ArrowDataset.const_get("#{name}Factory")
23
+ factory = factory_class.new(*args)
24
+ yield(factory)
25
+ factory.finish
34
26
  end
35
27
  end
36
28
  end
@@ -29,8 +29,7 @@ module ArrowDataset
29
29
  end
30
30
 
31
31
  def require_libraries
32
- require "arrow-dataset/in-memory-scan-task"
33
- require "arrow-dataset/scan-options"
32
+ require "arrow-dataset/dataset"
34
33
  end
35
34
  end
36
35
  end
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module ArrowDataset
19
- VERSION = "4.0.1"
19
+ VERSION = "5.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/test/helper.rb CHANGED
@@ -17,4 +17,6 @@
17
17
 
18
18
  require "arrow-dataset"
19
19
 
20
+ require "tmpdir"
21
+
20
22
  require "test-unit"
@@ -15,22 +15,24 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class TestScanOptions < Test::Unit::TestCase
18
+ class TestFileSystemDataset < Test::Unit::TestCase
19
19
  def setup
20
- @record_batches = [
21
- Arrow::RecordBatch.new(visible: [true, false, true],
22
- point: [1, 2, 3]),
23
- ]
24
- @schema = @record_batches.first.schema
20
+ Dir.mktmpdir do |tmpdir|
21
+ @dir = tmpdir
22
+ @path = File.join(@dir, "table.arrow")
23
+ @table = Arrow::Table.new(visible: [true, false, true],
24
+ point: [1, 2, 3])
25
+ @table.save(@path)
26
+ @format = ArrowDataset::IPCFileFormat.new
27
+ yield
28
+ end
25
29
  end
26
30
 
27
- sub_test_case(".try_convert") do
28
- def test_hash
29
- batch_size = 1024
30
- context = ArrowDataset::ScanOptions.try_convert(schema: @schema,
31
- batch_size: batch_size)
32
- assert_equal([@schema, batch_size],
33
- [context.schema, context.batch_size])
31
+ test(".build") do
32
+ dataset = ArrowDataset::FileSystemDataset.build(@format) do |factory|
33
+ factory.file_system = Arrow::LocalFileSystem.new
34
+ factory.add_path(File.expand_path(@path))
34
35
  end
36
+ assert_equal(@table, dataset.to_table)
35
37
  end
36
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow-dataset
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.1
4
+ version: 5.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-27 00:00:00.000000000 Z
11
+ date: 2021-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 4.0.1
19
+ version: 5.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 4.0.1
26
+ version: 5.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -82,21 +82,18 @@ files:
82
82
  - Rakefile
83
83
  - dependency-check/Rakefile
84
84
  - lib/arrow-dataset.rb
85
- - lib/arrow-dataset/in-memory-fragment.rb
86
- - lib/arrow-dataset/in-memory-scan-task.rb
85
+ - lib/arrow-dataset/dataset.rb
87
86
  - lib/arrow-dataset/loader.rb
88
- - lib/arrow-dataset/scan-options.rb
89
87
  - lib/arrow-dataset/version.rb
90
88
  - red-arrow-dataset.gemspec
91
89
  - test/helper.rb
92
90
  - test/run-test.rb
93
- - test/test-in-memory-scan-task.rb
94
- - test/test-scan-options.rb
91
+ - test/test-file-system-dataset.rb
95
92
  homepage: https://arrow.apache.org/
96
93
  licenses:
97
94
  - Apache-2.0
98
95
  metadata: {}
99
- post_install_message:
96
+ post_install_message:
100
97
  rdoc_options: []
101
98
  require_paths:
102
99
  - lib
@@ -111,12 +108,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
108
  - !ruby/object:Gem::Version
112
109
  version: '0'
113
110
  requirements: []
114
- rubygems_version: 3.2.5
115
- signing_key:
111
+ rubygems_version: 3.2.22
112
+ signing_key:
116
113
  specification_version: 4
117
114
  summary: Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset
118
115
  test_files:
119
116
  - test/helper.rb
120
117
  - test/run-test.rb
121
- - test/test-scan-options.rb
122
- - test/test-in-memory-scan-task.rb
118
+ - test/test-file-system-dataset.rb
@@ -1,32 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ArrowDataset
19
- class InMemoryFragment
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
- def initialize(schema, record_batches)
23
- record_batches = record_batches.collect do |record_batch|
24
- unless record_batch.is_a?(Arrow::RecordBatch)
25
- record_batch = Arrow::RecordBatch.new(record_batch)
26
- end
27
- record_batch
28
- end
29
- initialize_raw(schema, record_batches)
30
- end
31
- end
32
- end
@@ -1,35 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ArrowDataset
19
- class InMemoryScanTask
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
- def initialize(record_batches, **options)
23
- record_batches = record_batches.collect do |record_batch|
24
- unless record_batch.is_a?(Arrow::RecordBatch)
25
- record_batch = Arrow::RecordBatch.new(record_batch)
26
- end
27
- record_batch
28
- end
29
- options[:schema] ||= record_batches.first.schema
30
- fragment = options.delete(:fragment)
31
- fragment ||= InMemoryFragment.new(options[:schema], record_batches)
32
- initialize_raw(record_batches, options, fragment)
33
- end
34
- end
35
- end
@@ -1,33 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class TestInMemoryScanTask < Test::Unit::TestCase
19
- def setup
20
- @record_batches = [
21
- Arrow::RecordBatch.new(visible: [true, false, true],
22
- point: [1, 2, 3]),
23
- ]
24
- end
25
-
26
- sub_test_case(".new") do
27
- test("[[Arrow::RecordBatch]]") do
28
- scan_task = ArrowDataset::InMemoryScanTask.new(@record_batches)
29
- assert_equal(@record_batches,
30
- scan_task.execute.to_a)
31
- end
32
- end
33
- end