red-arrow-dataset 2.0.0 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1abf8cb2fb601ddcdd8b0f0014a454037259d001c240ecaa7d5257a13d52602
4
- data.tar.gz: b592225816719f4fb2b6fa3d88190c6d9897c9104b8a5927c450ced1cd8d5711
3
+ metadata.gz: bd2475450c901efd8a4a2e311459285098b42a714ada39e41959e8c7d7f477c8
4
+ data.tar.gz: 7d696272844899dbdedf1d1e4ba379a54c91a1187da33c97a6d5eddb89c5f588
5
5
  SHA512:
6
- metadata.gz: d8df982d7587d657770f7c293d233f7cbd888591f65495a4f9667e31608625cc31ec15d5751229867f0c599fb24f715ec7159acc030f66ce19973bf525ecf8d2
7
- data.tar.gz: fc734ec58deee49fe18aa75065da563a672eb1d830faa6ad6ea19b22f1d8a806d8ee7b2c68fa7919eb5a0f00424480d0405ed0f28a8009935600fcfaa346aa76
6
+ metadata.gz: 61868f12b9d4b607ebf3e408a81e576c0684a433b9ffbb2245abc34dd5a4f797a78ed05544c74a0bdd0a8caa7da01cc80e9885f32f90c63c788f7b1380d04df1
7
+ data.tar.gz: dd3022730a0d70182217dc80a7364db74eaba3bab783bb673b0776e1c5b562c8200ec6c10d81fda61627aca3ce458d287141f0c97e2b1e706ddda752deab2b35
@@ -16,21 +16,13 @@
16
16
  # under the License.
17
17
 
18
18
  module ArrowDataset
19
- class ScanOptions
19
+ class Dataset
20
20
  class << self
21
- def try_convert(value)
22
- case value
23
- when Hash
24
- return nil unless value.key?(:schema)
25
- options = new(value[:schema])
26
- value.each do |name, value|
27
- next if name == :schema
28
- options.__send__("#{name}=", value)
29
- end
30
- options
31
- else
32
- nil
33
- end
21
+ def build(*args)
22
+ factory_class = ArrowDataset.const_get("#{name}Factory")
23
+ factory = factory_class.new(*args)
24
+ yield(factory)
25
+ factory.finish
34
26
  end
35
27
  end
36
28
  end
@@ -29,8 +29,7 @@ module ArrowDataset
29
29
  end
30
30
 
31
31
  def require_libraries
32
- require "arrow-dataset/in-memory-scan-task"
33
- require "arrow-dataset/scan-options"
32
+ require "arrow-dataset/dataset"
34
33
  end
35
34
  end
36
35
  end
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module ArrowDataset
19
- VERSION = "2.0.0"
19
+ VERSION = "5.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/test/helper.rb CHANGED
@@ -17,4 +17,6 @@
17
17
 
18
18
  require "arrow-dataset"
19
19
 
20
+ require "tmpdir"
21
+
20
22
  require "test-unit"
@@ -15,19 +15,24 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class TestInMemoryScanTask < Test::Unit::TestCase
18
+ class TestFileSystemDataset < Test::Unit::TestCase
19
19
  def setup
20
- @record_batches = [
21
- Arrow::RecordBatch.new(visible: [true, false, true],
22
- point: [1, 2, 3]),
23
- ]
20
+ Dir.mktmpdir do |tmpdir|
21
+ @dir = tmpdir
22
+ @path = File.join(@dir, "table.arrow")
23
+ @table = Arrow::Table.new(visible: [true, false, true],
24
+ point: [1, 2, 3])
25
+ @table.save(@path)
26
+ @format = ArrowDataset::IPCFileFormat.new
27
+ yield
28
+ end
24
29
  end
25
30
 
26
- sub_test_case(".new") do
27
- test("[[Arrow::RecordBatch]]") do
28
- scan_task = ArrowDataset::InMemoryScanTask.new(@record_batches)
29
- assert_equal(@record_batches,
30
- scan_task.execute.to_a)
31
+ test(".build") do
32
+ dataset = ArrowDataset::FileSystemDataset.build(@format) do |factory|
33
+ factory.file_system = Arrow::LocalFileSystem.new
34
+ factory.add_path(File.expand_path(@path))
31
35
  end
36
+ assert_equal(@table, dataset.to_table)
32
37
  end
33
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow-dataset
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 5.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-19 00:00:00.000000000 Z
11
+ date: 2021-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 2.0.0
19
+ version: 5.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 2.0.0
26
+ version: 5.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -82,15 +82,13 @@ files:
82
82
  - Rakefile
83
83
  - dependency-check/Rakefile
84
84
  - lib/arrow-dataset.rb
85
- - lib/arrow-dataset/in-memory-scan-task.rb
85
+ - lib/arrow-dataset/dataset.rb
86
86
  - lib/arrow-dataset/loader.rb
87
- - lib/arrow-dataset/scan-options.rb
88
87
  - lib/arrow-dataset/version.rb
89
88
  - red-arrow-dataset.gemspec
90
89
  - test/helper.rb
91
90
  - test/run-test.rb
92
- - test/test-in-memory-scan-task.rb
93
- - test/test-scan-options.rb
91
+ - test/test-file-system-dataset.rb
94
92
  homepage: https://arrow.apache.org/
95
93
  licenses:
96
94
  - Apache-2.0
@@ -110,12 +108,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
108
  - !ruby/object:Gem::Version
111
109
  version: '0'
112
110
  requirements: []
113
- rubygems_version: 3.1.2
111
+ rubygems_version: 3.2.22
114
112
  signing_key:
115
113
  specification_version: 4
116
114
  summary: Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset
117
115
  test_files:
118
- - test/run-test.rb
119
116
  - test/helper.rb
120
- - test/test-scan-options.rb
121
- - test/test-in-memory-scan-task.rb
117
+ - test/run-test.rb
118
+ - test/test-file-system-dataset.rb
@@ -1,34 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ArrowDataset
19
- class InMemoryScanTask
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
- def initialize(record_batches, **options)
23
- record_batches = record_batches.collect do |record_batch|
24
- unless record_batch.is_a?(Arrow::RecordBatch)
25
- record_batch = Arrow::RecordBatch.new(record_batch)
26
- end
27
- record_batch
28
- end
29
- context = options.delete(:context) || ScanContext.new
30
- options[:schema] ||= record_batches.first.schema
31
- initialize_raw(record_batches, options, context)
32
- end
33
- end
34
- end
@@ -1,36 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class TestScanOptions < Test::Unit::TestCase
19
- def setup
20
- @record_batches = [
21
- Arrow::RecordBatch.new(visible: [true, false, true],
22
- point: [1, 2, 3]),
23
- ]
24
- @schema = @record_batches.first.schema
25
- end
26
-
27
- sub_test_case(".try_convert") do
28
- def test_hash
29
- batch_size = 1024
30
- context = ArrowDataset::ScanOptions.try_convert(schema: @schema,
31
- batch_size: batch_size)
32
- assert_equal([@schema, batch_size],
33
- [context.schema, context.batch_size])
34
- end
35
- end
36
- end