red-arrow-dataset 4.0.1 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arrow-dataset/{scan-options.rb → dataset.rb} +6 -14
- data/lib/arrow-dataset/loader.rb +1 -2
- data/lib/arrow-dataset/version.rb +1 -1
- data/test/helper.rb +2 -0
- data/test/{test-scan-options.rb → test-file-system-dataset.rb} +15 -13
- metadata +11 -15
- data/lib/arrow-dataset/in-memory-fragment.rb +0 -32
- data/lib/arrow-dataset/in-memory-scan-task.rb +0 -35
- data/test/test-in-memory-scan-task.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd2475450c901efd8a4a2e311459285098b42a714ada39e41959e8c7d7f477c8
|
4
|
+
data.tar.gz: 7d696272844899dbdedf1d1e4ba379a54c91a1187da33c97a6d5eddb89c5f588
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61868f12b9d4b607ebf3e408a81e576c0684a433b9ffbb2245abc34dd5a4f797a78ed05544c74a0bdd0a8caa7da01cc80e9885f32f90c63c788f7b1380d04df1
|
7
|
+
data.tar.gz: dd3022730a0d70182217dc80a7364db74eaba3bab783bb673b0776e1c5b562c8200ec6c10d81fda61627aca3ce458d287141f0c97e2b1e706ddda752deab2b35
|
data/lib/arrow-dataset/{scan-options.rb → dataset.rb}
CHANGED
@@ -16,21 +16,13 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
module ArrowDataset
|
19
|
-
class
|
19
|
+
class Dataset
|
20
20
|
class << self
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
value.each do |name, value|
|
27
|
-
next if name == :schema
|
28
|
-
options.__send__("#{name}=", value)
|
29
|
-
end
|
30
|
-
options
|
31
|
-
else
|
32
|
-
nil
|
33
|
-
end
|
21
|
+
def build(*args)
|
22
|
+
factory_class = ArrowDataset.const_get("#{name}Factory")
|
23
|
+
factory = factory_class.new(*args)
|
24
|
+
yield(factory)
|
25
|
+
factory.finish
|
34
26
|
end
|
35
27
|
end
|
36
28
|
end
|
data/lib/arrow-dataset/loader.rb
CHANGED
data/test/helper.rb
CHANGED
data/test/{test-scan-options.rb → test-file-system-dataset.rb}
CHANGED
@@ -15,22 +15,24 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
-
class
|
18
|
+
class TestFileSystemDataset < Test::Unit::TestCase
|
19
19
|
def setup
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
20
|
+
Dir.mktmpdir do |tmpdir|
|
21
|
+
@dir = tmpdir
|
22
|
+
@path = File.join(@dir, "table.arrow")
|
23
|
+
@table = Arrow::Table.new(visible: [true, false, true],
|
24
|
+
point: [1, 2, 3])
|
25
|
+
@table.save(@path)
|
26
|
+
@format = ArrowDataset::IPCFileFormat.new
|
27
|
+
yield
|
28
|
+
end
|
25
29
|
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
batch_size: batch_size)
|
32
|
-
assert_equal([@schema, batch_size],
|
33
|
-
[context.schema, context.batch_size])
|
31
|
+
test(".build") do
|
32
|
+
dataset = ArrowDataset::FileSystemDataset.build(@format) do |factory|
|
33
|
+
factory.file_system = Arrow::LocalFileSystem.new
|
34
|
+
factory.add_path(File.expand_path(@path))
|
34
35
|
end
|
36
|
+
assert_equal(@table, dataset.to_table)
|
35
37
|
end
|
36
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow-dataset
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 5.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 5.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 5.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -82,21 +82,18 @@ files:
|
|
82
82
|
- Rakefile
|
83
83
|
- dependency-check/Rakefile
|
84
84
|
- lib/arrow-dataset.rb
|
85
|
-
- lib/arrow-dataset/
|
86
|
-
- lib/arrow-dataset/in-memory-scan-task.rb
|
85
|
+
- lib/arrow-dataset/dataset.rb
|
87
86
|
- lib/arrow-dataset/loader.rb
|
88
|
-
- lib/arrow-dataset/scan-options.rb
|
89
87
|
- lib/arrow-dataset/version.rb
|
90
88
|
- red-arrow-dataset.gemspec
|
91
89
|
- test/helper.rb
|
92
90
|
- test/run-test.rb
|
93
|
-
- test/test-
|
94
|
-
- test/test-scan-options.rb
|
91
|
+
- test/test-file-system-dataset.rb
|
95
92
|
homepage: https://arrow.apache.org/
|
96
93
|
licenses:
|
97
94
|
- Apache-2.0
|
98
95
|
metadata: {}
|
99
|
-
post_install_message:
|
96
|
+
post_install_message:
|
100
97
|
rdoc_options: []
|
101
98
|
require_paths:
|
102
99
|
- lib
|
@@ -111,12 +108,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
108
|
- !ruby/object:Gem::Version
|
112
109
|
version: '0'
|
113
110
|
requirements: []
|
114
|
-
rubygems_version: 3.2.
|
115
|
-
signing_key:
|
111
|
+
rubygems_version: 3.2.22
|
112
|
+
signing_key:
|
116
113
|
specification_version: 4
|
117
114
|
summary: Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset
|
118
115
|
test_files:
|
119
116
|
- test/helper.rb
|
120
117
|
- test/run-test.rb
|
121
|
-
- test/test-
|
122
|
-
- test/test-in-memory-scan-task.rb
|
118
|
+
- test/test-file-system-dataset.rb
|
data/lib/arrow-dataset/in-memory-fragment.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
module ArrowDataset
|
19
|
-
class InMemoryFragment
|
20
|
-
alias_method :initialize_raw, :initialize
|
21
|
-
private :initialize_raw
|
22
|
-
def initialize(schema, record_batches)
|
23
|
-
record_batches = record_batches.collect do |record_batch|
|
24
|
-
unless record_batch.is_a?(Arrow::RecordBatch)
|
25
|
-
record_batch = Arrow::RecordBatch.new(record_batch)
|
26
|
-
end
|
27
|
-
record_batch
|
28
|
-
end
|
29
|
-
initialize_raw(schema, record_batches)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
data/lib/arrow-dataset/in-memory-scan-task.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
module ArrowDataset
|
19
|
-
class InMemoryScanTask
|
20
|
-
alias_method :initialize_raw, :initialize
|
21
|
-
private :initialize_raw
|
22
|
-
def initialize(record_batches, **options)
|
23
|
-
record_batches = record_batches.collect do |record_batch|
|
24
|
-
unless record_batch.is_a?(Arrow::RecordBatch)
|
25
|
-
record_batch = Arrow::RecordBatch.new(record_batch)
|
26
|
-
end
|
27
|
-
record_batch
|
28
|
-
end
|
29
|
-
options[:schema] ||= record_batches.first.schema
|
30
|
-
fragment = options.delete(:fragment)
|
31
|
-
fragment ||= InMemoryFragment.new(options[:schema], record_batches)
|
32
|
-
initialize_raw(record_batches, options, fragment)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
data/test/test-in-memory-scan-task.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
class TestInMemoryScanTask < Test::Unit::TestCase
|
19
|
-
def setup
|
20
|
-
@record_batches = [
|
21
|
-
Arrow::RecordBatch.new(visible: [true, false, true],
|
22
|
-
point: [1, 2, 3]),
|
23
|
-
]
|
24
|
-
end
|
25
|
-
|
26
|
-
sub_test_case(".new") do
|
27
|
-
test("[[Arrow::RecordBatch]]") do
|
28
|
-
scan_task = ArrowDataset::InMemoryScanTask.new(@record_batches)
|
29
|
-
assert_equal(@record_batches,
|
30
|
-
scan_task.execute.to_a)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|