red-arrow-dataset 10.0.1 → 12.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/arrow-dataset/arrow-table-loadable.rb +11 -3
- data/lib/arrow-dataset/dataset.rb +5 -2
- data/lib/arrow-dataset/finish-options.rb +38 -0
- data/lib/arrow-dataset/loader.rb +1 -0
- data/lib/arrow-dataset/version.rb +1 -1
- data/test/test-arrow-table.rb +13 -0
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a6d1268fdbf232588bc49f00578d381e2ad8e906a6b52a91d2515fe017af561
|
4
|
+
data.tar.gz: 64c8116eb4f2eb7b24458244bb57d19e5cad0f54013a3020b15dbd1c832e18cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad96f7ae5d41ab3b894559a049c35c475544359b66e003216fcb36eb84756b338ad39143f3ee44652d8cffbcda965a4bbc216ba38e6b3bad3e73b0a1d474695c
|
7
|
+
data.tar.gz: c28a4c2587883b4209d79bffdb61ab1c4c8cf7d1abee0df02a7a26a7a6f52185e96b01e61b25a83ad79df5679d536bd9da08e738e686540956d762a0a7705de4
|
data/README.md
CHANGED
@@ -25,9 +25,9 @@ Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset. Red Arrow Datase
|
|
25
25
|
|
26
26
|
[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is a middleware for language bindings of C library. GObject Introspection can generate language bindings automatically at runtime.
|
27
27
|
|
28
|
-
Red Arrow Dataset uses [Apache Arrow Dataset GLib](https://github.com/apache/arrow/tree/
|
28
|
+
Red Arrow Dataset uses [Apache Arrow Dataset GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow Dataset.
|
29
29
|
|
30
|
-
Apache Arrow Dataset GLib is a C wrapper for [Apache Arrow Dataset C++](https://github.com/apache/arrow/tree/
|
30
|
+
Apache Arrow Dataset GLib is a C wrapper for [Apache Arrow Dataset C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow Dataset C++ directly. Apache Arrow Dataset GLib is a bridge between Apache Arrow Dataset C++ and GObject Introspection.
|
31
31
|
|
32
32
|
gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow Dataset uses GObject Introspection via gobject-introspection gem.
|
33
33
|
|
@@ -36,13 +36,21 @@ module ArrowDataset
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def internal_load_from_uri(uri)
|
39
|
-
|
39
|
+
options = @options.dup
|
40
|
+
format = FileFormat.resolve(options.delete(:format))
|
40
41
|
dataset = FileSystemDataset.build(format) do |factory|
|
41
42
|
factory.file_system_uri = uri
|
43
|
+
finish_options = FinishOptions.new
|
44
|
+
FinishOptions.instance_methods(false).each do |method|
|
45
|
+
next unless method.to_s.end_with?("=")
|
46
|
+
value = options.delete(method[0..-2].to_sym)
|
47
|
+
next if value.nil?
|
48
|
+
finish_options.public_send(method, value)
|
49
|
+
end
|
50
|
+
finish_options
|
42
51
|
end
|
43
52
|
scanner_builder = dataset.begin_scan
|
44
|
-
|
45
|
-
next if key == :format
|
53
|
+
options.each do |key, value|
|
46
54
|
next if value.nil?
|
47
55
|
setter = "#{key}="
|
48
56
|
next unless scanner_builder.respond_to?(setter)
|
@@ -21,8 +21,11 @@ module ArrowDataset
|
|
21
21
|
def build(*args)
|
22
22
|
factory_class = ArrowDataset.const_get("#{name}Factory")
|
23
23
|
factory = factory_class.new(*args)
|
24
|
-
yield(factory)
|
25
|
-
|
24
|
+
options = yield(factory)
|
25
|
+
unless options.is_a?(FinishOptions)
|
26
|
+
options = FinishOptions.try_convert(options)
|
27
|
+
end
|
28
|
+
factory.finish(options)
|
26
29
|
end
|
27
30
|
end
|
28
31
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module ArrowDataset
|
19
|
+
class FinishOptions
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Hash
|
25
|
+
options = new
|
26
|
+
value.each do |k, v|
|
27
|
+
setter = "#{k}="
|
28
|
+
next unless options.respond_to?(setter)
|
29
|
+
options.public_send(setter, v)
|
30
|
+
end
|
31
|
+
options
|
32
|
+
else
|
33
|
+
nil
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/arrow-dataset/loader.rb
CHANGED
data/test/test-arrow-table.rb
CHANGED
@@ -76,5 +76,18 @@ class TestArrowTable < Test::Unit::TestCase
|
|
76
76
|
Arrow::Table.load(@dir,
|
77
77
|
filter: ["equal", :visible, true]))
|
78
78
|
end
|
79
|
+
|
80
|
+
def test_schema
|
81
|
+
uri = build_file_uri(@path1)
|
82
|
+
@table1.save(uri)
|
83
|
+
schema = Arrow::Schema.new(visible: :boolean,
|
84
|
+
point: :int64)
|
85
|
+
assert_equal(Arrow::Table.new(schema,
|
86
|
+
[
|
87
|
+
@table1[:visible].data,
|
88
|
+
@table1[:point].cast(:int64),
|
89
|
+
]),
|
90
|
+
Arrow::Table.load(uri, schema: schema))
|
91
|
+
end
|
79
92
|
end
|
80
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow-dataset
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 12.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 12.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 12.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,6 +87,7 @@ files:
|
|
87
87
|
- lib/arrow-dataset/dataset.rb
|
88
88
|
- lib/arrow-dataset/file-format.rb
|
89
89
|
- lib/arrow-dataset/file-system-dataset-factory.rb
|
90
|
+
- lib/arrow-dataset/finish-options.rb
|
90
91
|
- lib/arrow-dataset/loader.rb
|
91
92
|
- lib/arrow-dataset/version.rb
|
92
93
|
- red-arrow-dataset.gemspec
|
@@ -98,7 +99,7 @@ homepage: https://arrow.apache.org/
|
|
98
99
|
licenses:
|
99
100
|
- Apache-2.0
|
100
101
|
metadata: {}
|
101
|
-
post_install_message:
|
102
|
+
post_install_message:
|
102
103
|
rdoc_options: []
|
103
104
|
require_paths:
|
104
105
|
- lib
|
@@ -113,8 +114,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
114
|
- !ruby/object:Gem::Version
|
114
115
|
version: '0'
|
115
116
|
requirements: []
|
116
|
-
rubygems_version: 3.3.
|
117
|
-
signing_key:
|
117
|
+
rubygems_version: 3.3.5
|
118
|
+
signing_key:
|
118
119
|
specification_version: 4
|
119
120
|
summary: Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset
|
120
121
|
test_files:
|