red-arrow-dataset 10.0.1 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52af15e972e3de01889a09a6d6ae58217754a6cfd16c7751a769ddd72bdddc6d
4
- data.tar.gz: 88ec8ae29e8c89211da9fd24f030c0a736215ff098cd3813b817f0025f02e1ae
3
+ metadata.gz: 0a6d1268fdbf232588bc49f00578d381e2ad8e906a6b52a91d2515fe017af561
4
+ data.tar.gz: 64c8116eb4f2eb7b24458244bb57d19e5cad0f54013a3020b15dbd1c832e18cf
5
5
  SHA512:
6
- metadata.gz: 2c5a288d3c68097d6c4264225f470d2eb87718404f9397bef52d29bafee50d7fb9631c5f6cf85a8e0945996ceee79295632a3d14bdc5373b3ac645538c1ce0d9
7
- data.tar.gz: 615c5ac2c995a5a7b59aae5a4599ec75ee409d4ab924682cc9bef9414c104efec897a6084799e63945b49b93432788f16bb750fb003e8e84d6ba59e092bf9e7a
6
+ metadata.gz: ad96f7ae5d41ab3b894559a049c35c475544359b66e003216fcb36eb84756b338ad39143f3ee44652d8cffbcda965a4bbc216ba38e6b3bad3e73b0a1d474695c
7
+ data.tar.gz: c28a4c2587883b4209d79bffdb61ab1c4c8cf7d1abee0df02a7a26a7a6f52185e96b01e61b25a83ad79df5679d536bd9da08e738e686540956d762a0a7705de4
data/README.md CHANGED
@@ -25,9 +25,9 @@ Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset. Red Arrow Datase
25
25
 
26
26
  [GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is middleware for language bindings of C libraries. GObject Introspection can generate language bindings automatically at runtime.
27
27
 
28
- Red Arrow Dataset uses [Apache Arrow Dataset GLib](https://github.com/apache/arrow/tree/master/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow Dataset.
28
+ Red Arrow Dataset uses [Apache Arrow Dataset GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow Dataset.
29
29
 
30
- Apache Arrow Dataset GLib is a C wrapper for [Apache Arrow Dataset C++](https://github.com/apache/arrow/tree/master/cpp). GObject Introspection can't use Apache Arrow Dataset C++ directly. Apache Arrow Dataset GLib is a bridge between Apache Arrow Dataset C++ and GObject Introspection.
30
+ Apache Arrow Dataset GLib is a C wrapper for [Apache Arrow Dataset C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow Dataset C++ directly. Apache Arrow Dataset GLib is a bridge between Apache Arrow Dataset C++ and GObject Introspection.
31
31
 
32
32
  The gobject-introspection gem provides Ruby bindings for GObject Introspection. Red Arrow Dataset uses GObject Introspection via the gobject-introspection gem.
33
33
 
@@ -36,13 +36,21 @@ module ArrowDataset
36
36
  end
37
37
 
38
38
  def internal_load_from_uri(uri)
39
- format = FileFormat.resolve(@options[:format])
39
+ options = @options.dup
40
+ format = FileFormat.resolve(options.delete(:format))
40
41
  dataset = FileSystemDataset.build(format) do |factory|
41
42
  factory.file_system_uri = uri
43
+ finish_options = FinishOptions.new
44
+ FinishOptions.instance_methods(false).each do |method|
45
+ next unless method.to_s.end_with?("=")
46
+ value = options.delete(method[0..-2].to_sym)
47
+ next if value.nil?
48
+ finish_options.public_send(method, value)
49
+ end
50
+ finish_options
42
51
  end
43
52
  scanner_builder = dataset.begin_scan
44
- @options.each do |key, value|
45
- next if key == :format
53
+ options.each do |key, value|
46
54
  next if value.nil?
47
55
  setter = "#{key}="
48
56
  next unless scanner_builder.respond_to?(setter)
@@ -21,8 +21,11 @@ module ArrowDataset
21
21
  def build(*args)
22
22
  factory_class = ArrowDataset.const_get("#{name}Factory")
23
23
  factory = factory_class.new(*args)
24
- yield(factory)
25
- factory.finish
24
+ options = yield(factory)
25
+ unless options.is_a?(FinishOptions)
26
+ options = FinishOptions.try_convert(options)
27
+ end
28
+ factory.finish(options)
26
29
  end
27
30
  end
28
31
  end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module ArrowDataset
19
+ class FinishOptions
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Hash
25
+ options = new
26
+ value.each do |k, v|
27
+ setter = "#{k}="
28
+ next unless options.respond_to?(setter)
29
+ options.public_send(setter, v)
30
+ end
31
+ options
32
+ else
33
+ nil
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -34,6 +34,7 @@ module ArrowDataset
34
34
  require "arrow-dataset/dataset"
35
35
  require "arrow-dataset/file-format"
36
36
  require "arrow-dataset/file-system-dataset-factory"
37
+ require "arrow-dataset/finish-options"
37
38
  end
38
39
  end
39
40
  end
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module ArrowDataset
19
- VERSION = "10.0.1"
19
+ VERSION = "12.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -76,5 +76,18 @@ class TestArrowTable < Test::Unit::TestCase
76
76
  Arrow::Table.load(@dir,
77
77
  filter: ["equal", :visible, true]))
78
78
  end
79
+
80
+ def test_schema
81
+ uri = build_file_uri(@path1)
82
+ @table1.save(uri)
83
+ schema = Arrow::Schema.new(visible: :boolean,
84
+ point: :int64)
85
+ assert_equal(Arrow::Table.new(schema,
86
+ [
87
+ @table1[:visible].data,
88
+ @table1[:point].cast(:int64),
89
+ ]),
90
+ Arrow::Table.load(uri, schema: schema))
91
+ end
79
92
  end
80
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow-dataset
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.0.1
4
+ version: 12.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-10 00:00:00.000000000 Z
11
+ date: 2023-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 10.0.1
19
+ version: 12.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 10.0.1
26
+ version: 12.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -87,6 +87,7 @@ files:
87
87
  - lib/arrow-dataset/dataset.rb
88
88
  - lib/arrow-dataset/file-format.rb
89
89
  - lib/arrow-dataset/file-system-dataset-factory.rb
90
+ - lib/arrow-dataset/finish-options.rb
90
91
  - lib/arrow-dataset/loader.rb
91
92
  - lib/arrow-dataset/version.rb
92
93
  - red-arrow-dataset.gemspec
@@ -98,7 +99,7 @@ homepage: https://arrow.apache.org/
98
99
  licenses:
99
100
  - Apache-2.0
100
101
  metadata: {}
101
- post_install_message:
102
+ post_install_message:
102
103
  rdoc_options: []
103
104
  require_paths:
104
105
  - lib
@@ -113,8 +114,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
114
  - !ruby/object:Gem::Version
114
115
  version: '0'
115
116
  requirements: []
116
- rubygems_version: 3.3.15
117
- signing_key:
117
+ rubygems_version: 3.3.5
118
+ signing_key:
118
119
  specification_version: 4
119
120
  summary: Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset
120
121
  test_files: