red-arrow-dataset 10.0.1 → 11.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52af15e972e3de01889a09a6d6ae58217754a6cfd16c7751a769ddd72bdddc6d
4
- data.tar.gz: 88ec8ae29e8c89211da9fd24f030c0a736215ff098cd3813b817f0025f02e1ae
3
+ metadata.gz: e359bff12f88d0d14a8890b1ca27616a8a36ef01e090c6aae6c5590af99f9899
4
+ data.tar.gz: c291a76e52a71502480617fb6fa18ed122ddbec79dad31ce35d2d8afaf02447d
5
5
  SHA512:
6
- metadata.gz: 2c5a288d3c68097d6c4264225f470d2eb87718404f9397bef52d29bafee50d7fb9631c5f6cf85a8e0945996ceee79295632a3d14bdc5373b3ac645538c1ce0d9
7
- data.tar.gz: 615c5ac2c995a5a7b59aae5a4599ec75ee409d4ab924682cc9bef9414c104efec897a6084799e63945b49b93432788f16bb750fb003e8e84d6ba59e092bf9e7a
6
+ metadata.gz: 5dc98531b0877703e1f57b9ba4e601bee6c1fe2556b959cbc876a3fe1b43564b16f78a9973c9c066784c0e3de393d3a2686c63776a748439bf7c761ec16d8815
7
+ data.tar.gz: 5f8dcce29250233c2d513d427b99f1eb77dbb6dba9cca72e9dc9854681f6af6fc503839ee8a66a1f0151bf61afb235e614cc8365fb794a84229db1e73fd437fe
@@ -36,13 +36,21 @@ module ArrowDataset
36
36
  end
37
37
 
38
38
  def internal_load_from_uri(uri)
39
- format = FileFormat.resolve(@options[:format])
39
+ options = @options.dup
40
+ format = FileFormat.resolve(options.delete(:format))
40
41
  dataset = FileSystemDataset.build(format) do |factory|
41
42
  factory.file_system_uri = uri
43
+ finish_options = FinishOptions.new
44
+ FinishOptions.instance_methods(false).each do |method|
45
+ next unless method.to_s.end_with?("=")
46
+ value = options.delete(method[0..-2].to_sym)
47
+ next if value.nil?
48
+ finish_options.public_send(method, value)
49
+ end
50
+ finish_options
42
51
  end
43
52
  scanner_builder = dataset.begin_scan
44
- @options.each do |key, value|
45
- next if key == :format
53
+ options.each do |key, value|
46
54
  next if value.nil?
47
55
  setter = "#{key}="
48
56
  next unless scanner_builder.respond_to?(setter)
@@ -21,8 +21,11 @@ module ArrowDataset
21
21
  def build(*args)
22
22
  factory_class = ArrowDataset.const_get("#{name}Factory")
23
23
  factory = factory_class.new(*args)
24
- yield(factory)
25
- factory.finish
24
+ options = yield(factory)
25
+ unless options.is_a?(FinishOptions)
26
+ options = FinishOptions.try_convert(options)
27
+ end
28
+ factory.finish(options)
26
29
  end
27
30
  end
28
31
  end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module ArrowDataset
19
+ class FinishOptions
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Hash
25
+ options = new
26
+ value.each do |k, v|
27
+ setter = "#{k}="
28
+ next unless options.respond_to?(setter)
29
+ options.public_send(setter, v)
30
+ end
31
+ options
32
+ else
33
+ nil
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -34,6 +34,7 @@ module ArrowDataset
34
34
  require "arrow-dataset/dataset"
35
35
  require "arrow-dataset/file-format"
36
36
  require "arrow-dataset/file-system-dataset-factory"
37
+ require "arrow-dataset/finish-options"
37
38
  end
38
39
  end
39
40
  end
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module ArrowDataset
19
- VERSION = "10.0.1"
19
+ VERSION = "11.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -76,5 +76,18 @@ class TestArrowTable < Test::Unit::TestCase
76
76
  Arrow::Table.load(@dir,
77
77
  filter: ["equal", :visible, true]))
78
78
  end
79
+
80
+ def test_schema
81
+ uri = build_file_uri(@path1)
82
+ @table1.save(uri)
83
+ schema = Arrow::Schema.new(visible: :boolean,
84
+ point: :int64)
85
+ assert_equal(Arrow::Table.new(schema,
86
+ [
87
+ @table1[:visible].data,
88
+ @table1[:point].cast(:int64),
89
+ ]),
90
+ Arrow::Table.load(uri, schema: schema))
91
+ end
79
92
  end
80
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow-dataset
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.0.1
4
+ version: 11.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-10 00:00:00.000000000 Z
11
+ date: 2023-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 10.0.1
19
+ version: 11.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 10.0.1
26
+ version: 11.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -87,6 +87,7 @@ files:
87
87
  - lib/arrow-dataset/dataset.rb
88
88
  - lib/arrow-dataset/file-format.rb
89
89
  - lib/arrow-dataset/file-system-dataset-factory.rb
90
+ - lib/arrow-dataset/finish-options.rb
90
91
  - lib/arrow-dataset/loader.rb
91
92
  - lib/arrow-dataset/version.rb
92
93
  - red-arrow-dataset.gemspec
@@ -113,7 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
114
  - !ruby/object:Gem::Version
114
115
  version: '0'
115
116
  requirements: []
116
- rubygems_version: 3.3.15
117
+ rubygems_version: 3.5.0.dev
117
118
  signing_key:
118
119
  specification_version: 4
119
120
  summary: Red Arrow Dataset is the Ruby bindings of Apache Arrow Dataset