red-arrow 10.0.1 → 12.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/converters.hpp +45 -41
- data/ext/arrow/extconf.rb +14 -2
- data/ext/arrow/raw-records.cpp +1 -2
- data/ext/arrow/values.cpp +1 -2
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/array.rb +5 -0
- data/lib/arrow/chunked-array.rb +23 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +1 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/lib/arrow/input-referable.rb +29 -0
- data/lib/arrow/loader.rb +10 -0
- data/lib/arrow/raw-table-converter.rb +7 -5
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-loader.rb +4 -4
- data/lib/arrow/table-saver.rb +1 -0
- data/lib/arrow/table.rb +178 -31
- data/lib/arrow/tensor.rb +4 -0
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-basic-arrays.rb +10 -0
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-data-type.rb +2 -1
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-half-float-array.rb +43 -0
- data/test/test-half-float.rb +130 -0
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +324 -40
- data/test/values/test-basic-arrays.rb +10 -0
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +30 -9
data/lib/arrow/loader.rb
CHANGED
@@ -39,6 +39,7 @@ module Arrow
|
|
39
39
|
require "arrow/field-containable"
|
40
40
|
require "arrow/generic-filterable"
|
41
41
|
require "arrow/generic-takeable"
|
42
|
+
require "arrow/input-referable"
|
42
43
|
require "arrow/record-containable"
|
43
44
|
require "arrow/symbol-values-appendable"
|
44
45
|
|
@@ -69,6 +70,8 @@ module Arrow
|
|
69
70
|
require "arrow/decimal256-array"
|
70
71
|
require "arrow/decimal256-array-builder"
|
71
72
|
require "arrow/decimal256-data-type"
|
73
|
+
require "arrow/dense-union-array"
|
74
|
+
require "arrow/dense-union-array-builder"
|
72
75
|
require "arrow/dense-union-data-type"
|
73
76
|
require "arrow/dictionary-array"
|
74
77
|
require "arrow/dictionary-data-type"
|
@@ -81,6 +84,9 @@ module Arrow
|
|
81
84
|
require "arrow/fixed-size-binary-array-builder"
|
82
85
|
require "arrow/function"
|
83
86
|
require "arrow/group"
|
87
|
+
require "arrow/half-float"
|
88
|
+
require "arrow/half-float-array"
|
89
|
+
require "arrow/half-float-array-builder"
|
84
90
|
require "arrow/list-array-builder"
|
85
91
|
require "arrow/list-data-type"
|
86
92
|
require "arrow/map-array"
|
@@ -105,6 +111,8 @@ module Arrow
|
|
105
111
|
require "arrow/sort-key"
|
106
112
|
require "arrow/sort-options"
|
107
113
|
require "arrow/source-node-options"
|
114
|
+
require "arrow/sparse-union-array"
|
115
|
+
require "arrow/sparse-union-array-builder"
|
108
116
|
require "arrow/sparse-union-data-type"
|
109
117
|
require "arrow/string-dictionary-array-builder"
|
110
118
|
require "arrow/string-array-builder"
|
@@ -130,6 +138,7 @@ module Arrow
|
|
130
138
|
require "arrow/timestamp-array"
|
131
139
|
require "arrow/timestamp-array-builder"
|
132
140
|
require "arrow/timestamp-data-type"
|
141
|
+
require "arrow/union-array-builder"
|
133
142
|
require "arrow/writable"
|
134
143
|
end
|
135
144
|
|
@@ -196,6 +205,7 @@ module Arrow
|
|
196
205
|
"Arrow::Date64Array",
|
197
206
|
"Arrow::Decimal128Array",
|
198
207
|
"Arrow::Decimal256Array",
|
208
|
+
"Arrow::HalfFloatArray",
|
199
209
|
"Arrow::Time32Array",
|
200
210
|
"Arrow::Time64Array",
|
201
211
|
"Arrow::TimestampArray"
|
@@ -35,14 +35,16 @@ module Arrow
|
|
35
35
|
fields = []
|
36
36
|
@values = []
|
37
37
|
@raw_table.each do |name, array|
|
38
|
-
if array.respond_to?(:
|
39
|
-
|
38
|
+
if array.respond_to?(:to_arrow_chunked_array)
|
39
|
+
chunked_array = array.to_arrow_chunked_array
|
40
|
+
elsif array.respond_to?(:to_arrow_array)
|
41
|
+
chunked_array = ChunkedArray.new([array.to_arrow_array])
|
40
42
|
else
|
41
43
|
array = array.to_ary if array.respond_to?(:to_ary)
|
42
|
-
|
44
|
+
chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
|
43
45
|
end
|
44
|
-
fields << Field.new(name.to_s,
|
45
|
-
@values <<
|
46
|
+
fields << Field.new(name.to_s, chunked_array.value_data_type)
|
47
|
+
@values << chunked_array
|
46
48
|
end
|
47
49
|
@schema = Schema.new(fields)
|
48
50
|
end
|
data/lib/arrow/record-batch.rb
CHANGED
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
|
|
19
19
|
|
20
20
|
module Arrow
|
21
21
|
class RecordBatch
|
22
|
+
include Enumerable
|
23
|
+
|
22
24
|
include ColumnContainable
|
25
|
+
include InputReferable
|
23
26
|
include RecordContainable
|
24
|
-
include Enumerable
|
25
27
|
|
26
28
|
class << self
|
27
29
|
def new(*args)
|
@@ -56,7 +58,9 @@ module Arrow
|
|
56
58
|
#
|
57
59
|
# @since 0.12.0
|
58
60
|
def to_table
|
59
|
-
Table.new(schema, [self])
|
61
|
+
table = Table.new(schema, [self])
|
62
|
+
share_input(table)
|
63
|
+
table
|
60
64
|
end
|
61
65
|
|
62
66
|
def respond_to_missing?(name, include_private)
|
data/lib/arrow/scalar.rb
CHANGED
@@ -17,6 +17,73 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Scalar
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when self
|
25
|
+
value
|
26
|
+
when true, false
|
27
|
+
BooleanScalar.new(value)
|
28
|
+
when Symbol, String
|
29
|
+
StringScalar.new(value.to_s)
|
30
|
+
when Integer
|
31
|
+
Int64Scalar.new(value)
|
32
|
+
when Float
|
33
|
+
DoubleScalar.new(value)
|
34
|
+
else
|
35
|
+
nil
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Ensure returning suitable {Arrow::Scalar}.
|
40
|
+
#
|
41
|
+
# @overload resolve(scalar)
|
42
|
+
#
|
43
|
+
# Returns the given scalar itself. This makes it convenient to
|
44
|
+
# use this method as an {Arrow::Scalar} converter.
|
45
|
+
#
|
46
|
+
# @param scalar [Arrow::Scalar] The scalar.
|
47
|
+
#
|
48
|
+
# @return [Arrow::Scalar] The given scalar itself.
|
49
|
+
#
|
50
|
+
# @overload resolve(value)
|
51
|
+
#
|
52
|
+
# Creates a suitable scalar from the given value. For example,
|
53
|
+
# you can create {Arrow::BooleanScalar} from `true`.
|
54
|
+
#
|
55
|
+
# @param value [Object] The value.
|
56
|
+
#
|
57
|
+
# @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
|
58
|
+
#
|
59
|
+
# @overload resolve(value, data_type)
|
60
|
+
#
|
61
|
+
# Creates a scalar of `data_type.scalar_class` from the given
|
62
|
+
# value. For example, you can create {Arrow::Int32Scalar} from
|
63
|
+
# `29` and {Arrow::Int32DataType}.
|
64
|
+
#
|
65
|
+
# @param value [Object] The value.
|
66
|
+
#
|
67
|
+
# @param data_type [Arrow::DataType] The {Arrow::DataType} to
|
68
|
+
# decide the returned scalar class.
|
69
|
+
#
|
70
|
+
# @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
|
71
|
+
#
|
72
|
+
# @since 12.0.0
|
73
|
+
def resolve(value, data_type=nil)
|
74
|
+
return try_convert(value) if data_type.nil?
|
75
|
+
|
76
|
+
data_type = DataType.resolve(data_type)
|
77
|
+
scalar_class = data_type.scalar_class
|
78
|
+
case value
|
79
|
+
when Scalar
|
80
|
+
return value if value.class == scalar_class
|
81
|
+
value = value.value
|
82
|
+
end
|
83
|
+
scalar_class.new(value)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
20
87
|
# @param other [Arrow::Scalar] The scalar to be compared.
|
21
88
|
# @param options [Arrow::EqualOptions, Hash] (nil)
|
22
89
|
# The options to custom how to compare.
|
data/lib/arrow/slicer.rb
CHANGED
@@ -162,6 +162,40 @@ module Arrow
|
|
162
162
|
def reject(&block)
|
163
163
|
RejectCondition.new(@column, block)
|
164
164
|
end
|
165
|
+
|
166
|
+
def end_with?(substring, ignore_case: false)
|
167
|
+
MatchSubstringFamilyCondition.new("ends_with",
|
168
|
+
@column, substring, ignore_case)
|
169
|
+
end
|
170
|
+
|
171
|
+
def match_like?(pattern, ignore_case: false)
|
172
|
+
MatchSubstringFamilyCondition.new("match_like",
|
173
|
+
@column, pattern, ignore_case)
|
174
|
+
end
|
175
|
+
|
176
|
+
def match_substring?(pattern, ignore_case: nil)
|
177
|
+
case pattern
|
178
|
+
when String
|
179
|
+
ignore_case = false if ignore_case.nil?
|
180
|
+
MatchSubstringFamilyCondition.new("match_substring",
|
181
|
+
@column, pattern, ignore_case)
|
182
|
+
when Regexp
|
183
|
+
ignore_case = pattern.casefold? if ignore_case.nil?
|
184
|
+
MatchSubstringFamilyCondition.new("match_substring_regex",
|
185
|
+
@column,
|
186
|
+
pattern.source,
|
187
|
+
ignore_case)
|
188
|
+
else
|
189
|
+
message =
|
190
|
+
"pattern must be either String or Regexp: #{pattern.inspect}"
|
191
|
+
raise ArgumentError, message
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def start_with?(substring, ignore_case: false)
|
196
|
+
MatchSubstringFamilyCondition.new("starts_with",
|
197
|
+
@column, substring, ignore_case)
|
198
|
+
end
|
165
199
|
end
|
166
200
|
|
167
201
|
class NotColumnCondition < Condition
|
@@ -351,5 +385,32 @@ module Arrow
|
|
351
385
|
BooleanArray.new(raw_array)
|
352
386
|
end
|
353
387
|
end
|
388
|
+
|
389
|
+
class MatchSubstringFamilyCondition < Condition
|
390
|
+
def initialize(function, column, pattern, ignore_case, invert: false)
|
391
|
+
@function = function
|
392
|
+
@column = column
|
393
|
+
@options = MatchSubstringOptions.new
|
394
|
+
@options.pattern = pattern
|
395
|
+
@options.ignore_case = ignore_case
|
396
|
+
@invert = invert
|
397
|
+
end
|
398
|
+
|
399
|
+
def !@
|
400
|
+
MatchSubstringFamilyCondition.new(@function,
|
401
|
+
@column,
|
402
|
+
@options.pattern,
|
403
|
+
@options.ignore_case?,
|
404
|
+
invert: !@invert)
|
405
|
+
end
|
406
|
+
|
407
|
+
def evaluate
|
408
|
+
datum = Function.find(@function).execute([@column.data], @options)
|
409
|
+
if @invert
|
410
|
+
datum = Function.find("invert").execute([datum])
|
411
|
+
end
|
412
|
+
datum.value
|
413
|
+
end
|
414
|
+
end
|
354
415
|
end
|
355
416
|
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class SparseUnionArrayBuilder
|
20
|
+
alias_method :append_value_raw, :append_value
|
21
|
+
|
22
|
+
# @overload append_value
|
23
|
+
#
|
24
|
+
# Starts appending a union record. You need to append values of
|
25
|
+
# fields.
|
26
|
+
#
|
27
|
+
# @overload append_value(value)
|
28
|
+
#
|
29
|
+
# Appends a union record including values of fields.
|
30
|
+
#
|
31
|
+
# @param value [nil, Hash] The union record value.
|
32
|
+
#
|
33
|
+
# If this is `nil`, the union record is null.
|
34
|
+
#
|
35
|
+
# If this is a `Hash`, it holds the values of fields.
|
36
|
+
#
|
37
|
+
# @since 12.0.0
|
38
|
+
def append_value(value)
|
39
|
+
if value.nil?
|
40
|
+
append_null
|
41
|
+
else
|
42
|
+
key = value.keys[0]
|
43
|
+
child_info = child_infos[key]
|
44
|
+
append_value_raw(child_info[:id])
|
45
|
+
child_infos.each do |child_key, child_info|
|
46
|
+
builder = child_info[:builder]
|
47
|
+
if child_key == key
|
48
|
+
builder.append(value.values[0])
|
49
|
+
else
|
50
|
+
builder.append_null
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class SparseUnionArray
|
20
|
+
def get_value(i)
|
21
|
+
child_id = get_child_id(i)
|
22
|
+
field = get_field(child_id)
|
23
|
+
field[i]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/arrow/table-loader.rb
CHANGED
@@ -161,7 +161,7 @@ module Arrow
|
|
161
161
|
record_batches << record_batch
|
162
162
|
end
|
163
163
|
table = Table.new(schema, record_batches)
|
164
|
-
table.
|
164
|
+
table.refer_input(input)
|
165
165
|
table
|
166
166
|
end
|
167
167
|
|
@@ -211,7 +211,7 @@ module Arrow
|
|
211
211
|
field_indexes = @options[:field_indexes]
|
212
212
|
reader.set_field_indexes(field_indexes) if field_indexes
|
213
213
|
table = reader.read_stripes
|
214
|
-
table.
|
214
|
+
table.refer_input(input)
|
215
215
|
table
|
216
216
|
end
|
217
217
|
end
|
@@ -245,7 +245,7 @@ module Arrow
|
|
245
245
|
open_input_stream do |input|
|
246
246
|
reader = FeatherFileReader.new(input)
|
247
247
|
table = reader.read
|
248
|
-
table.
|
248
|
+
table.refer_input(input)
|
249
249
|
table
|
250
250
|
end
|
251
251
|
end
|
@@ -254,7 +254,7 @@ module Arrow
|
|
254
254
|
open_input_stream do |input|
|
255
255
|
reader = JSONReader.new(input)
|
256
256
|
table = reader.read
|
257
|
-
table.
|
257
|
+
table.refer_input(input)
|
258
258
|
table
|
259
259
|
end
|
260
260
|
end
|