red-arrow 10.0.1 → 12.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/converters.hpp +45 -41
- data/ext/arrow/extconf.rb +14 -2
- data/ext/arrow/raw-records.cpp +1 -2
- data/ext/arrow/values.cpp +1 -2
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/array.rb +5 -0
- data/lib/arrow/chunked-array.rb +23 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +1 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/lib/arrow/input-referable.rb +29 -0
- data/lib/arrow/loader.rb +10 -0
- data/lib/arrow/raw-table-converter.rb +7 -5
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-loader.rb +4 -4
- data/lib/arrow/table-saver.rb +1 -0
- data/lib/arrow/table.rb +178 -31
- data/lib/arrow/tensor.rb +4 -0
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-basic-arrays.rb +10 -0
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-data-type.rb +2 -1
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-half-float-array.rb +43 -0
- data/test/test-half-float.rb +130 -0
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +324 -40
- data/test/values/test-basic-arrays.rb +10 -0
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +30 -9
data/lib/arrow/loader.rb
CHANGED
@@ -39,6 +39,7 @@ module Arrow
|
|
39
39
|
require "arrow/field-containable"
|
40
40
|
require "arrow/generic-filterable"
|
41
41
|
require "arrow/generic-takeable"
|
42
|
+
require "arrow/input-referable"
|
42
43
|
require "arrow/record-containable"
|
43
44
|
require "arrow/symbol-values-appendable"
|
44
45
|
|
@@ -69,6 +70,8 @@ module Arrow
|
|
69
70
|
require "arrow/decimal256-array"
|
70
71
|
require "arrow/decimal256-array-builder"
|
71
72
|
require "arrow/decimal256-data-type"
|
73
|
+
require "arrow/dense-union-array"
|
74
|
+
require "arrow/dense-union-array-builder"
|
72
75
|
require "arrow/dense-union-data-type"
|
73
76
|
require "arrow/dictionary-array"
|
74
77
|
require "arrow/dictionary-data-type"
|
@@ -81,6 +84,9 @@ module Arrow
|
|
81
84
|
require "arrow/fixed-size-binary-array-builder"
|
82
85
|
require "arrow/function"
|
83
86
|
require "arrow/group"
|
87
|
+
require "arrow/half-float"
|
88
|
+
require "arrow/half-float-array"
|
89
|
+
require "arrow/half-float-array-builder"
|
84
90
|
require "arrow/list-array-builder"
|
85
91
|
require "arrow/list-data-type"
|
86
92
|
require "arrow/map-array"
|
@@ -105,6 +111,8 @@ module Arrow
|
|
105
111
|
require "arrow/sort-key"
|
106
112
|
require "arrow/sort-options"
|
107
113
|
require "arrow/source-node-options"
|
114
|
+
require "arrow/sparse-union-array"
|
115
|
+
require "arrow/sparse-union-array-builder"
|
108
116
|
require "arrow/sparse-union-data-type"
|
109
117
|
require "arrow/string-dictionary-array-builder"
|
110
118
|
require "arrow/string-array-builder"
|
@@ -130,6 +138,7 @@ module Arrow
|
|
130
138
|
require "arrow/timestamp-array"
|
131
139
|
require "arrow/timestamp-array-builder"
|
132
140
|
require "arrow/timestamp-data-type"
|
141
|
+
require "arrow/union-array-builder"
|
133
142
|
require "arrow/writable"
|
134
143
|
end
|
135
144
|
|
@@ -196,6 +205,7 @@ module Arrow
|
|
196
205
|
"Arrow::Date64Array",
|
197
206
|
"Arrow::Decimal128Array",
|
198
207
|
"Arrow::Decimal256Array",
|
208
|
+
"Arrow::HalfFloatArray",
|
199
209
|
"Arrow::Time32Array",
|
200
210
|
"Arrow::Time64Array",
|
201
211
|
"Arrow::TimestampArray"
|
data/lib/arrow/raw-table-converter.rb
CHANGED
@@ -35,14 +35,16 @@ module Arrow
|
|
35
35
|
fields = []
|
36
36
|
@values = []
|
37
37
|
@raw_table.each do |name, array|
|
38
|
-
if array.respond_to?(:
|
39
|
-
|
38
|
+
if array.respond_to?(:to_arrow_chunked_array)
|
39
|
+
chunked_array = array.to_arrow_chunked_array
|
40
|
+
elsif array.respond_to?(:to_arrow_array)
|
41
|
+
chunked_array = ChunkedArray.new([array.to_arrow_array])
|
40
42
|
else
|
41
43
|
array = array.to_ary if array.respond_to?(:to_ary)
|
42
|
-
|
44
|
+
chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
|
43
45
|
end
|
44
|
-
fields << Field.new(name.to_s,
|
45
|
-
@values <<
|
46
|
+
fields << Field.new(name.to_s, chunked_array.value_data_type)
|
47
|
+
@values << chunked_array
|
46
48
|
end
|
47
49
|
@schema = Schema.new(fields)
|
48
50
|
end
|
data/lib/arrow/record-batch.rb
CHANGED
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
|
|
19
19
|
|
20
20
|
module Arrow
|
21
21
|
class RecordBatch
|
22
|
+
include Enumerable
|
23
|
+
|
22
24
|
include ColumnContainable
|
25
|
+
include InputReferable
|
23
26
|
include RecordContainable
|
24
|
-
include Enumerable
|
25
27
|
|
26
28
|
class << self
|
27
29
|
def new(*args)
|
@@ -56,7 +58,9 @@ module Arrow
|
|
56
58
|
#
|
57
59
|
# @since 0.12.0
|
58
60
|
def to_table
|
59
|
-
Table.new(schema, [self])
|
61
|
+
table = Table.new(schema, [self])
|
62
|
+
share_input(table)
|
63
|
+
table
|
60
64
|
end
|
61
65
|
|
62
66
|
def respond_to_missing?(name, include_private)
|
data/lib/arrow/scalar.rb
CHANGED
@@ -17,6 +17,73 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Scalar
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when self
|
25
|
+
value
|
26
|
+
when true, false
|
27
|
+
BooleanScalar.new(value)
|
28
|
+
when Symbol, String
|
29
|
+
StringScalar.new(value.to_s)
|
30
|
+
when Integer
|
31
|
+
Int64Scalar.new(value)
|
32
|
+
when Float
|
33
|
+
DoubleScalar.new(value)
|
34
|
+
else
|
35
|
+
nil
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Ensure returning suitable {Arrow::Scalar}.
|
40
|
+
#
|
41
|
+
# @overload resolve(scalar)
|
42
|
+
#
|
43
|
+
# Returns the given scalar itself. This makes it convenient to
|
44
|
+
# use this method as an {Arrow::Scalar} converter.
|
45
|
+
#
|
46
|
+
# @param scalar [Arrow::Scalar] The scalar.
|
47
|
+
#
|
48
|
+
# @return [Arrow::Scalar] The given scalar itself.
|
49
|
+
#
|
50
|
+
# @overload resolve(value)
|
51
|
+
#
|
52
|
+
# Creates a suitable scalar from the given value. For example,
|
53
|
+
# you can create {Arrow::BooleanScalar} from `true`.
|
54
|
+
#
|
55
|
+
# @param value [Object] The value.
|
56
|
+
#
|
57
|
+
# @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
|
58
|
+
#
|
59
|
+
# @overload resolve(value, data_type)
|
60
|
+
#
|
61
|
+
# Creates a scalar of `data_type.scalar_class` from the given
|
62
|
+
# value. For example, you can create {Arrow::Int32Scalar} from
|
63
|
+
# `29` and {Arrow::Int32DataType}.
|
64
|
+
#
|
65
|
+
# @param value [Object] The value.
|
66
|
+
#
|
67
|
+
# @param data_type [Arrow::DataType] The {Arrow::DataType} to
|
68
|
+
# decide the returned scalar class.
|
69
|
+
#
|
70
|
+
# @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
|
71
|
+
#
|
72
|
+
# @since 12.0.0
|
73
|
+
def resolve(value, data_type=nil)
|
74
|
+
return try_convert(value) if data_type.nil?
|
75
|
+
|
76
|
+
data_type = DataType.resolve(data_type)
|
77
|
+
scalar_class = data_type.scalar_class
|
78
|
+
case value
|
79
|
+
when Scalar
|
80
|
+
return value if value.class == scalar_class
|
81
|
+
value = value.value
|
82
|
+
end
|
83
|
+
scalar_class.new(value)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
20
87
|
# @param other [Arrow::Scalar] The scalar to be compared.
|
21
88
|
# @param options [Arrow::EqualOptions, Hash] (nil)
|
22
89
|
# The options to customize how to compare.
|
data/lib/arrow/slicer.rb
CHANGED
@@ -162,6 +162,40 @@ module Arrow
|
|
162
162
|
def reject(&block)
|
163
163
|
RejectCondition.new(@column, block)
|
164
164
|
end
|
165
|
+
|
166
|
+
def end_with?(substring, ignore_case: false)
|
167
|
+
MatchSubstringFamilyCondition.new("ends_with",
|
168
|
+
@column, substring, ignore_case)
|
169
|
+
end
|
170
|
+
|
171
|
+
def match_like?(pattern, ignore_case: false)
|
172
|
+
MatchSubstringFamilyCondition.new("match_like",
|
173
|
+
@column, pattern, ignore_case)
|
174
|
+
end
|
175
|
+
|
176
|
+
def match_substring?(pattern, ignore_case: nil)
|
177
|
+
case pattern
|
178
|
+
when String
|
179
|
+
ignore_case = false if ignore_case.nil?
|
180
|
+
MatchSubstringFamilyCondition.new("match_substring",
|
181
|
+
@column, pattern, ignore_case)
|
182
|
+
when Regexp
|
183
|
+
ignore_case = pattern.casefold? if ignore_case.nil?
|
184
|
+
MatchSubstringFamilyCondition.new("match_substring_regex",
|
185
|
+
@column,
|
186
|
+
pattern.source,
|
187
|
+
ignore_case)
|
188
|
+
else
|
189
|
+
message =
|
190
|
+
"pattern must be either String or Regexp: #{pattern.inspect}"
|
191
|
+
raise ArgumentError, message
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def start_with?(substring, ignore_case: false)
|
196
|
+
MatchSubstringFamilyCondition.new("starts_with",
|
197
|
+
@column, substring, ignore_case)
|
198
|
+
end
|
165
199
|
end
|
166
200
|
|
167
201
|
class NotColumnCondition < Condition
|
@@ -351,5 +385,32 @@ module Arrow
|
|
351
385
|
BooleanArray.new(raw_array)
|
352
386
|
end
|
353
387
|
end
|
388
|
+
|
389
|
+
class MatchSubstringFamilyCondition < Condition
|
390
|
+
def initialize(function, column, pattern, ignore_case, invert: false)
|
391
|
+
@function = function
|
392
|
+
@column = column
|
393
|
+
@options = MatchSubstringOptions.new
|
394
|
+
@options.pattern = pattern
|
395
|
+
@options.ignore_case = ignore_case
|
396
|
+
@invert = invert
|
397
|
+
end
|
398
|
+
|
399
|
+
def !@
|
400
|
+
MatchSubstringFamilyCondition.new(@function,
|
401
|
+
@column,
|
402
|
+
@options.pattern,
|
403
|
+
@options.ignore_case?,
|
404
|
+
invert: !@invert)
|
405
|
+
end
|
406
|
+
|
407
|
+
def evaluate
|
408
|
+
datum = Function.find(@function).execute([@column.data], @options)
|
409
|
+
if @invert
|
410
|
+
datum = Function.find("invert").execute([datum])
|
411
|
+
end
|
412
|
+
datum.value
|
413
|
+
end
|
414
|
+
end
|
354
415
|
end
|
355
416
|
end
|
data/lib/arrow/sparse-union-array-builder.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class SparseUnionArrayBuilder
|
20
|
+
alias_method :append_value_raw, :append_value
|
21
|
+
|
22
|
+
# @overload append_value
|
23
|
+
#
|
24
|
+
# Starts appending a union record. You need to append values of
|
25
|
+
# fields.
|
26
|
+
#
|
27
|
+
# @overload append_value(value)
|
28
|
+
#
|
29
|
+
# Appends a union record including values of fields.
|
30
|
+
#
|
31
|
+
# @param value [nil, Hash] The union record value.
|
32
|
+
#
|
33
|
+
# If this is `nil`, the union record is null.
|
34
|
+
#
|
35
|
+
# If this is `Hash`, its first key names the field to fill and its first value is appended to that field.
|
36
|
+
#
|
37
|
+
# @since 12.0.0
|
38
|
+
def append_value(value)
|
39
|
+
if value.nil?
|
40
|
+
append_null
|
41
|
+
else
|
42
|
+
key = value.keys[0]
|
43
|
+
child_info = child_infos[key]
|
44
|
+
append_value_raw(child_info[:id])
|
45
|
+
child_infos.each do |child_key, child_info|
|
46
|
+
builder = child_info[:builder]
|
47
|
+
if child_key == key
|
48
|
+
builder.append(value.values[0])
|
49
|
+
else
|
50
|
+
builder.append_null
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/lib/arrow/sparse-union-array.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class SparseUnionArray
|
20
|
+
def get_value(i)
|
21
|
+
child_id = get_child_id(i)
|
22
|
+
field = get_field(child_id)
|
23
|
+
field[i]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/arrow/table-loader.rb
CHANGED
@@ -161,7 +161,7 @@ module Arrow
|
|
161
161
|
record_batches << record_batch
|
162
162
|
end
|
163
163
|
table = Table.new(schema, record_batches)
|
164
|
-
table.
|
164
|
+
table.refer_input(input)
|
165
165
|
table
|
166
166
|
end
|
167
167
|
|
@@ -211,7 +211,7 @@ module Arrow
|
|
211
211
|
field_indexes = @options[:field_indexes]
|
212
212
|
reader.set_field_indexes(field_indexes) if field_indexes
|
213
213
|
table = reader.read_stripes
|
214
|
-
table.
|
214
|
+
table.refer_input(input)
|
215
215
|
table
|
216
216
|
end
|
217
217
|
end
|
@@ -245,7 +245,7 @@ module Arrow
|
|
245
245
|
open_input_stream do |input|
|
246
246
|
reader = FeatherFileReader.new(input)
|
247
247
|
table = reader.read
|
248
|
-
table.
|
248
|
+
table.refer_input(input)
|
249
249
|
table
|
250
250
|
end
|
251
251
|
end
|
@@ -254,7 +254,7 @@ module Arrow
|
|
254
254
|
open_input_stream do |input|
|
255
255
|
reader = JSONReader.new(input)
|
256
256
|
table = reader.read
|
257
|
-
table.
|
257
|
+
table.refer_input(input)
|
258
258
|
table
|
259
259
|
end
|
260
260
|
end
|