red-arrow 2.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.hpp +15 -2
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/array-builder.rb +11 -6
- data/lib/arrow/array.rb +130 -0
- data/lib/arrow/bigdecimal-extension.rb +5 -1
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/datum.rb +98 -0
- data/lib/arrow/decimal128-array-builder.rb +21 -25
- data/lib/arrow/decimal128-data-type.rb +2 -0
- data/lib/arrow/decimal128.rb +18 -0
- data/lib/arrow/decimal256-array-builder.rb +61 -0
- data/lib/arrow/decimal256-array.rb +25 -0
- data/lib/arrow/decimal256-data-type.rb +73 -0
- data/lib/arrow/decimal256.rb +60 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
- data/lib/arrow/fixed-size-binary-array.rb +26 -0
- data/lib/arrow/loader.rb +46 -0
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/table.rb +2 -2
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +3 -1
- data/test/helper.rb +1 -0
- data/test/raw-records/test-basic-arrays.rb +17 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +20 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array.rb +156 -2
- data/test/test-bigdecimal.rb +20 -3
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-decimal128-array-builder.rb +18 -1
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-decimal128.rb +38 -0
- data/test/test-decimal256-array-builder.rb +112 -0
- data/test/test-decimal256-array.rb +38 -0
- data/test/test-decimal256-data-type.rb +31 -0
- data/test/test-decimal256.rb +102 -0
- data/test/test-fixed-size-binary-array-builder.rb +92 -0
- data/test/test-fixed-size-binary-array.rb +36 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +176 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-orc.rb +19 -23
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-struct-array-builder.rb +8 -8
- data/test/test-struct-array.rb +2 -2
- data/test/values/test-basic-arrays.rb +11 -0
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +18 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +127 -59
data/lib/arrow/array.rb
CHANGED
@@ -55,6 +55,18 @@ module Arrow
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
+
# @param other [Arrow::Array] The array to be compared.
|
59
|
+
# @param options [Arrow::EqualOptions, Hash] (nil)
|
60
|
+
# The options to customize how to compare.
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
# `true` if both of them have the same data, `false` otherwise.
|
64
|
+
#
|
65
|
+
# @since 5.0.0
|
66
|
+
def equal_array?(other, options=nil)
|
67
|
+
equal_options(other, options)
|
68
|
+
end
|
69
|
+
|
58
70
|
def each
|
59
71
|
return to_enum(__method__) unless block_given?
|
60
72
|
|
@@ -100,5 +112,123 @@ module Arrow
|
|
100
112
|
is_in_raw(values)
|
101
113
|
end
|
102
114
|
end
|
115
|
+
|
116
|
+
# @api private
|
117
|
+
alias_method :concatenate_raw, :concatenate
|
118
|
+
# Concatenates the given other arrays to the array.
|
119
|
+
#
|
120
|
+
# @param other_arrays [::Array, Arrow::Array] The arrays to be
|
121
|
+
# concatenated.
|
122
|
+
#
|
123
|
+
# Each other array is processed by {#resolve} before they're
|
124
|
+
# concatenated.
|
125
|
+
#
|
126
|
+
# @example Raw Ruby Array
|
127
|
+
# array = Arrow::Int32Array.new([1])
|
128
|
+
# array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
129
|
+
#
|
130
|
+
# @example Arrow::Array
|
131
|
+
# array = Arrow::Int32Array.new([1])
|
132
|
+
# array.concatenate(Arrow::Int32Array.new([2, 3]),
|
133
|
+
# Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
134
|
+
#
|
135
|
+
# @since 4.0.0
|
136
|
+
def concatenate(*other_arrays)
|
137
|
+
other_arrays = other_arrays.collect do |other_array|
|
138
|
+
resolve(other_array)
|
139
|
+
end
|
140
|
+
concatenate_raw(other_arrays)
|
141
|
+
end
|
142
|
+
|
143
|
+
# Concatenates the given other array to the array.
|
144
|
+
#
|
145
|
+
# If you have multiple arrays to be concatenated, you should use
|
146
|
+
# {#concatenate} to concatenate multiple arrays at once.
|
147
|
+
#
|
148
|
+
# @param other_array [::Array, Arrow::Array] The array to be concatenated.
|
149
|
+
#
|
150
|
+
# `other_array` is processed by {#resolve} before it's
|
151
|
+
# concatenated.
|
152
|
+
#
|
153
|
+
# @example Raw Ruby Array
|
154
|
+
# Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
|
155
|
+
#
|
156
|
+
# @example Arrow::Array
|
157
|
+
# Arrow::Int32Array.new([1]) +
|
158
|
+
# Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
|
159
|
+
#
|
160
|
+
# @since 4.0.0
|
161
|
+
def +(other_array)
|
162
|
+
concatenate(other_array)
|
163
|
+
end
|
164
|
+
|
165
|
+
# Ensures returning the same data type array from the given array.
|
166
|
+
#
|
167
|
+
# @return [Arrow::Array]
|
168
|
+
#
|
169
|
+
# @overload resolve(other_raw_array)
|
170
|
+
#
|
171
|
+
# @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
|
172
|
+
# is built by `self.class.new`.
|
173
|
+
#
|
174
|
+
# @example Raw Ruby Array
|
175
|
+
# int32_array = Arrow::Int32Array.new([1])
|
176
|
+
# other_array = int32_array.resolve([2, 3, 4])
|
177
|
+
# other_array # => Arrow::Int32Array.new([2, 3, 4])
|
178
|
+
#
|
179
|
+
# @overload resolve(other_array)
|
180
|
+
#
|
181
|
+
# @param other_array [Arrow::Array] Another Arrow::Array.
|
182
|
+
#
|
183
|
+
# If the given other array is the same data type array as
|
184
|
+
# `self`, the given other array is returned as-is.
|
185
|
+
#
|
186
|
+
# If the given other array isn't the same data type array as
|
187
|
+
# `self`, the given other array is cast.
|
188
|
+
#
|
189
|
+
# @example Same data type
|
190
|
+
# int32_array = Arrow::Int32Array.new([1])
|
191
|
+
# other_int32_array = Arrow::Int32Array.new([2, 3, 4])
|
192
|
+
# other_array = int32_array.resolve(other_int32_array)
|
193
|
+
# other_array.object_id == other_int32_array.object_id
|
194
|
+
#
|
195
|
+
# @example Other data type
|
196
|
+
# int32_array = Arrow::Int32Array.new([1])
|
197
|
+
# other_int8_array = Arrow::Int8Array.new([2, 3, 4])
|
198
|
+
# other_array = int32_array.resolve(other_int8_array)
|
199
|
+
# other_array #=> Arrow::Int32Array.new([2, 3, 4])
|
200
|
+
#
|
201
|
+
# @since 4.0.0
|
202
|
+
# @raise [ArgumentError] if this array class has no builder class, or if
#   `other_array` is neither a raw Ruby Array nor an object that responds
#   to `value_data_type`.
# @raise [NotImplementedError] if the builder class reports that it can't
#   build an array from the given raw Ruby Array.
def resolve(other_array)
  if other_array.is_a?(::Array)
    builder_class = self.class.builder_class
    if builder_class.nil?
      message =
        "[array][resolve] can't build #{value_data_type} array " +
        "from raw Ruby Array"
      raise ArgumentError, message
    end
    # Try building from the values alone first, then with the value data
    # type prepended to the arguments.
    if builder_class.buildable?([other_array])
      builder_class.build(other_array)
    elsif builder_class.buildable?([value_data_type, other_array])
      builder_class.build(value_data_type, other_array)
    else
      message =
        "[array][resolve] need to implement " +
        "a feature that building #{value_data_type} array " +
        "from raw Ruby Array"
      # Use Ruby's built-in NotImplementedError. The previously referenced
      # constant was misspelled and undefined, so this branch raised
      # NameError instead of reporting the missing feature.
      raise NotImplementedError, message
    end
  elsif other_array.respond_to?(:value_data_type)
    # Same value data type: hand back the very same object, no copy.
    return other_array if value_data_type == other_array.value_data_type
    other_array.cast(value_data_type)
  else
    message =
      "[array][resolve] can't build #{value_data_type} array: " +
      "#{other_array.inspect}"
    raise ArgumentError, message
  end
end
|
103
233
|
end
|
104
234
|
end
|
data/lib/arrow/buffer.rb
CHANGED
@@ -17,12 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Buffer
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when String
|
25
|
+
new(value)
|
26
|
+
else
|
27
|
+
nil
|
28
|
+
end
|
29
|
+
end
|
26
30
|
end
|
27
31
|
end
|
28
32
|
end
|
data/lib/arrow/constructor-arguments-gc-guardable.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
module ConstructorArgumentsGCGuardable
|
20
|
+
def initialize(*args)
|
21
|
+
super
|
22
|
+
@arguments = args
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/arrow/data-type.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
module Arrow
|
19
19
|
class DataType
|
20
20
|
class << self
|
21
|
-
#
|
21
|
+
# Ensure returning suitable {Arrow::DataType}.
|
22
22
|
#
|
23
23
|
# @overload resolve(data_type)
|
24
24
|
#
|
@@ -31,17 +31,21 @@ module Arrow
|
|
31
31
|
#
|
32
32
|
# @overload resolve(name)
|
33
33
|
#
|
34
|
-
# Creates a suitable data type from type name. For
|
35
|
-
# you can create {Arrow::BooleanDataType} from
|
34
|
+
# Creates a suitable data type from the given type name. For
|
35
|
+
# example, you can create {Arrow::BooleanDataType} from
|
36
|
+
# `:boolean`.
|
36
37
|
#
|
37
38
|
# @param name [String, Symbol] The type name of the data type.
|
38
39
|
#
|
40
|
+
# @return [Arrow::DataType] A new suitable data type.
|
41
|
+
#
|
39
42
|
# @example Create a boolean data type
|
40
43
|
# Arrow::DataType.resolve(:boolean)
|
41
44
|
#
|
42
45
|
# @overload resolve(name_with_arguments)
|
43
46
|
#
|
44
|
-
# Creates a suitable data type from type name
|
47
|
+
# Creates a new suitable data type from the given type name
|
48
|
+
# with arguments.
|
45
49
|
#
|
46
50
|
# @param name_with_arguments [::Array<String, ...>]
|
47
51
|
# The type name of the data type as the first element.
|
@@ -51,6 +55,8 @@ module Arrow
|
|
51
55
|
# For example, {Arrow::TimestampDataType} needs unit as
|
52
56
|
# additional information.
|
53
57
|
#
|
58
|
+
# @return [Arrow::DataType] A new suitable data type.
|
59
|
+
#
|
54
60
|
# @example Create a boolean data type
|
55
61
|
# Arrow::DataType.resolve([:boolean])
|
56
62
|
#
|
@@ -59,7 +65,8 @@ module Arrow
|
|
59
65
|
#
|
60
66
|
# @overload resolve(description)
|
61
67
|
#
|
62
|
-
# Creates a suitable data type from data type
|
68
|
+
# Creates a new suitable data type from the given data type
|
69
|
+
# description.
|
63
70
|
#
|
64
71
|
# Data type description is a raw `Hash`. Data type description
|
65
72
|
# must have `:type` value. `:type` is the type of the data type.
|
@@ -74,6 +81,8 @@ module Arrow
|
|
74
81
|
# @option description [String, Symbol] :type The type name of
|
75
82
|
# the data type.
|
76
83
|
#
|
84
|
+
# @return [Arrow::DataType] A new suitable data type.
|
85
|
+
#
|
77
86
|
# @example Create a boolean data type
|
78
87
|
# Arrow::DataType.resolve(type: :boolean)
|
79
88
|
#
|
data/lib/arrow/datum.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class Datum
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Array
|
25
|
+
ArrayDatum.new(value)
|
26
|
+
when ChunkedArray
|
27
|
+
ChunkedArrayDatum.new(value)
|
28
|
+
when Scalar
|
29
|
+
ScalarDatum.new(value)
|
30
|
+
when ::Array
|
31
|
+
ArrayDatum.new(ArrayBuilder.build(value))
|
32
|
+
when Integer
|
33
|
+
case value
|
34
|
+
when (0..((2 ** 8) - 1))
|
35
|
+
try_convert(UInt8Scalar.new(value))
|
36
|
+
when ((-(2 ** 7))..((2 ** 7) - 1))
|
37
|
+
try_convert(Int8Scalar.new(value))
|
38
|
+
when (0..((2 ** 16) - 1))
|
39
|
+
try_convert(UInt16Scalar.new(value))
|
40
|
+
when ((-(2 ** 15))..((2 ** 15) - 1))
|
41
|
+
try_convert(Int16Scalar.new(value))
|
42
|
+
when (0..((2 ** 32) - 1))
|
43
|
+
try_convert(UInt32Scalar.new(value))
|
44
|
+
when ((-(2 ** 31))..((2 ** 31) - 1))
|
45
|
+
try_convert(Int32Scalar.new(value))
|
46
|
+
when (0..((2 ** 64) - 1))
|
47
|
+
try_convert(UInt64Scalar.new(value))
|
48
|
+
when ((-(2 ** 63))..((2 ** 63) - 1))
|
49
|
+
try_convert(Int64Scalar.new(value))
|
50
|
+
else
|
51
|
+
nil
|
52
|
+
end
|
53
|
+
when Float
|
54
|
+
try_convert(DoubleScalar.new(value))
|
55
|
+
when true, false
|
56
|
+
try_convert(BooleanScalar.new(value))
|
57
|
+
when String
|
58
|
+
if value.ascii_only? or value.encoding == Encoding::UTF_8
|
59
|
+
if value.bytesize <= ((2 ** 31) - 1)
|
60
|
+
try_convert(StringScalar.new(value))
|
61
|
+
else
|
62
|
+
try_convert(LargeStringScalar.new(value))
|
63
|
+
end
|
64
|
+
else
|
65
|
+
if value.bytesize <= ((2 ** 31) - 1)
|
66
|
+
try_convert(BinaryScalar.new(value))
|
67
|
+
else
|
68
|
+
try_convert(LargeBinaryScalar.new(value))
|
69
|
+
end
|
70
|
+
end
|
71
|
+
when Date
|
72
|
+
date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
|
73
|
+
try_convert(Date32Scalar.new(date32_value))
|
74
|
+
when Time
|
75
|
+
case value.unit
|
76
|
+
when TimeUnit::SECOND, TimeUnit::MILLI
|
77
|
+
data_type = Time32DataType.new(value.unit)
|
78
|
+
scalar_class = Time32Scalar
|
79
|
+
else
|
80
|
+
data_type = Time64DataType.new(value.unit)
|
81
|
+
scalar_class = Time64Scalar
|
82
|
+
end
|
83
|
+
try_convert(scalar_class.new(data_type, value.value))
|
84
|
+
when ::Time
|
85
|
+
data_type = TimestampDataType.new(:nano)
|
86
|
+
timestamp_value = value.to_i * 1_000_000_000 + value.nsec
|
87
|
+
try_convert(TimestampScalar.new(data_type, timestamp_value))
|
88
|
+
when Decimal128
|
89
|
+
data_type = TimestampDataType.new(:nano)
|
90
|
+
timestamp_value = value.to_i * 1_000_000_000 + value.nsec
|
91
|
+
try_convert(Decimal128Scalar.new(data_type, timestamp_value))
|
92
|
+
else
|
93
|
+
nil
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
data/lib/arrow/decimal128-array-builder.rb
CHANGED
@@ -26,36 +26,32 @@ module Arrow
|
|
26
26
|
|
27
27
|
alias_method :append_value_raw, :append_value
|
28
28
|
def append_value(value)
|
29
|
-
|
30
|
-
when nil
|
31
|
-
return append_null
|
32
|
-
when String
|
33
|
-
value = Decimal128.new(value)
|
34
|
-
when Float
|
35
|
-
value = Decimal128.new(value.to_s)
|
36
|
-
when BigDecimal
|
37
|
-
value = value.to_arrow
|
38
|
-
end
|
39
|
-
append_value_raw(value)
|
29
|
+
append_value_raw(normalize_value(value))
|
40
30
|
end
|
41
31
|
|
32
|
+
alias_method :append_values_raw, :append_values
|
42
33
|
def append_values(values, is_valids=nil)
|
43
|
-
if
|
44
|
-
|
45
|
-
|
46
|
-
append_value(values[i])
|
47
|
-
else
|
48
|
-
append_null
|
49
|
-
end
|
34
|
+
if values.is_a?(::Array)
|
35
|
+
values = values.collect do |value|
|
36
|
+
normalize_value(value)
|
50
37
|
end
|
38
|
+
append_values_raw(values, is_valids)
|
51
39
|
else
|
52
|
-
values
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
40
|
+
append_values_packed(values, is_valids)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def normalize_value(value)
|
46
|
+
case value
|
47
|
+
when String
|
48
|
+
Decimal128.new(value)
|
49
|
+
when Float
|
50
|
+
Decimal128.new(value.to_s)
|
51
|
+
when BigDecimal
|
52
|
+
Decimal128.new(value.to_s)
|
53
|
+
else
|
54
|
+
value
|
59
55
|
end
|
60
56
|
end
|
61
57
|
end
|
data/lib/arrow/decimal128.rb
CHANGED
@@ -38,5 +38,23 @@ module Arrow
|
|
38
38
|
to_s_raw
|
39
39
|
end
|
40
40
|
end
|
41
|
+
|
42
|
+
alias_method :abs!, :abs
|
43
|
+
|
44
|
+
# @since 3.0.0
|
45
|
+
def abs
|
46
|
+
copied = dup
|
47
|
+
copied.abs!
|
48
|
+
copied
|
49
|
+
end
|
50
|
+
|
51
|
+
alias_method :negate!, :negate
|
52
|
+
|
53
|
+
# @since 3.0.0
|
54
|
+
def negate
|
55
|
+
copied = dup
|
56
|
+
copied.negate!
|
57
|
+
copied
|
58
|
+
end
|
41
59
|
end
|
42
60
|
end
|