red-arrow 2.0.0 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.hpp +15 -2
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/array-builder.rb +11 -6
- data/lib/arrow/array.rb +130 -0
- data/lib/arrow/bigdecimal-extension.rb +5 -1
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/datum.rb +98 -0
- data/lib/arrow/decimal128-array-builder.rb +21 -25
- data/lib/arrow/decimal128-data-type.rb +2 -0
- data/lib/arrow/decimal128.rb +18 -0
- data/lib/arrow/decimal256-array-builder.rb +61 -0
- data/lib/arrow/decimal256-array.rb +25 -0
- data/lib/arrow/decimal256-data-type.rb +73 -0
- data/lib/arrow/decimal256.rb +60 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
- data/lib/arrow/fixed-size-binary-array.rb +26 -0
- data/lib/arrow/loader.rb +46 -0
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/table.rb +2 -2
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +3 -1
- data/test/helper.rb +1 -0
- data/test/raw-records/test-basic-arrays.rb +17 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +20 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array.rb +156 -2
- data/test/test-bigdecimal.rb +20 -3
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-decimal128-array-builder.rb +18 -1
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-decimal128.rb +38 -0
- data/test/test-decimal256-array-builder.rb +112 -0
- data/test/test-decimal256-array.rb +38 -0
- data/test/test-decimal256-data-type.rb +31 -0
- data/test/test-decimal256.rb +102 -0
- data/test/test-fixed-size-binary-array-builder.rb +92 -0
- data/test/test-fixed-size-binary-array.rb +36 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +176 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-orc.rb +19 -23
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-struct-array-builder.rb +8 -8
- data/test/test-struct-array.rb +2 -2
- data/test/values/test-basic-arrays.rb +11 -0
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +18 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +127 -59
data/lib/arrow/array.rb
CHANGED
@@ -55,6 +55,18 @@ module Arrow
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
+
# @param other [Arrow::Array] The array to be compared.
|
59
|
+
# @param options [Arrow::EqualOptions, Hash] (nil)
|
60
|
+
# The options to customize how to compare.
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
# `true` if both of them have the same data, `false` otherwise.
|
64
|
+
#
|
65
|
+
# @since 5.0.0
|
66
|
+
def equal_array?(other, options=nil)
|
67
|
+
equal_options(other, options)
|
68
|
+
end
|
69
|
+
|
58
70
|
def each
|
59
71
|
return to_enum(__method__) unless block_given?
|
60
72
|
|
@@ -100,5 +112,123 @@ module Arrow
|
|
100
112
|
is_in_raw(values)
|
101
113
|
end
|
102
114
|
end
|
115
|
+
|
116
|
+
# @api private
|
117
|
+
alias_method :concatenate_raw, :concatenate
|
118
|
+
# Concatenates the given other arrays to the array.
|
119
|
+
#
|
120
|
+
# @param other_arrays [::Array, Arrow::Array] The arrays to be
|
121
|
+
# concatenated.
|
122
|
+
#
|
123
|
+
# Each other array is processed by {#resolve} before they're
|
124
|
+
# concatenated.
|
125
|
+
#
|
126
|
+
# @example Raw Ruby Array
|
127
|
+
# array = Arrow::Int32Array.new([1])
|
128
|
+
# array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
129
|
+
#
|
130
|
+
# @example Arrow::Array
|
131
|
+
# array = Arrow::Int32Array.new([1])
|
132
|
+
# array.concatenate(Arrow::Int32Array.new([2, 3]),
|
133
|
+
# Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
134
|
+
#
|
135
|
+
# @since 4.0.0
|
136
|
+
def concatenate(*other_arrays)
|
137
|
+
other_arrays = other_arrays.collect do |other_array|
|
138
|
+
resolve(other_array)
|
139
|
+
end
|
140
|
+
concatenate_raw(other_arrays)
|
141
|
+
end
|
142
|
+
|
143
|
+
# Concatenates the given other array to the array.
|
144
|
+
#
|
145
|
+
# If you have multiple arrays to be concatenated, you should use
|
146
|
+
# {#concatenate} to concatenate multiple arrays at once.
|
147
|
+
#
|
148
|
+
# @param other_array [::Array, Arrow::Array] The array to be concatenated.
|
149
|
+
#
|
150
|
+
# `other_array` is processed by {#resolve} before it's
|
151
|
+
# concatenated.
|
152
|
+
#
|
153
|
+
# @example Raw Ruby Array
|
154
|
+
# Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
|
155
|
+
#
|
156
|
+
# @example Arrow::Array
|
157
|
+
# Arrow::Int32Array.new([1]) +
|
158
|
+
# Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
|
159
|
+
#
|
160
|
+
# @since 4.0.0
|
161
|
+
def +(other_array)
|
162
|
+
concatenate(other_array)
|
163
|
+
end
|
164
|
+
|
165
|
+
# Ensures returning the same data type array from the given array.
|
166
|
+
#
|
167
|
+
# @return [Arrow::Array]
|
168
|
+
#
|
169
|
+
# @overload resolve(other_raw_array)
|
170
|
+
#
|
171
|
+
# @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
|
172
|
+
# is built by `self.class.new`.
|
173
|
+
#
|
174
|
+
# @example Raw Ruby Array
|
175
|
+
# int32_array = Arrow::Int32Array.new([1])
|
176
|
+
# other_array = int32_array.resolve([2, 3, 4])
|
177
|
+
# other_array # => Arrow::Int32Array.new([2, 3, 4])
|
178
|
+
#
|
179
|
+
# @overload resolve(other_array)
|
180
|
+
#
|
181
|
+
# @param other_array [Arrow::Array] Another Arrow::Array.
|
182
|
+
#
|
183
|
+
# If the given other array has the same data type as
|
184
|
+
# `self`, the given other array is returned as-is.
|
185
|
+
#
|
186
|
+
# If the given other array doesn't have the same data type as
|
187
|
+
# `self`, the given other array is cast.
|
188
|
+
#
|
189
|
+
# @example Same data type
|
190
|
+
# int32_array = Arrow::Int32Array.new([1])
|
191
|
+
# other_int32_array = Arrow::Int32Array.new([2, 3, 4])
|
192
|
+
# other_array = int32_array.resolve(other_int32_array)
|
193
|
+
# other_array.object_id == other_int32_array.object_id
|
194
|
+
#
|
195
|
+
# @example Other data type
|
196
|
+
# int32_array = Arrow::Int32Array.new([1])
|
197
|
+
# other_int8_array = Arrow::Int8Array.new([2, 3, 4])
|
198
|
+
# other_array = int32_array.resolve(other_int8_array)
|
199
|
+
# other_array #=> Arrow::Int32Array.new([2, 3, 4])
|
200
|
+
#
|
201
|
+
# @since 4.0.0
|
202
|
+
def resolve(other_array)
|
203
|
+
if other_array.is_a?(::Array)
|
204
|
+
builder_class = self.class.builder_class
|
205
|
+
if builder_class.nil?
|
206
|
+
message =
|
207
|
+
"[array][resolve] can't build #{value_data_type} array " +
|
208
|
+
"from raw Ruby Array"
|
209
|
+
raise ArgumentError, message
|
210
|
+
end
|
211
|
+
if builder_class.buildable?([other_array])
|
212
|
+
other_array = builder_class.build(other_array)
|
213
|
+
elsif builder_class.buildable?([value_data_type, other_array])
|
214
|
+
other_array = builder_class.build(value_data_type, other_array)
|
215
|
+
else
|
216
|
+
message =
|
217
|
+
"[array][resolve] need to implement " +
|
218
|
+
"a feature that building #{value_data_type} array " +
|
219
|
+
"from raw Ruby Array"
|
220
|
+
raise NotImpelemented, message
|
221
|
+
end
|
222
|
+
other_array
|
223
|
+
elsif other_array.respond_to?(:value_data_type)
|
224
|
+
return other_array if value_data_type == other_array.value_data_type
|
225
|
+
other_array.cast(value_data_type)
|
226
|
+
else
|
227
|
+
message =
|
228
|
+
"[array][resolve] can't build #{value_data_type} array: " +
|
229
|
+
"#{other_array.inspect}"
|
230
|
+
raise ArgumentError, message
|
231
|
+
end
|
232
|
+
end
|
103
233
|
end
|
104
234
|
end
|
data/lib/arrow/buffer.rb
CHANGED
@@ -17,12 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Buffer
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when String
|
25
|
+
new(value)
|
26
|
+
else
|
27
|
+
nil
|
28
|
+
end
|
29
|
+
end
|
26
30
|
end
|
27
31
|
end
|
28
32
|
end
|
data/lib/arrow/constructor-arguments-gc-guardable.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
module ConstructorArgumentsGCGuardable
|
20
|
+
def initialize(*args)
|
21
|
+
super
|
22
|
+
@arguments = args
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/arrow/data-type.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
module Arrow
|
19
19
|
class DataType
|
20
20
|
class << self
|
21
|
-
#
|
21
|
+
# Ensures that a suitable {Arrow::DataType} is returned.
|
22
22
|
#
|
23
23
|
# @overload resolve(data_type)
|
24
24
|
#
|
@@ -31,17 +31,21 @@ module Arrow
|
|
31
31
|
#
|
32
32
|
# @overload resolve(name)
|
33
33
|
#
|
34
|
-
# Creates a suitable data type from type name. For
|
35
|
-
# you can create {Arrow::BooleanDataType} from
|
34
|
+
# Creates a suitable data type from the given type name. For
|
35
|
+
# example, you can create {Arrow::BooleanDataType} from
|
36
|
+
# `:boolean`.
|
36
37
|
#
|
37
38
|
# @param name [String, Symbol] The type name of the data type.
|
38
39
|
#
|
40
|
+
# @return [Arrow::DataType] A new suitable data type.
|
41
|
+
#
|
39
42
|
# @example Create a boolean data type
|
40
43
|
# Arrow::DataType.resolve(:boolean)
|
41
44
|
#
|
42
45
|
# @overload resolve(name_with_arguments)
|
43
46
|
#
|
44
|
-
# Creates a suitable data type from type name
|
47
|
+
# Creates a new suitable data type from the given type name
|
48
|
+
# with arguments.
|
45
49
|
#
|
46
50
|
# @param name_with_arguments [::Array<String, ...>]
|
47
51
|
# The type name of the data type as the first element.
|
@@ -51,6 +55,8 @@ module Arrow
|
|
51
55
|
# For example, {Arrow::TimestampDataType} needs unit as
|
52
56
|
# additional information.
|
53
57
|
#
|
58
|
+
# @return [Arrow::DataType] A new suitable data type.
|
59
|
+
#
|
54
60
|
# @example Create a boolean data type
|
55
61
|
# Arrow::DataType.resolve([:boolean])
|
56
62
|
#
|
@@ -59,7 +65,8 @@ module Arrow
|
|
59
65
|
#
|
60
66
|
# @overload resolve(description)
|
61
67
|
#
|
62
|
-
# Creates a suitable data type from data type
|
68
|
+
# Creates a new suitable data type from the given data type
|
69
|
+
# description.
|
63
70
|
#
|
64
71
|
# Data type description is a raw `Hash`. Data type description
|
65
72
|
# must have `:type` value. `:type` is the type of the data type.
|
@@ -74,6 +81,8 @@ module Arrow
|
|
74
81
|
# @option description [String, Symbol] :type The type name of
|
75
82
|
# the data type.
|
76
83
|
#
|
84
|
+
# @return [Arrow::DataType] A new suitable data type.
|
85
|
+
#
|
77
86
|
# @example Create a boolean data type
|
78
87
|
# Arrow::DataType.resolve(type: :boolean)
|
79
88
|
#
|
data/lib/arrow/datum.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class Datum
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Array
|
25
|
+
ArrayDatum.new(value)
|
26
|
+
when ChunkedArray
|
27
|
+
ChunkedArrayDatum.new(value)
|
28
|
+
when Scalar
|
29
|
+
ScalarDatum.new(value)
|
30
|
+
when ::Array
|
31
|
+
ArrayDatum.new(ArrayBuilder.build(value))
|
32
|
+
when Integer
|
33
|
+
case value
|
34
|
+
when (0..((2 ** 8) - 1))
|
35
|
+
try_convert(UInt8Scalar.new(value))
|
36
|
+
when ((-(2 ** 7))..((2 ** 7) - 1))
|
37
|
+
try_convert(Int8Scalar.new(value))
|
38
|
+
when (0..((2 ** 16) - 1))
|
39
|
+
try_convert(UInt16Scalar.new(value))
|
40
|
+
when ((-(2 ** 15))..((2 ** 15) - 1))
|
41
|
+
try_convert(Int16Scalar.new(value))
|
42
|
+
when (0..((2 ** 32) - 1))
|
43
|
+
try_convert(UInt32Scalar.new(value))
|
44
|
+
when ((-(2 ** 31))..((2 ** 31) - 1))
|
45
|
+
try_convert(Int32Scalar.new(value))
|
46
|
+
when (0..((2 ** 64) - 1))
|
47
|
+
try_convert(UInt64Scalar.new(value))
|
48
|
+
when ((-(2 ** 63))..((2 ** 63) - 1))
|
49
|
+
try_convert(Int64Scalar.new(value))
|
50
|
+
else
|
51
|
+
nil
|
52
|
+
end
|
53
|
+
when Float
|
54
|
+
try_convert(DoubleScalar.new(value))
|
55
|
+
when true, false
|
56
|
+
try_convert(BooleanScalar.new(value))
|
57
|
+
when String
|
58
|
+
if value.ascii_only? or value.encoding == Encoding::UTF_8
|
59
|
+
if value.bytesize <= ((2 ** 31) - 1)
|
60
|
+
try_convert(StringScalar.new(value))
|
61
|
+
else
|
62
|
+
try_convert(LargeStringScalar.new(value))
|
63
|
+
end
|
64
|
+
else
|
65
|
+
if value.bytesize <= ((2 ** 31) - 1)
|
66
|
+
try_convert(BinaryScalar.new(value))
|
67
|
+
else
|
68
|
+
try_convert(LargeBinaryScalar.new(value))
|
69
|
+
end
|
70
|
+
end
|
71
|
+
when Date
|
72
|
+
date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
|
73
|
+
try_convert(Date32Scalar.new(date32_value))
|
74
|
+
when Time
|
75
|
+
case value.unit
|
76
|
+
when TimeUnit::SECOND, TimeUnit::MILLI
|
77
|
+
data_type = Time32DataType.new(value.unit)
|
78
|
+
scalar_class = Time32Scalar
|
79
|
+
else
|
80
|
+
data_type = Time64DataType.new(value.unit)
|
81
|
+
scalar_class = Time64Scalar
|
82
|
+
end
|
83
|
+
try_convert(scalar_class.new(data_type, value.value))
|
84
|
+
when ::Time
|
85
|
+
data_type = TimestampDataType.new(:nano)
|
86
|
+
timestamp_value = value.to_i * 1_000_000_000 + value.nsec
|
87
|
+
try_convert(TimestampScalar.new(data_type, timestamp_value))
|
88
|
+
when Decimal128
|
89
|
+
data_type = TimestampDataType.new(:nano)
|
90
|
+
timestamp_value = value.to_i * 1_000_000_000 + value.nsec
|
91
|
+
try_convert(Decimal128Scalar.new(data_type, timestamp_value))
|
92
|
+
else
|
93
|
+
nil
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
data/lib/arrow/decimal128-array-builder.rb
CHANGED
@@ -26,36 +26,32 @@ module Arrow
|
|
26
26
|
|
27
27
|
alias_method :append_value_raw, :append_value
|
28
28
|
def append_value(value)
|
29
|
-
|
30
|
-
when nil
|
31
|
-
return append_null
|
32
|
-
when String
|
33
|
-
value = Decimal128.new(value)
|
34
|
-
when Float
|
35
|
-
value = Decimal128.new(value.to_s)
|
36
|
-
when BigDecimal
|
37
|
-
value = value.to_arrow
|
38
|
-
end
|
39
|
-
append_value_raw(value)
|
29
|
+
append_value_raw(normalize_value(value))
|
40
30
|
end
|
41
31
|
|
32
|
+
alias_method :append_values_raw, :append_values
|
42
33
|
def append_values(values, is_valids=nil)
|
43
|
-
if
|
44
|
-
|
45
|
-
|
46
|
-
append_value(values[i])
|
47
|
-
else
|
48
|
-
append_null
|
49
|
-
end
|
34
|
+
if values.is_a?(::Array)
|
35
|
+
values = values.collect do |value|
|
36
|
+
normalize_value(value)
|
50
37
|
end
|
38
|
+
append_values_raw(values, is_valids)
|
51
39
|
else
|
52
|
-
values
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
40
|
+
append_values_packed(values, is_valids)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def normalize_value(value)
|
46
|
+
case value
|
47
|
+
when String
|
48
|
+
Decimal128.new(value)
|
49
|
+
when Float
|
50
|
+
Decimal128.new(value.to_s)
|
51
|
+
when BigDecimal
|
52
|
+
Decimal128.new(value.to_s)
|
53
|
+
else
|
54
|
+
value
|
59
55
|
end
|
60
56
|
end
|
61
57
|
end
|
data/lib/arrow/decimal128.rb
CHANGED
@@ -38,5 +38,23 @@ module Arrow
|
|
38
38
|
to_s_raw
|
39
39
|
end
|
40
40
|
end
|
41
|
+
|
42
|
+
alias_method :abs!, :abs
|
43
|
+
|
44
|
+
# @since 3.0.0
|
45
|
+
def abs
|
46
|
+
copied = dup
|
47
|
+
copied.abs!
|
48
|
+
copied
|
49
|
+
end
|
50
|
+
|
51
|
+
alias_method :negate!, :negate
|
52
|
+
|
53
|
+
# @since 3.0.0
|
54
|
+
def negate
|
55
|
+
copied = dup
|
56
|
+
copied.negate!
|
57
|
+
copied
|
58
|
+
end
|
41
59
|
end
|
42
60
|
end
|