red-arrow 1.0.0 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +15 -2
- data/ext/arrow/extconf.rb +14 -3
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/array-builder.rb +11 -6
- data/lib/arrow/array.rb +118 -0
- data/lib/arrow/bigdecimal-extension.rb +5 -1
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/decimal128-array-builder.rb +21 -25
- data/lib/arrow/decimal128-data-type.rb +2 -0
- data/lib/arrow/decimal128.rb +18 -0
- data/lib/arrow/decimal256-array-builder.rb +61 -0
- data/lib/arrow/decimal256-array.rb +25 -0
- data/lib/arrow/decimal256-data-type.rb +73 -0
- data/lib/arrow/decimal256.rb +60 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
- data/lib/arrow/fixed-size-binary-array.rb +26 -0
- data/lib/arrow/loader.rb +15 -0
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -0
- data/test/raw-records/test-basic-arrays.rb +17 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +20 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array.rb +122 -2
- data/test/test-bigdecimal.rb +20 -3
- data/test/test-decimal128-array-builder.rb +18 -1
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-decimal128.rb +38 -0
- data/test/test-decimal256-array-builder.rb +112 -0
- data/test/test-decimal256-array.rb +38 -0
- data/test/test-decimal256-data-type.rb +31 -0
- data/test/test-decimal256.rb +102 -0
- data/test/test-fixed-size-binary-array-builder.rb +92 -0
- data/test/test-fixed-size-binary-array.rb +36 -0
- data/test/test-orc.rb +19 -23
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-struct-array-builder.rb +8 -8
- data/test/test-struct-array.rb +2 -2
- data/test/values/test-basic-arrays.rb +11 -0
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +18 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +101 -61
@@ -0,0 +1,109 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  # Ruby-side conveniences for Arrow::SortOptions.
  #
  # This reopens the class: the pre-existing `initialize` and
  # `add_sort_key` are kept under `*_raw` aliases and wrapped so that
  # sort keys can be given as Symbols, Strings or Arrow::SortKey
  # objects. The `*_raw` implementations are defined outside this file.
  class SortOptions
    class << self
      # Builds a sort options object from a loosely typed value.
      #
      # @param value [Symbol, String, ::Array, Object] A single sort key
      #   name, or an Array whose elements are passed to {#initialize}
      #   as individual sort keys.
      #
      # @return [Arrow::SortOptions, nil] The new sort options, or `nil`
      #   when `value` is none of the supported types.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol, String
          new(value)
        when ::Array
          new(*value)
        else
          nil
        end
      end
    end

    alias_method :initialize_raw, :initialize
    private :initialize_raw
    # @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
    #   sort keys to be used. See {Arrow::SortKey.resolve} for how to
    #   resolve each sort key in `sort_keys`.
    #
    #   You can add more sort keys by {#add_sort_key} later.
    #
    # @example No initial sort keys
    #   options = Arrow::SortOptions.new
    #   options.sort_keys # => []
    #
    # @example String sort keys
    #   options = Arrow::SortOptions.new("count", "-age")
    #   options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
    #
    # @example Symbol sort keys
    #   options = Arrow::SortOptions.new(:count, :age)
    #   options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
    #
    # @example Mixed sort keys
    #   options = Arrow::SortOptions.new(:count, "-age")
    #   options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
    #
    # @since 4.0.0
    def initialize(*sort_keys)
      # The raw initializer takes no sort keys; each key is added
      # afterwards through #add_sort_key so that it gets resolved.
      initialize_raw
      sort_keys.each do |sort_key|
        add_sort_key(sort_key)
      end
    end

    # @api private
    alias_method :add_sort_key_raw, :add_sort_key
    # Add a sort key.
    #
    # @return [void]
    #
    # @overload add_sort_key(key)
    #
    #   @param key [Arrow::SortKey] The sort key to be added.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key(Arrow::SortKey.new(:price, :descending))
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @overload add_sort_key(name)
    #
    #   @param name [Symbol, String] The sort key name to be
    #     added. See also {Arrow::SortKey#initialize} for the leading
    #     order mark for String name.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key("-price")
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @overload add_sort_key(name, order)
    #
    #   @param name [Symbol, String] The sort key name.
    #
    #   @param order [Symbol, String, Arrow::SortOrder] The sort
    #     order. See {Arrow::SortKey#initialize} for details.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key("price", :desc)
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @since 4.0.0
    def add_sort_key(name, order=nil)
      add_sort_key_raw(SortKey.resolve(name, order))
    end
  end
end
|
@@ -33,7 +33,7 @@ module Arrow
|
|
33
33
|
# @param type_codes [::Array<Integer>] The IDs that indicates
|
34
34
|
# corresponding fields.
|
35
35
|
#
|
36
|
-
# @example Create a sparse union data type for {2: visible, 9: count}
|
36
|
+
# @example Create a sparse union data type for `{2: visible, 9: count}`
|
37
37
|
# fields = [
|
38
38
|
# Arrow::Field.new("visible", :boolean),
|
39
39
|
# {
|
@@ -57,7 +57,7 @@ module Arrow
|
|
57
57
|
# @option description [::Array<Integer>] :type_codes The IDs
|
58
58
|
# that indicates corresponding fields.
|
59
59
|
#
|
60
|
-
# @example Create a sparse union data type for {2: visible, 9: count}
|
60
|
+
# @example Create a sparse union data type for `{2: visible, 9: count}`
|
61
61
|
# fields = [
|
62
62
|
# Arrow::Field.new("visible", :boolean),
|
63
63
|
# {
|
@@ -29,7 +29,7 @@ module Arrow
|
|
29
29
|
#
|
30
30
|
# The unit must be second or millisecond.
|
31
31
|
#
|
32
|
-
# @example Create a time32 data type with
|
32
|
+
# @example Create a time32 data type with Arrow::TimeUnit
|
33
33
|
# Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
|
34
34
|
#
|
35
35
|
# @example Create a time32 data type with Symbol
|
@@ -45,7 +45,7 @@ module Arrow
|
|
45
45
|
#
|
46
46
|
# The unit must be second or millisecond.
|
47
47
|
#
|
48
|
-
# @example Create a time32 data type with
|
48
|
+
# @example Create a time32 data type with Arrow::TimeUnit
|
49
49
|
# Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
|
50
50
|
#
|
51
51
|
# @example Create a time32 data type with Symbol
|
@@ -29,7 +29,7 @@ module Arrow
|
|
29
29
|
#
|
30
30
|
# The unit must be microsecond or nanosecond.
|
31
31
|
#
|
32
|
-
# @example Create a time64 data type with
|
32
|
+
# @example Create a time64 data type with Arrow::TimeUnit
|
33
33
|
# Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
|
34
34
|
#
|
35
35
|
# @example Create a time64 data type with Symbol
|
@@ -45,7 +45,7 @@ module Arrow
|
|
45
45
|
#
|
46
46
|
# The unit must be microsecond or nanosecond.
|
47
47
|
#
|
48
|
-
# @example Create a time64 data type with
|
48
|
+
# @example Create a time64 data type with Arrow::TimeUnit
|
49
49
|
# Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
|
50
50
|
#
|
51
51
|
# @example Create a time64 data type with Symbol
|
@@ -27,7 +27,7 @@ module Arrow
|
|
27
27
|
# @param unit [Arrow::TimeUnit, Symbol] The unit of the
|
28
28
|
# timestamp data type.
|
29
29
|
#
|
30
|
-
# @example Create a timestamp data type with
|
30
|
+
# @example Create a timestamp data type with Arrow::TimeUnit
|
31
31
|
# Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
|
32
32
|
#
|
33
33
|
# @example Create a timestamp data type with Symbol
|
@@ -41,7 +41,7 @@ module Arrow
|
|
41
41
|
# @option description [Arrow::TimeUnit, Symbol] :unit The unit of
|
42
42
|
# the timestamp data type.
|
43
43
|
#
|
44
|
-
# @example Create a timestamp data type with
|
44
|
+
# @example Create a timestamp data type with Arrow::TimeUnit
|
45
45
|
# Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
|
46
46
|
#
|
47
47
|
# @example Create a timestamp data type with Symbol
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -46,6 +46,7 @@ Gem::Specification.new do |spec|
|
|
46
46
|
spec.test_files += Dir.glob("test/**/*")
|
47
47
|
spec.extensions = ["ext/arrow/extconf.rb"]
|
48
48
|
|
49
|
+
spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
|
49
50
|
spec.add_runtime_dependency("extpp", ">= 0.0.7")
|
50
51
|
spec.add_runtime_dependency("gio2", ">= 3.3.6")
|
51
52
|
spec.add_runtime_dependency("native-package-installer")
|
@@ -329,6 +329,23 @@ module RawRecordsBasicArraysTests
|
|
329
329
|
records)
|
330
330
|
assert_equal(records, target.raw_records)
|
331
331
|
end
|
332
|
+
|
333
|
+
# Checks that a decimal256 column (precision 38, scale 2) containing a
# nil row comes back unchanged from #raw_records.
# `build` is a helper defined outside this excerpt.
def test_decimal256
  records = [
    [BigDecimal("92.92")],
    [nil],
    [BigDecimal("29.29")],
  ]
  target = build({
                   column: {
                     type: :decimal256,
                     precision: 38,
                     scale: 2,
                   }
                 },
                 records)
  assert_equal(records, target.raw_records)
end
|
332
349
|
end
|
333
350
|
|
334
351
|
class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
|
@@ -345,6 +345,20 @@ module RawRecordsDenseUnionArrayTests
|
|
345
345
|
assert_equal(records, target.raw_records)
|
346
346
|
end
|
347
347
|
|
348
|
+
# Checks that records keyed by "0" (a decimal256 value) and "1" (nil)
# come back unchanged from #raw_records for a decimal256 type with
# precision 38 and scale 2.
# `build` is a helper defined outside this excerpt.
def test_decimal256
  records = [
    [{"0" => BigDecimal("92.92")}],
    [{"1" => nil}],
  ]
  target = build({
                   type: :decimal256,
                   precision: 38,
                   scale: 2,
                 },
                 records)
  assert_equal(records, target.raw_records)
end
|
361
|
+
|
348
362
|
def test_list
|
349
363
|
records = [
|
350
364
|
[{"0" => [true, nil, false]}],
|
@@ -379,6 +379,26 @@ module RawRecordsListArrayTests
|
|
379
379
|
assert_equal(records, target.raw_records)
|
380
380
|
end
|
381
381
|
|
382
|
+
# Checks that a list of decimal256 values (precision 38, scale 2),
# including a nil element and a nil list, comes back unchanged from
# #raw_records.
# `build` is a helper defined outside this excerpt.
def test_decimal256
  records = [
    [
      [
        BigDecimal("92.92"),
        nil,
        BigDecimal("29.29"),
      ],
    ],
    [nil],
  ]
  target = build({
                   type: :decimal256,
                   precision: 38,
                   scale: 2,
                 },
                 records)
  assert_equal(records, target.raw_records)
end
|
401
|
+
|
382
402
|
def test_list
|
383
403
|
records = [
|
384
404
|
[
|
@@ -335,6 +335,20 @@ module RawRecordsSparseUnionArrayTests
|
|
335
335
|
assert_equal(records, target.raw_records)
|
336
336
|
end
|
337
337
|
|
338
|
+
# Checks that records keyed by "0" (a decimal256 value) and "1" (nil)
# come back unchanged from #raw_records for a decimal256 type with
# precision 38 and scale 2.
# `build` is a helper defined outside this excerpt.
def test_decimal256
  records = [
    [{"0" => BigDecimal("92.92")}],
    [{"1" => nil}],
  ]
  target = build({
                   type: :decimal256,
                   precision: 38,
                   scale: 2,
                 },
                 records)
  assert_equal(records, target.raw_records)
end
|
351
|
+
|
338
352
|
def test_list
|
339
353
|
records = [
|
340
354
|
[{"0" => [true, nil, false]}],
|
@@ -329,6 +329,21 @@ module RawRecordsStructArrayTests
|
|
329
329
|
assert_equal(records, target.raw_records)
|
330
330
|
end
|
331
331
|
|
332
|
+
# Checks that records with a decimal256 "field" (precision 38, scale 2)
# come back unchanged from #raw_records; covers a nil record and a
# record whose "field" is nil.
# `build` is a helper defined outside this excerpt.
def test_decimal256
  records = [
    [{"field" => BigDecimal("92.92")}],
    [nil],
    [{"field" => nil}],
  ]
  target = build({
                   type: :decimal256,
                   precision: 38,
                   scale: 2,
                 },
                 records)
  assert_equal(records, target.raw_records)
end
|
346
|
+
|
332
347
|
def test_list
|
333
348
|
records = [
|
334
349
|
[{"field" => [true, nil, false]}],
|
data/test/test-array.rb
CHANGED
@@ -160,12 +160,132 @@ class ArrayTest < Test::Unit::TestCase
|
|
160
160
|
|
161
161
|
test("Arrow::ChunkedArray") do
|
162
162
|
chunks = [
|
163
|
-
Arrow::Int16Array.new([1,
|
164
|
-
Arrow::Int16Array.new([
|
163
|
+
Arrow::Int16Array.new([1, 4]),
|
164
|
+
Arrow::Int16Array.new([0, 3])
|
165
165
|
]
|
166
166
|
right = Arrow::ChunkedArray.new(chunks)
|
167
167
|
assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
|
168
168
|
@array.is_in(right))
|
169
169
|
end
|
170
170
|
end
|
171
|
+
|
172
|
+
# Arrow::Array#concatenate with several argument kinds: arrays of the
# same type, arrays of other numeric types, an array of an
# incompatible type, raw Ruby Arrays and a non-array value.
sub_test_case("#concatenate") do
  test("Arrow::Array: same") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                 Arrow::Int32Array.new([1, 2, nil]).
                   concatenate(Arrow::Int32Array.new([4, 5]),
                               Arrow::Int32Array.new([6])))
  end

  test("Arrow::Array: castable") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                 Arrow::Int32Array.new([1, 2, nil]).
                   concatenate(Arrow::Int8Array.new([4, 5]),
                               Arrow::UInt32Array.new([6])))
  end

  test("Arrow::Array: non-castable") do
    assert_raise(Arrow::Error::Invalid) do
      Arrow::Int32Array.new([1, 2, nil]).
        concatenate(Arrow::StringArray.new(["X"]))
    end
  end

  test("Array") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, nil, 6]),
                 Arrow::Int32Array.new([1, 2, nil]).
                   concatenate([4, nil],
                               [6]))
  end

  test("invalid") do
    message = "[array][resolve] can't build int32 array: 4"
    assert_raise(ArgumentError.new(message)) do
      Arrow::Int32Array.new([1, 2, nil]).
        concatenate(4)
    end
  end
end
|
209
|
+
|
210
|
+
# Arrow::Array#+ with the same argument kinds as the #concatenate
# cases: same type, other numeric type, incompatible type, raw Ruby
# Array and a non-array value.
sub_test_case("#+") do
  test("Arrow::Array: same") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                 Arrow::Int32Array.new([1, 2, nil]) +
                 Arrow::Int32Array.new([4, 5, 6]))
  end

  test("Arrow::Array: castable") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                 Arrow::Int32Array.new([1, 2, nil]) +
                 Arrow::Int8Array.new([4, 5, 6]))
  end

  test("Arrow::Array: non-castable") do
    assert_raise(Arrow::Error::Invalid) do
      Arrow::Int32Array.new([1, 2, nil]) +
        Arrow::StringArray.new(["X"])
    end
  end

  test("Array") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, nil, 6]),
                 Arrow::Int32Array.new([1, 2, nil]) +
                 [4, nil, 6])
  end

  test("invalid") do
    message = "[array][resolve] can't build int32 array: 4"
    assert_raise(ArgumentError.new(message)) do
      Arrow::Int32Array.new([1, 2, nil]) + 4
    end
  end
end
|
243
|
+
|
244
|
+
# Arrow::Array#resolve: converting another array or a raw Ruby Array
# to the receiver's data type.
sub_test_case("#resolve") do
  test("Arrow::Array: same") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil]),
                 Arrow::Int32Array.new([]).
                   resolve(Arrow::Int32Array.new([1, 2, nil])))
  end

  test("Arrow::Array: castable") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil]),
                 Arrow::Int32Array.new([]).
                   resolve(Arrow::Int8Array.new([1, 2, nil])))
  end

  test("Arrow::Array: non-castable") do
    # Call #resolve directly; this case previously used `+`, which
    # duplicated the "#+" group instead of exercising #resolve itself.
    assert_raise(Arrow::Error::Invalid) do
      Arrow::Int32Array.new([]).
        resolve(Arrow::StringArray.new(["X"]))
    end
  end

  test("Array: non-parametric") do
    assert_equal(Arrow::Int32Array.new([1, 2, nil]),
                 Arrow::Int32Array.new([]).
                   resolve([1, 2, nil]))
  end

  test("Array: parametric") do
    list_data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
    list_array = Arrow::ListArray.new(list_data_type, [])
    assert_equal(Arrow::ListArray.new(list_data_type,
                                      [
                                        [true, false],
                                        nil,
                                      ]),
                 list_array.resolve([
                                      [true, false],
                                      nil,
                                    ]))
  end

  test("invalid") do
    message = "[array][resolve] can't build int32 array: 4"
    assert_raise(ArgumentError.new(message)) do
      Arrow::Int32Array.new([]).resolve(4)
    end
  end
end
|
171
291
|
end
|