red-arrow 1.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +15 -2
- data/ext/arrow/extconf.rb +14 -3
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/array-builder.rb +11 -6
- data/lib/arrow/array.rb +118 -0
- data/lib/arrow/bigdecimal-extension.rb +5 -1
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/decimal128-array-builder.rb +21 -25
- data/lib/arrow/decimal128-data-type.rb +2 -0
- data/lib/arrow/decimal128.rb +18 -0
- data/lib/arrow/decimal256-array-builder.rb +61 -0
- data/lib/arrow/decimal256-array.rb +25 -0
- data/lib/arrow/decimal256-data-type.rb +73 -0
- data/lib/arrow/decimal256.rb +60 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
- data/lib/arrow/fixed-size-binary-array.rb +26 -0
- data/lib/arrow/loader.rb +15 -0
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -0
- data/test/raw-records/test-basic-arrays.rb +17 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +20 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array.rb +122 -2
- data/test/test-bigdecimal.rb +20 -3
- data/test/test-decimal128-array-builder.rb +18 -1
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-decimal128.rb +38 -0
- data/test/test-decimal256-array-builder.rb +112 -0
- data/test/test-decimal256-array.rb +38 -0
- data/test/test-decimal256-data-type.rb +31 -0
- data/test/test-decimal256.rb +102 -0
- data/test/test-fixed-size-binary-array-builder.rb +92 -0
- data/test/test-fixed-size-binary-array.rb +36 -0
- data/test/test-orc.rb +19 -23
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-struct-array-builder.rb +8 -8
- data/test/test-struct-array.rb +2 -2
- data/test/values/test-basic-arrays.rb +11 -0
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +18 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +101 -61
@@ -0,0 +1,109 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class SortOptions
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Symbol, String
|
25
|
+
new(value)
|
26
|
+
when ::Array
|
27
|
+
new(*value)
|
28
|
+
else
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
alias_method :initialize_raw, :initialize
|
35
|
+
private :initialize_raw
|
36
|
+
# @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
|
37
|
+
# sort keys to be used. See {Arrow::SortKey.resolve} how to
|
38
|
+
# resolve each sort key in `sort_keys`.
|
39
|
+
#
|
40
|
+
# You can add more sort keys by {#add_sort_key} later.
|
41
|
+
#
|
42
|
+
# @example No initial sort keys
|
43
|
+
# options = Arrow::SortOptions.new
|
44
|
+
# options.sort_keys # => []
|
45
|
+
#
|
46
|
+
# @example String sort keys
|
47
|
+
# options = Arrow::SortOptions.new("count", "-age")
|
48
|
+
# options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
|
49
|
+
#
|
50
|
+
# @example Symbol sort keys
|
51
|
+
# options = Arrow::SortOptions.new(:count, :age)
|
52
|
+
# options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
|
53
|
+
#
|
54
|
+
# @example Mixed sort keys
|
55
|
+
# options = Arrow::SortOptions.new(:count, "-age")
|
56
|
+
# options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
|
57
|
+
#
|
58
|
+
# @since 4.0.0
|
59
|
+
def initialize(*sort_keys)
|
60
|
+
initialize_raw
|
61
|
+
sort_keys.each do |sort_key|
|
62
|
+
add_sort_key(sort_key)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# @api private
|
67
|
+
alias_method :add_sort_key_raw, :add_sort_key
|
68
|
+
# Add a sort key.
|
69
|
+
#
|
70
|
+
# @return [void]
|
71
|
+
#
|
72
|
+
# @overload add_sort_key(key)
|
73
|
+
#
|
74
|
+
# @param key [Arrow::SortKey] The sort key to be added.
|
75
|
+
#
|
76
|
+
# @example Add a key to sort by "price" column in descending order
|
77
|
+
# options = Arrow::SortOptions.new
|
78
|
+
# options.add_sort_key(Arrow::SortKey.new(:price, :descending))
|
79
|
+
# options.sort_keys.collect(&:to_s) # => ["-price"]
|
80
|
+
#
|
81
|
+
# @overload add_sort_key(name)
|
82
|
+
#
|
83
|
+
# @param name [Symbol, String] The sort key name to be
|
84
|
+
# added. See also {Arrow::SortKey#initialize} for the leading
|
85
|
+
# order mark for String name.
|
86
|
+
#
|
87
|
+
# @example Add a key to sort by "price" column in descending order
|
88
|
+
# options = Arrow::SortOptions.new
|
89
|
+
# options.add_sort_key("-price")
|
90
|
+
# options.sort_keys.collect(&:to_s) # => ["-price"]
|
91
|
+
#
|
92
|
+
# @overload add_sort_key(name, order)
|
93
|
+
#
|
94
|
+
# @param name [Symbol, String] The sort key name.
|
95
|
+
#
|
96
|
+
# @param order [Symbol, String, Arrow::SortOrder] The sort
|
97
|
+
# order. See {Arrow::SortKey#initialize} for details.
|
98
|
+
#
|
99
|
+
# @example Add a key to sort by "price" column in descending order
|
100
|
+
# options = Arrow::SortOptions.new
|
101
|
+
# options.add_sort_key("price", :desc)
|
102
|
+
# options.sort_keys.collect(&:to_s) # => ["-price"]
|
103
|
+
#
|
104
|
+
# @since 4.0.0
|
105
|
+
def add_sort_key(name, order=nil)
|
106
|
+
add_sort_key_raw(SortKey.resolve(name, order))
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -33,7 +33,7 @@ module Arrow
|
|
33
33
|
# @param type_codes [::Array<Integer>] The IDs that indicates
|
34
34
|
# corresponding fields.
|
35
35
|
#
|
36
|
-
# @example Create a sparse union data type for {2: visible, 9: count}
|
36
|
+
# @example Create a sparse union data type for `{2: visible, 9: count}`
|
37
37
|
# fields = [
|
38
38
|
# Arrow::Field.new("visible", :boolean),
|
39
39
|
# {
|
@@ -57,7 +57,7 @@ module Arrow
|
|
57
57
|
# @option description [::Array<Integer>] :type_codes The IDs
|
58
58
|
# that indicates corresponding fields.
|
59
59
|
#
|
60
|
-
# @example Create a sparse union data type for {2: visible, 9: count}
|
60
|
+
# @example Create a sparse union data type for `{2: visible, 9: count}`
|
61
61
|
# fields = [
|
62
62
|
# Arrow::Field.new("visible", :boolean),
|
63
63
|
# {
|
@@ -29,7 +29,7 @@ module Arrow
|
|
29
29
|
#
|
30
30
|
# The unit must be second or millisecond.
|
31
31
|
#
|
32
|
-
# @example Create a time32 data type with
|
32
|
+
# @example Create a time32 data type with Arrow::TimeUnit
|
33
33
|
# Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
|
34
34
|
#
|
35
35
|
# @example Create a time32 data type with Symbol
|
@@ -45,7 +45,7 @@ module Arrow
|
|
45
45
|
#
|
46
46
|
# The unit must be second or millisecond.
|
47
47
|
#
|
48
|
-
# @example Create a time32 data type with
|
48
|
+
# @example Create a time32 data type with Arrow::TimeUnit
|
49
49
|
# Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
|
50
50
|
#
|
51
51
|
# @example Create a time32 data type with Symbol
|
@@ -29,7 +29,7 @@ module Arrow
|
|
29
29
|
#
|
30
30
|
# The unit must be microsecond or nanosecond.
|
31
31
|
#
|
32
|
-
# @example Create a time64 data type with
|
32
|
+
# @example Create a time64 data type with Arrow::TimeUnit
|
33
33
|
# Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
|
34
34
|
#
|
35
35
|
# @example Create a time64 data type with Symbol
|
@@ -45,7 +45,7 @@ module Arrow
|
|
45
45
|
#
|
46
46
|
# The unit must be microsecond or nanosecond.
|
47
47
|
#
|
48
|
-
# @example Create a time64 data type with
|
48
|
+
# @example Create a time64 data type with Arrow::TimeUnit
|
49
49
|
# Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
|
50
50
|
#
|
51
51
|
# @example Create a time64 data type with Symbol
|
@@ -27,7 +27,7 @@ module Arrow
|
|
27
27
|
# @param unit [Arrow::TimeUnit, Symbol] The unit of the
|
28
28
|
# timestamp data type.
|
29
29
|
#
|
30
|
-
# @example Create a timestamp data type with
|
30
|
+
# @example Create a timestamp data type with Arrow::TimeUnit
|
31
31
|
# Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
|
32
32
|
#
|
33
33
|
# @example Create a timestamp data type with Symbol
|
@@ -41,7 +41,7 @@ module Arrow
|
|
41
41
|
# @option description [Arrow::TimeUnit, Symbol] :unit The unit of
|
42
42
|
# the timestamp data type.
|
43
43
|
#
|
44
|
-
# @example Create a timestamp data type with
|
44
|
+
# @example Create a timestamp data type with Arrow::TimeUnit
|
45
45
|
# Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
|
46
46
|
#
|
47
47
|
# @example Create a timestamp data type with Symbol
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -46,6 +46,7 @@ Gem::Specification.new do |spec|
|
|
46
46
|
spec.test_files += Dir.glob("test/**/*")
|
47
47
|
spec.extensions = ["ext/arrow/extconf.rb"]
|
48
48
|
|
49
|
+
spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
|
49
50
|
spec.add_runtime_dependency("extpp", ">= 0.0.7")
|
50
51
|
spec.add_runtime_dependency("gio2", ">= 3.3.6")
|
51
52
|
spec.add_runtime_dependency("native-package-installer")
|
@@ -329,6 +329,23 @@ module RawRecordsBasicArraysTests
|
|
329
329
|
records)
|
330
330
|
assert_equal(records, target.raw_records)
|
331
331
|
end
|
332
|
+
|
333
|
+
def test_decimal256
|
334
|
+
records = [
|
335
|
+
[BigDecimal("92.92")],
|
336
|
+
[nil],
|
337
|
+
[BigDecimal("29.29")],
|
338
|
+
]
|
339
|
+
target = build({
|
340
|
+
column: {
|
341
|
+
type: :decimal256,
|
342
|
+
precision: 38,
|
343
|
+
scale: 2,
|
344
|
+
}
|
345
|
+
},
|
346
|
+
records)
|
347
|
+
assert_equal(records, target.raw_records)
|
348
|
+
end
|
332
349
|
end
|
333
350
|
|
334
351
|
class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
|
@@ -345,6 +345,20 @@ module RawRecordsDenseUnionArrayTests
|
|
345
345
|
assert_equal(records, target.raw_records)
|
346
346
|
end
|
347
347
|
|
348
|
+
def test_decimal256
|
349
|
+
records = [
|
350
|
+
[{"0" => BigDecimal("92.92")}],
|
351
|
+
[{"1" => nil}],
|
352
|
+
]
|
353
|
+
target = build({
|
354
|
+
type: :decimal256,
|
355
|
+
precision: 38,
|
356
|
+
scale: 2,
|
357
|
+
},
|
358
|
+
records)
|
359
|
+
assert_equal(records, target.raw_records)
|
360
|
+
end
|
361
|
+
|
348
362
|
def test_list
|
349
363
|
records = [
|
350
364
|
[{"0" => [true, nil, false]}],
|
@@ -379,6 +379,26 @@ module RawRecordsListArrayTests
|
|
379
379
|
assert_equal(records, target.raw_records)
|
380
380
|
end
|
381
381
|
|
382
|
+
def test_decimal256
|
383
|
+
records = [
|
384
|
+
[
|
385
|
+
[
|
386
|
+
BigDecimal("92.92"),
|
387
|
+
nil,
|
388
|
+
BigDecimal("29.29"),
|
389
|
+
],
|
390
|
+
],
|
391
|
+
[nil],
|
392
|
+
]
|
393
|
+
target = build({
|
394
|
+
type: :decimal256,
|
395
|
+
precision: 38,
|
396
|
+
scale: 2,
|
397
|
+
},
|
398
|
+
records)
|
399
|
+
assert_equal(records, target.raw_records)
|
400
|
+
end
|
401
|
+
|
382
402
|
def test_list
|
383
403
|
records = [
|
384
404
|
[
|
@@ -335,6 +335,20 @@ module RawRecordsSparseUnionArrayTests
|
|
335
335
|
assert_equal(records, target.raw_records)
|
336
336
|
end
|
337
337
|
|
338
|
+
def test_decimal256
|
339
|
+
records = [
|
340
|
+
[{"0" => BigDecimal("92.92")}],
|
341
|
+
[{"1" => nil}],
|
342
|
+
]
|
343
|
+
target = build({
|
344
|
+
type: :decimal256,
|
345
|
+
precision: 38,
|
346
|
+
scale: 2,
|
347
|
+
},
|
348
|
+
records)
|
349
|
+
assert_equal(records, target.raw_records)
|
350
|
+
end
|
351
|
+
|
338
352
|
def test_list
|
339
353
|
records = [
|
340
354
|
[{"0" => [true, nil, false]}],
|
@@ -329,6 +329,21 @@ module RawRecordsStructArrayTests
|
|
329
329
|
assert_equal(records, target.raw_records)
|
330
330
|
end
|
331
331
|
|
332
|
+
def test_decimal256
|
333
|
+
records = [
|
334
|
+
[{"field" => BigDecimal("92.92")}],
|
335
|
+
[nil],
|
336
|
+
[{"field" => nil}],
|
337
|
+
]
|
338
|
+
target = build({
|
339
|
+
type: :decimal256,
|
340
|
+
precision: 38,
|
341
|
+
scale: 2,
|
342
|
+
},
|
343
|
+
records)
|
344
|
+
assert_equal(records, target.raw_records)
|
345
|
+
end
|
346
|
+
|
332
347
|
def test_list
|
333
348
|
records = [
|
334
349
|
[{"field" => [true, nil, false]}],
|
data/test/test-array.rb
CHANGED
@@ -160,12 +160,132 @@ class ArrayTest < Test::Unit::TestCase
|
|
160
160
|
|
161
161
|
test("Arrow::ChunkedArray") do
|
162
162
|
chunks = [
|
163
|
-
Arrow::Int16Array.new([1,
|
164
|
-
Arrow::Int16Array.new([
|
163
|
+
Arrow::Int16Array.new([1, 4]),
|
164
|
+
Arrow::Int16Array.new([0, 3])
|
165
165
|
]
|
166
166
|
right = Arrow::ChunkedArray.new(chunks)
|
167
167
|
assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
|
168
168
|
@array.is_in(right))
|
169
169
|
end
|
170
170
|
end
|
171
|
+
|
172
|
+
sub_test_case("#concatenate") do
|
173
|
+
test("Arrow::Array: same") do
|
174
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
|
175
|
+
Arrow::Int32Array.new([1, 2, nil]).
|
176
|
+
concatenate(Arrow::Int32Array.new([4, 5]),
|
177
|
+
Arrow::Int32Array.new([6])))
|
178
|
+
end
|
179
|
+
|
180
|
+
test("Arrow::Array: castable") do
|
181
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
|
182
|
+
Arrow::Int32Array.new([1, 2, nil]).
|
183
|
+
concatenate(Arrow::Int8Array.new([4, 5]),
|
184
|
+
Arrow::UInt32Array.new([6])))
|
185
|
+
end
|
186
|
+
|
187
|
+
test("Arrow::Array: non-castable") do
|
188
|
+
assert_raise(Arrow::Error::Invalid) do
|
189
|
+
Arrow::Int32Array.new([1, 2, nil]).
|
190
|
+
concatenate(Arrow::StringArray.new(["X"]))
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
test("Array") do
|
195
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
|
196
|
+
Arrow::Int32Array.new([1, 2, nil]).
|
197
|
+
concatenate([4, nil],
|
198
|
+
[6]))
|
199
|
+
end
|
200
|
+
|
201
|
+
test("invalid") do
|
202
|
+
message = "[array][resolve] can't build int32 array: 4"
|
203
|
+
assert_raise(ArgumentError.new(message)) do
|
204
|
+
Arrow::Int32Array.new([1, 2, nil]).
|
205
|
+
concatenate(4)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
sub_test_case("#+") do
|
211
|
+
test("Arrow::Array: same") do
|
212
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
|
213
|
+
Arrow::Int32Array.new([1, 2, nil]) +
|
214
|
+
Arrow::Int32Array.new([4, 5, 6]))
|
215
|
+
end
|
216
|
+
|
217
|
+
test("Arrow::Array: castable") do
|
218
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
|
219
|
+
Arrow::Int32Array.new([1, 2, nil]) +
|
220
|
+
Arrow::Int8Array.new([4, 5, 6]))
|
221
|
+
end
|
222
|
+
|
223
|
+
test("Arrow::Array: non-castable") do
|
224
|
+
assert_raise(Arrow::Error::Invalid) do
|
225
|
+
Arrow::Int32Array.new([1, 2, nil]) +
|
226
|
+
Arrow::StringArray.new(["X"])
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
test("Array") do
|
231
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
|
232
|
+
Arrow::Int32Array.new([1, 2, nil]) +
|
233
|
+
[4, nil, 6])
|
234
|
+
end
|
235
|
+
|
236
|
+
test("invalid") do
|
237
|
+
message = "[array][resolve] can't build int32 array: 4"
|
238
|
+
assert_raise(ArgumentError.new(message)) do
|
239
|
+
Arrow::Int32Array.new([1, 2, nil]) + 4
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
sub_test_case("#resolve") do
|
245
|
+
test("Arrow::Array: same") do
|
246
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil]),
|
247
|
+
Arrow::Int32Array.new([]).
|
248
|
+
resolve(Arrow::Int32Array.new([1, 2, nil])))
|
249
|
+
end
|
250
|
+
|
251
|
+
test("Arrow::Array: castable") do
|
252
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil]),
|
253
|
+
Arrow::Int32Array.new([]).
|
254
|
+
resolve(Arrow::Int8Array.new([1, 2, nil])))
|
255
|
+
end
|
256
|
+
|
257
|
+
test("Arrow::Array: non-castable") do
|
258
|
+
assert_raise(Arrow::Error::Invalid) do
|
259
|
+
Arrow::Int32Array.new([]) +
|
260
|
+
Arrow::StringArray.new(["X"])
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
264
|
+
test("Array: non-parametric") do
|
265
|
+
assert_equal(Arrow::Int32Array.new([1, 2, nil]),
|
266
|
+
Arrow::Int32Array.new([]).
|
267
|
+
resolve([1, 2, nil]))
|
268
|
+
end
|
269
|
+
|
270
|
+
test("Array: parametric") do
|
271
|
+
list_data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
|
272
|
+
list_array = Arrow::ListArray.new(list_data_type, [])
|
273
|
+
assert_equal(Arrow::ListArray.new(list_data_type,
|
274
|
+
[
|
275
|
+
[true, false],
|
276
|
+
nil,
|
277
|
+
]),
|
278
|
+
list_array.resolve([
|
279
|
+
[true, false],
|
280
|
+
nil,
|
281
|
+
]))
|
282
|
+
end
|
283
|
+
|
284
|
+
test("invalid") do
|
285
|
+
message = "[array][resolve] can't build int32 array: 4"
|
286
|
+
assert_raise(ArgumentError.new(message)) do
|
287
|
+
Arrow::Int32Array.new([]).resolve(4)
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
171
291
|
end
|