red-arrow 1.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +15 -2
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +1 -0
  5. data/ext/arrow/values.cpp +1 -0
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/data-type.rb +14 -5
  10. data/lib/arrow/decimal128-array-builder.rb +21 -25
  11. data/lib/arrow/decimal128-data-type.rb +2 -0
  12. data/lib/arrow/decimal128.rb +18 -0
  13. data/lib/arrow/decimal256-array-builder.rb +61 -0
  14. data/lib/arrow/decimal256-array.rb +25 -0
  15. data/lib/arrow/decimal256-data-type.rb +73 -0
  16. data/lib/arrow/decimal256.rb +60 -0
  17. data/lib/arrow/dense-union-data-type.rb +2 -2
  18. data/lib/arrow/dictionary-data-type.rb +2 -2
  19. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  20. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  21. data/lib/arrow/loader.rb +15 -0
  22. data/lib/arrow/sort-key.rb +193 -0
  23. data/lib/arrow/sort-options.rb +109 -0
  24. data/lib/arrow/sparse-union-data-type.rb +2 -2
  25. data/lib/arrow/time32-data-type.rb +2 -2
  26. data/lib/arrow/time64-data-type.rb +2 -2
  27. data/lib/arrow/timestamp-data-type.rb +2 -2
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -0
  30. data/test/raw-records/test-basic-arrays.rb +17 -0
  31. data/test/raw-records/test-dense-union-array.rb +14 -0
  32. data/test/raw-records/test-list-array.rb +20 -0
  33. data/test/raw-records/test-sparse-union-array.rb +14 -0
  34. data/test/raw-records/test-struct-array.rb +15 -0
  35. data/test/test-array.rb +122 -2
  36. data/test/test-bigdecimal.rb +20 -3
  37. data/test/test-decimal128-array-builder.rb +18 -1
  38. data/test/test-decimal128-data-type.rb +2 -2
  39. data/test/test-decimal128.rb +38 -0
  40. data/test/test-decimal256-array-builder.rb +112 -0
  41. data/test/test-decimal256-array.rb +38 -0
  42. data/test/test-decimal256-data-type.rb +31 -0
  43. data/test/test-decimal256.rb +102 -0
  44. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  45. data/test/test-fixed-size-binary-array.rb +36 -0
  46. data/test/test-orc.rb +19 -23
  47. data/test/test-sort-indices.rb +40 -0
  48. data/test/test-sort-key.rb +81 -0
  49. data/test/test-sort-options.rb +58 -0
  50. data/test/test-struct-array-builder.rb +8 -8
  51. data/test/test-struct-array.rb +2 -2
  52. data/test/values/test-basic-arrays.rb +11 -0
  53. data/test/values/test-dense-union-array.rb +14 -0
  54. data/test/values/test-list-array.rb +18 -0
  55. data/test/values/test-sparse-union-array.rb +14 -0
  56. data/test/values/test-struct-array.rb +15 -0
  57. metadata +101 -61
@@ -0,0 +1,109 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SortOptions
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, String
25
+ new(value)
26
+ when ::Array
27
+ new(*value)
28
+ else
29
+ nil
30
+ end
31
+ end
32
+ end
33
+
34
+ alias_method :initialize_raw, :initialize
35
+ private :initialize_raw
36
+ # @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
37
+ # sort keys to be used. See {Arrow::SortKey.resolve} how to
38
+ # resolve each sort key in `sort_keys`.
39
+ #
40
+ # You can add more sort keys by {#add_sort_key} later.
41
+ #
42
+ # @example No initial sort keys
43
+ # options = Arrow::SortOptions.new
44
+ # options.sort_keys # => []
45
+ #
46
+ # @example String sort keys
47
+ # options = Arrow::SortOptions.new("count", "-age")
48
+ # options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
49
+ #
50
+ # @example Symbol sort keys
51
+ # options = Arrow::SortOptions.new(:count, :age)
52
+ # options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
53
+ #
54
+ # @example Mixed sort keys
55
+ # options = Arrow::SortOptions.new(:count, "-age")
56
+ # options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
57
+ #
58
+ # @since 4.0.0
59
+ def initialize(*sort_keys)
60
+ initialize_raw
61
+ sort_keys.each do |sort_key|
62
+ add_sort_key(sort_key)
63
+ end
64
+ end
65
+
66
+ # @api private
67
+ alias_method :add_sort_key_raw, :add_sort_key
68
+ # Add a sort key.
69
+ #
70
+ # @return [void]
71
+ #
72
+ # @overload add_sort_key(key)
73
+ #
74
+ # @param key [Arrow::SortKey] The sort key to be added.
75
+ #
76
+ # @example Add a key to sort by "price" column in descending order
77
+ # options = Arrow::SortOptions.new
78
+ # options.add_sort_key(Arrow::SortKey.new(:price, :descending))
79
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
80
+ #
81
+ # @overload add_sort_key(name)
82
+ #
83
+ # @param name [Symbol, String] The sort key name to be
84
+ # added. See also {Arrow::SortKey#initialize} for the leading
85
+ # order mark for String name.
86
+ #
87
+ # @example Add a key to sort by "price" column in descending order
88
+ # options = Arrow::SortOptions.new
89
+ # options.add_sort_key("-price")
90
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
91
+ #
92
+ # @overload add_sort_key(name, order)
93
+ #
94
+ # @param name [Symbol, String] The sort key name.
95
+ #
96
+ # @param order [Symbol, String, Arrow::SortOrder] The sort
97
+ # order. See {Arrow::SortKey#initialize} for details.
98
+ #
99
+ # @example Add a key to sort by "price" column in descending order
100
+ # options = Arrow::SortOptions.new
101
+ # options.add_sort_key("price", :desc)
102
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
103
+ #
104
+ # @since 4.0.0
105
+ def add_sort_key(name, order=nil)
106
+ add_sort_key_raw(SortKey.resolve(name, order))
107
+ end
108
+ end
109
+ end
@@ -33,7 +33,7 @@ module Arrow
33
33
  # @param type_codes [::Array<Integer>] The IDs that indicates
34
34
  # corresponding fields.
35
35
  #
36
- # @example Create a sparse union data type for {2: visible, 9: count}
36
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
37
37
  # fields = [
38
38
  # Arrow::Field.new("visible", :boolean),
39
39
  # {
@@ -57,7 +57,7 @@ module Arrow
57
57
  # @option description [::Array<Integer>] :type_codes The IDs
58
58
  # that indicates corresponding fields.
59
59
  #
60
- # @example Create a sparse union data type for {2: visible, 9: count}
60
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
61
61
  # fields = [
62
62
  # Arrow::Field.new("visible", :boolean),
63
63
  # {
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be second or millisecond.
31
31
  #
32
- # @example Create a time32 data type with {Arrow::TimeUnit}
32
+ # @example Create a time32 data type with Arrow::TimeUnit
33
33
  # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
34
  #
35
35
  # @example Create a time32 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be second or millisecond.
47
47
  #
48
- # @example Create a time32 data type with {Arrow::TimeUnit}
48
+ # @example Create a time32 data type with Arrow::TimeUnit
49
49
  # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
50
  #
51
51
  # @example Create a time32 data type with Symbol
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be microsecond or nanosecond.
31
31
  #
32
- # @example Create a time64 data type with {Arrow::TimeUnit}
32
+ # @example Create a time64 data type with Arrow::TimeUnit
33
33
  # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
34
34
  #
35
35
  # @example Create a time64 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be microsecond or nanosecond.
47
47
  #
48
- # @example Create a time64 data type with {Arrow::TimeUnit}
48
+ # @example Create a time64 data type with Arrow::TimeUnit
49
49
  # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
50
50
  #
51
51
  # @example Create a time64 data type with Symbol
@@ -27,7 +27,7 @@ module Arrow
27
27
  # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
28
  # timestamp data type.
29
29
  #
30
- # @example Create a timestamp data type with {Arrow::TimeUnit}
30
+ # @example Create a timestamp data type with Arrow::TimeUnit
31
31
  # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
32
  #
33
33
  # @example Create a timestamp data type with Symbol
@@ -41,7 +41,7 @@ module Arrow
41
41
  # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
42
  # the timestamp data type.
43
43
  #
44
- # @example Create a timestamp data type with {Arrow::TimeUnit}
44
+ # @example Create a timestamp data type with Arrow::TimeUnit
45
45
  # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
46
  #
47
47
  # @example Create a timestamp data type with Symbol
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "1.0.0"
19
+ VERSION = "4.0.1"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -46,6 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.test_files += Dir.glob("test/**/*")
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
+ spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
49
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
51
  spec.add_runtime_dependency("gio2", ">= 3.3.6")
51
52
  spec.add_runtime_dependency("native-package-installer")
@@ -329,6 +329,23 @@ module RawRecordsBasicArraysTests
329
329
  records)
330
330
  assert_equal(records, target.raw_records)
331
331
  end
332
+
333
+ def test_decimal256
334
+ records = [
335
+ [BigDecimal("92.92")],
336
+ [nil],
337
+ [BigDecimal("29.29")],
338
+ ]
339
+ target = build({
340
+ column: {
341
+ type: :decimal256,
342
+ precision: 38,
343
+ scale: 2,
344
+ }
345
+ },
346
+ records)
347
+ assert_equal(records, target.raw_records)
348
+ end
332
349
  end
333
350
 
334
351
  class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
@@ -345,6 +345,20 @@ module RawRecordsDenseUnionArrayTests
345
345
  assert_equal(records, target.raw_records)
346
346
  end
347
347
 
348
+ def test_decimal256
349
+ records = [
350
+ [{"0" => BigDecimal("92.92")}],
351
+ [{"1" => nil}],
352
+ ]
353
+ target = build({
354
+ type: :decimal256,
355
+ precision: 38,
356
+ scale: 2,
357
+ },
358
+ records)
359
+ assert_equal(records, target.raw_records)
360
+ end
361
+
348
362
  def test_list
349
363
  records = [
350
364
  [{"0" => [true, nil, false]}],
@@ -379,6 +379,26 @@ module RawRecordsListArrayTests
379
379
  assert_equal(records, target.raw_records)
380
380
  end
381
381
 
382
+ def test_decimal256
383
+ records = [
384
+ [
385
+ [
386
+ BigDecimal("92.92"),
387
+ nil,
388
+ BigDecimal("29.29"),
389
+ ],
390
+ ],
391
+ [nil],
392
+ ]
393
+ target = build({
394
+ type: :decimal256,
395
+ precision: 38,
396
+ scale: 2,
397
+ },
398
+ records)
399
+ assert_equal(records, target.raw_records)
400
+ end
401
+
382
402
  def test_list
383
403
  records = [
384
404
  [
@@ -335,6 +335,20 @@ module RawRecordsSparseUnionArrayTests
335
335
  assert_equal(records, target.raw_records)
336
336
  end
337
337
 
338
+ def test_decimal256
339
+ records = [
340
+ [{"0" => BigDecimal("92.92")}],
341
+ [{"1" => nil}],
342
+ ]
343
+ target = build({
344
+ type: :decimal256,
345
+ precision: 38,
346
+ scale: 2,
347
+ },
348
+ records)
349
+ assert_equal(records, target.raw_records)
350
+ end
351
+
338
352
  def test_list
339
353
  records = [
340
354
  [{"0" => [true, nil, false]}],
@@ -329,6 +329,21 @@ module RawRecordsStructArrayTests
329
329
  assert_equal(records, target.raw_records)
330
330
  end
331
331
 
332
+ def test_decimal256
333
+ records = [
334
+ [{"field" => BigDecimal("92.92")}],
335
+ [nil],
336
+ [{"field" => nil}],
337
+ ]
338
+ target = build({
339
+ type: :decimal256,
340
+ precision: 38,
341
+ scale: 2,
342
+ },
343
+ records)
344
+ assert_equal(records, target.raw_records)
345
+ end
346
+
332
347
  def test_list
333
348
  records = [
334
349
  [{"field" => [true, nil, false]}],
data/test/test-array.rb CHANGED
@@ -160,12 +160,132 @@ class ArrayTest < Test::Unit::TestCase
160
160
 
161
161
  test("Arrow::ChunkedArray") do
162
162
  chunks = [
163
- Arrow::Int16Array.new([1, 0]),
164
- Arrow::Int16Array.new([1, 0, 3])
163
+ Arrow::Int16Array.new([1, 4]),
164
+ Arrow::Int16Array.new([0, 3])
165
165
  ]
166
166
  right = Arrow::ChunkedArray.new(chunks)
167
167
  assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
168
168
  @array.is_in(right))
169
169
  end
170
170
  end
171
+
172
+ sub_test_case("#concatenate") do
173
+ test("Arrow::Array: same") do
174
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
175
+ Arrow::Int32Array.new([1, 2, nil]).
176
+ concatenate(Arrow::Int32Array.new([4, 5]),
177
+ Arrow::Int32Array.new([6])))
178
+ end
179
+
180
+ test("Arrow::Array: castable") do
181
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
182
+ Arrow::Int32Array.new([1, 2, nil]).
183
+ concatenate(Arrow::Int8Array.new([4, 5]),
184
+ Arrow::UInt32Array.new([6])))
185
+ end
186
+
187
+ test("Arrow::Array: non-castable") do
188
+ assert_raise(Arrow::Error::Invalid) do
189
+ Arrow::Int32Array.new([1, 2, nil]).
190
+ concatenate(Arrow::StringArray.new(["X"]))
191
+ end
192
+ end
193
+
194
+ test("Array") do
195
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
196
+ Arrow::Int32Array.new([1, 2, nil]).
197
+ concatenate([4, nil],
198
+ [6]))
199
+ end
200
+
201
+ test("invalid") do
202
+ message = "[array][resolve] can't build int32 array: 4"
203
+ assert_raise(ArgumentError.new(message)) do
204
+ Arrow::Int32Array.new([1, 2, nil]).
205
+ concatenate(4)
206
+ end
207
+ end
208
+ end
209
+
210
+ sub_test_case("#+") do
211
+ test("Arrow::Array: same") do
212
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
213
+ Arrow::Int32Array.new([1, 2, nil]) +
214
+ Arrow::Int32Array.new([4, 5, 6]))
215
+ end
216
+
217
+ test("Arrow::Array: castable") do
218
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
219
+ Arrow::Int32Array.new([1, 2, nil]) +
220
+ Arrow::Int8Array.new([4, 5, 6]))
221
+ end
222
+
223
+ test("Arrow::Array: non-castable") do
224
+ assert_raise(Arrow::Error::Invalid) do
225
+ Arrow::Int32Array.new([1, 2, nil]) +
226
+ Arrow::StringArray.new(["X"])
227
+ end
228
+ end
229
+
230
+ test("Array") do
231
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
232
+ Arrow::Int32Array.new([1, 2, nil]) +
233
+ [4, nil, 6])
234
+ end
235
+
236
+ test("invalid") do
237
+ message = "[array][resolve] can't build int32 array: 4"
238
+ assert_raise(ArgumentError.new(message)) do
239
+ Arrow::Int32Array.new([1, 2, nil]) + 4
240
+ end
241
+ end
242
+ end
243
+
244
+ sub_test_case("#resolve") do
245
+ test("Arrow::Array: same") do
246
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
247
+ Arrow::Int32Array.new([]).
248
+ resolve(Arrow::Int32Array.new([1, 2, nil])))
249
+ end
250
+
251
+ test("Arrow::Array: castable") do
252
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
253
+ Arrow::Int32Array.new([]).
254
+ resolve(Arrow::Int8Array.new([1, 2, nil])))
255
+ end
256
+
257
+ test("Arrow::Array: non-castable") do
258
+ assert_raise(Arrow::Error::Invalid) do
259
+ Arrow::Int32Array.new([]) +
260
+ Arrow::StringArray.new(["X"])
261
+ end
262
+ end
263
+
264
+ test("Array: non-parametric") do
265
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
266
+ Arrow::Int32Array.new([]).
267
+ resolve([1, 2, nil]))
268
+ end
269
+
270
+ test("Array: parametric") do
271
+ list_data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
272
+ list_array = Arrow::ListArray.new(list_data_type, [])
273
+ assert_equal(Arrow::ListArray.new(list_data_type,
274
+ [
275
+ [true, false],
276
+ nil,
277
+ ]),
278
+ list_array.resolve([
279
+ [true, false],
280
+ nil,
281
+ ]))
282
+ end
283
+
284
+ test("invalid") do
285
+ message = "[array][resolve] can't build int32 array: 4"
286
+ assert_raise(ArgumentError.new(message)) do
287
+ Arrow::Int32Array.new([]).resolve(4)
288
+ end
289
+ end
290
+ end
171
291
  end