red-arrow 1.0.0 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +15 -2
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +1 -0
  5. data/ext/arrow/values.cpp +1 -0
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/data-type.rb +14 -5
  10. data/lib/arrow/decimal128-array-builder.rb +21 -25
  11. data/lib/arrow/decimal128-data-type.rb +2 -0
  12. data/lib/arrow/decimal128.rb +18 -0
  13. data/lib/arrow/decimal256-array-builder.rb +61 -0
  14. data/lib/arrow/decimal256-array.rb +25 -0
  15. data/lib/arrow/decimal256-data-type.rb +73 -0
  16. data/lib/arrow/decimal256.rb +60 -0
  17. data/lib/arrow/dense-union-data-type.rb +2 -2
  18. data/lib/arrow/dictionary-data-type.rb +2 -2
  19. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  20. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  21. data/lib/arrow/loader.rb +15 -0
  22. data/lib/arrow/sort-key.rb +193 -0
  23. data/lib/arrow/sort-options.rb +109 -0
  24. data/lib/arrow/sparse-union-data-type.rb +2 -2
  25. data/lib/arrow/time32-data-type.rb +2 -2
  26. data/lib/arrow/time64-data-type.rb +2 -2
  27. data/lib/arrow/timestamp-data-type.rb +2 -2
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -0
  30. data/test/raw-records/test-basic-arrays.rb +17 -0
  31. data/test/raw-records/test-dense-union-array.rb +14 -0
  32. data/test/raw-records/test-list-array.rb +20 -0
  33. data/test/raw-records/test-sparse-union-array.rb +14 -0
  34. data/test/raw-records/test-struct-array.rb +15 -0
  35. data/test/test-array.rb +122 -2
  36. data/test/test-bigdecimal.rb +20 -3
  37. data/test/test-decimal128-array-builder.rb +18 -1
  38. data/test/test-decimal128-data-type.rb +2 -2
  39. data/test/test-decimal128.rb +38 -0
  40. data/test/test-decimal256-array-builder.rb +112 -0
  41. data/test/test-decimal256-array.rb +38 -0
  42. data/test/test-decimal256-data-type.rb +31 -0
  43. data/test/test-decimal256.rb +102 -0
  44. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  45. data/test/test-fixed-size-binary-array.rb +36 -0
  46. data/test/test-orc.rb +19 -23
  47. data/test/test-sort-indices.rb +40 -0
  48. data/test/test-sort-key.rb +81 -0
  49. data/test/test-sort-options.rb +58 -0
  50. data/test/test-struct-array-builder.rb +8 -8
  51. data/test/test-struct-array.rb +2 -2
  52. data/test/values/test-basic-arrays.rb +11 -0
  53. data/test/values/test-dense-union-array.rb +14 -0
  54. data/test/values/test-list-array.rb +18 -0
  55. data/test/values/test-sparse-union-array.rb +14 -0
  56. data/test/values/test-struct-array.rb +15 -0
  57. metadata +101 -61
@@ -0,0 +1,109 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SortOptions
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, String
25
+ new(value)
26
+ when ::Array
27
+ new(*value)
28
+ else
29
+ nil
30
+ end
31
+ end
32
+ end
33
+
34
+ alias_method :initialize_raw, :initialize
35
+ private :initialize_raw
36
+ # @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
37
+ # sort keys to be used. See {Arrow::SortKey.resolve} for how to
38
+ # resolve each sort key in `sort_keys`.
39
+ #
40
+ # You can add more sort keys by {#add_sort_key} later.
41
+ #
42
+ # @example No initial sort keys
43
+ # options = Arrow::SortOptions.new
44
+ # options.sort_keys # => []
45
+ #
46
+ # @example String sort keys
47
+ # options = Arrow::SortOptions.new("count", "-age")
48
+ # options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
49
+ #
50
+ # @example Symbol sort keys
51
+ # options = Arrow::SortOptions.new(:count, :age)
52
+ # options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
53
+ #
54
+ # @example Mixed sort keys
55
+ # options = Arrow::SortOptions.new(:count, "-age")
56
+ # options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
57
+ #
58
+ # @since 4.0.0
59
+ def initialize(*sort_keys)
60
+ initialize_raw
61
+ sort_keys.each do |sort_key|
62
+ add_sort_key(sort_key)
63
+ end
64
+ end
65
+
66
+ # @api private
67
+ alias_method :add_sort_key_raw, :add_sort_key
68
+ # Add a sort key.
69
+ #
70
+ # @return [void]
71
+ #
72
+ # @overload add_sort_key(key)
73
+ #
74
+ # @param key [Arrow::SortKey] The sort key to be added.
75
+ #
76
+ # @example Add a key to sort by "price" column in descending order
77
+ # options = Arrow::SortOptions.new
78
+ # options.add_sort_key(Arrow::SortKey.new(:price, :descending))
79
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
80
+ #
81
+ # @overload add_sort_key(name)
82
+ #
83
+ # @param name [Symbol, String] The sort key name to be
84
+ # added. See also {Arrow::SortKey#initialize} for the leading
85
+ # order mark for String name.
86
+ #
87
+ # @example Add a key to sort by "price" column in descending order
88
+ # options = Arrow::SortOptions.new
89
+ # options.add_sort_key("-price")
90
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
91
+ #
92
+ # @overload add_sort_key(name, order)
93
+ #
94
+ # @param name [Symbol, String] The sort key name.
95
+ #
96
+ # @param order [Symbol, String, Arrow::SortOrder] The sort
97
+ # order. See {Arrow::SortKey#initialize} for details.
98
+ #
99
+ # @example Add a key to sort by "price" column in descending order
100
+ # options = Arrow::SortOptions.new
101
+ # options.add_sort_key("price", :desc)
102
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
103
+ #
104
+ # @since 4.0.0
105
+ def add_sort_key(name, order=nil)
106
+ add_sort_key_raw(SortKey.resolve(name, order))
107
+ end
108
+ end
109
+ end
@@ -33,7 +33,7 @@ module Arrow
33
33
  # @param type_codes [::Array<Integer>] The IDs that indicate
34
34
  # corresponding fields.
35
35
  #
36
- # @example Create a sparse union data type for {2: visible, 9: count}
36
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
37
37
  # fields = [
38
38
  # Arrow::Field.new("visible", :boolean),
39
39
  # {
@@ -57,7 +57,7 @@ module Arrow
57
57
  # @option description [::Array<Integer>] :type_codes The IDs
58
58
  # that indicate corresponding fields.
59
59
  #
60
- # @example Create a sparse union data type for {2: visible, 9: count}
60
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
61
61
  # fields = [
62
62
  # Arrow::Field.new("visible", :boolean),
63
63
  # {
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be second or millisecond.
31
31
  #
32
- # @example Create a time32 data type with {Arrow::TimeUnit}
32
+ # @example Create a time32 data type with Arrow::TimeUnit
33
33
  # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
34
  #
35
35
  # @example Create a time32 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be second or millisecond.
47
47
  #
48
- # @example Create a time32 data type with {Arrow::TimeUnit}
48
+ # @example Create a time32 data type with Arrow::TimeUnit
49
49
  # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
50
  #
51
51
  # @example Create a time32 data type with Symbol
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be microsecond or nanosecond.
31
31
  #
32
- # @example Create a time64 data type with {Arrow::TimeUnit}
32
+ # @example Create a time64 data type with Arrow::TimeUnit
33
33
  # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
34
34
  #
35
35
  # @example Create a time64 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be microsecond or nanosecond.
47
47
  #
48
- # @example Create a time64 data type with {Arrow::TimeUnit}
48
+ # @example Create a time64 data type with Arrow::TimeUnit
49
49
  # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
50
50
  #
51
51
  # @example Create a time64 data type with Symbol
@@ -27,7 +27,7 @@ module Arrow
27
27
  # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
28
  # timestamp data type.
29
29
  #
30
- # @example Create a timestamp data type with {Arrow::TimeUnit}
30
+ # @example Create a timestamp data type with Arrow::TimeUnit
31
31
  # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
32
  #
33
33
  # @example Create a timestamp data type with Symbol
@@ -41,7 +41,7 @@ module Arrow
41
41
  # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
42
  # the timestamp data type.
43
43
  #
44
- # @example Create a timestamp data type with {Arrow::TimeUnit}
44
+ # @example Create a timestamp data type with Arrow::TimeUnit
45
45
  # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
46
  #
47
47
  # @example Create a timestamp data type with Symbol
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "1.0.0"
19
+ VERSION = "4.0.1"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -46,6 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.test_files += Dir.glob("test/**/*")
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
+ spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
49
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
51
  spec.add_runtime_dependency("gio2", ">= 3.3.6")
51
52
  spec.add_runtime_dependency("native-package-installer")
@@ -329,6 +329,23 @@ module RawRecordsBasicArraysTests
329
329
  records)
330
330
  assert_equal(records, target.raw_records)
331
331
  end
332
+
333
+ def test_decimal256
334
+ records = [
335
+ [BigDecimal("92.92")],
336
+ [nil],
337
+ [BigDecimal("29.29")],
338
+ ]
339
+ target = build({
340
+ column: {
341
+ type: :decimal256,
342
+ precision: 38,
343
+ scale: 2,
344
+ }
345
+ },
346
+ records)
347
+ assert_equal(records, target.raw_records)
348
+ end
332
349
  end
333
350
 
334
351
  class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
@@ -345,6 +345,20 @@ module RawRecordsDenseUnionArrayTests
345
345
  assert_equal(records, target.raw_records)
346
346
  end
347
347
 
348
+ def test_decimal256
349
+ records = [
350
+ [{"0" => BigDecimal("92.92")}],
351
+ [{"1" => nil}],
352
+ ]
353
+ target = build({
354
+ type: :decimal256,
355
+ precision: 38,
356
+ scale: 2,
357
+ },
358
+ records)
359
+ assert_equal(records, target.raw_records)
360
+ end
361
+
348
362
  def test_list
349
363
  records = [
350
364
  [{"0" => [true, nil, false]}],
@@ -379,6 +379,26 @@ module RawRecordsListArrayTests
379
379
  assert_equal(records, target.raw_records)
380
380
  end
381
381
 
382
+ def test_decimal256
383
+ records = [
384
+ [
385
+ [
386
+ BigDecimal("92.92"),
387
+ nil,
388
+ BigDecimal("29.29"),
389
+ ],
390
+ ],
391
+ [nil],
392
+ ]
393
+ target = build({
394
+ type: :decimal256,
395
+ precision: 38,
396
+ scale: 2,
397
+ },
398
+ records)
399
+ assert_equal(records, target.raw_records)
400
+ end
401
+
382
402
  def test_list
383
403
  records = [
384
404
  [
@@ -335,6 +335,20 @@ module RawRecordsSparseUnionArrayTests
335
335
  assert_equal(records, target.raw_records)
336
336
  end
337
337
 
338
+ def test_decimal256
339
+ records = [
340
+ [{"0" => BigDecimal("92.92")}],
341
+ [{"1" => nil}],
342
+ ]
343
+ target = build({
344
+ type: :decimal256,
345
+ precision: 38,
346
+ scale: 2,
347
+ },
348
+ records)
349
+ assert_equal(records, target.raw_records)
350
+ end
351
+
338
352
  def test_list
339
353
  records = [
340
354
  [{"0" => [true, nil, false]}],
@@ -329,6 +329,21 @@ module RawRecordsStructArrayTests
329
329
  assert_equal(records, target.raw_records)
330
330
  end
331
331
 
332
+ def test_decimal256
333
+ records = [
334
+ [{"field" => BigDecimal("92.92")}],
335
+ [nil],
336
+ [{"field" => nil}],
337
+ ]
338
+ target = build({
339
+ type: :decimal256,
340
+ precision: 38,
341
+ scale: 2,
342
+ },
343
+ records)
344
+ assert_equal(records, target.raw_records)
345
+ end
346
+
332
347
  def test_list
333
348
  records = [
334
349
  [{"field" => [true, nil, false]}],
data/test/test-array.rb CHANGED
@@ -160,12 +160,132 @@ class ArrayTest < Test::Unit::TestCase
160
160
 
161
161
  test("Arrow::ChunkedArray") do
162
162
  chunks = [
163
- Arrow::Int16Array.new([1, 0]),
164
- Arrow::Int16Array.new([1, 0, 3])
163
+ Arrow::Int16Array.new([1, 4]),
164
+ Arrow::Int16Array.new([0, 3])
165
165
  ]
166
166
  right = Arrow::ChunkedArray.new(chunks)
167
167
  assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
168
168
  @array.is_in(right))
169
169
  end
170
170
  end
171
+
172
+ sub_test_case("#concatenate") do
173
+ test("Arrow::Array: same") do
174
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
175
+ Arrow::Int32Array.new([1, 2, nil]).
176
+ concatenate(Arrow::Int32Array.new([4, 5]),
177
+ Arrow::Int32Array.new([6])))
178
+ end
179
+
180
+ test("Arrow::Array: castable") do
181
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
182
+ Arrow::Int32Array.new([1, 2, nil]).
183
+ concatenate(Arrow::Int8Array.new([4, 5]),
184
+ Arrow::UInt32Array.new([6])))
185
+ end
186
+
187
+ test("Arrow::Array: non-castable") do
188
+ assert_raise(Arrow::Error::Invalid) do
189
+ Arrow::Int32Array.new([1, 2, nil]).
190
+ concatenate(Arrow::StringArray.new(["X"]))
191
+ end
192
+ end
193
+
194
+ test("Array") do
195
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
196
+ Arrow::Int32Array.new([1, 2, nil]).
197
+ concatenate([4, nil],
198
+ [6]))
199
+ end
200
+
201
+ test("invalid") do
202
+ message = "[array][resolve] can't build int32 array: 4"
203
+ assert_raise(ArgumentError.new(message)) do
204
+ Arrow::Int32Array.new([1, 2, nil]).
205
+ concatenate(4)
206
+ end
207
+ end
208
+ end
209
+
210
+ sub_test_case("#+") do
211
+ test("Arrow::Array: same") do
212
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
213
+ Arrow::Int32Array.new([1, 2, nil]) +
214
+ Arrow::Int32Array.new([4, 5, 6]))
215
+ end
216
+
217
+ test("Arrow::Array: castable") do
218
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
219
+ Arrow::Int32Array.new([1, 2, nil]) +
220
+ Arrow::Int8Array.new([4, 5, 6]))
221
+ end
222
+
223
+ test("Arrow::Array: non-castable") do
224
+ assert_raise(Arrow::Error::Invalid) do
225
+ Arrow::Int32Array.new([1, 2, nil]) +
226
+ Arrow::StringArray.new(["X"])
227
+ end
228
+ end
229
+
230
+ test("Array") do
231
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
232
+ Arrow::Int32Array.new([1, 2, nil]) +
233
+ [4, nil, 6])
234
+ end
235
+
236
+ test("invalid") do
237
+ message = "[array][resolve] can't build int32 array: 4"
238
+ assert_raise(ArgumentError.new(message)) do
239
+ Arrow::Int32Array.new([1, 2, nil]) + 4
240
+ end
241
+ end
242
+ end
243
+
244
+ sub_test_case("#resolve") do
245
+ test("Arrow::Array: same") do
246
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
247
+ Arrow::Int32Array.new([]).
248
+ resolve(Arrow::Int32Array.new([1, 2, nil])))
249
+ end
250
+
251
+ test("Arrow::Array: castable") do
252
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
253
+ Arrow::Int32Array.new([]).
254
+ resolve(Arrow::Int8Array.new([1, 2, nil])))
255
+ end
256
+
257
+ test("Arrow::Array: non-castable") do
258
+ assert_raise(Arrow::Error::Invalid) do
259
+ Arrow::Int32Array.new([]) +
260
+ Arrow::StringArray.new(["X"])
261
+ end
262
+ end
263
+
264
+ test("Array: non-parametric") do
265
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
266
+ Arrow::Int32Array.new([]).
267
+ resolve([1, 2, nil]))
268
+ end
269
+
270
+ test("Array: parametric") do
271
+ list_data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
272
+ list_array = Arrow::ListArray.new(list_data_type, [])
273
+ assert_equal(Arrow::ListArray.new(list_data_type,
274
+ [
275
+ [true, false],
276
+ nil,
277
+ ]),
278
+ list_array.resolve([
279
+ [true, false],
280
+ nil,
281
+ ]))
282
+ end
283
+
284
+ test("invalid") do
285
+ message = "[array][resolve] can't build int32 array: 4"
286
+ assert_raise(ArgumentError.new(message)) do
287
+ Arrow::Int32Array.new([]).resolve(4)
288
+ end
289
+ end
290
+ end
171
291
  end