red-arrow 0.17.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/buffer.rb +28 -0
  10. data/lib/arrow/data-type.rb +14 -5
  11. data/lib/arrow/decimal128-array-builder.rb +21 -25
  12. data/lib/arrow/decimal128-data-type.rb +2 -0
  13. data/lib/arrow/decimal128.rb +18 -0
  14. data/lib/arrow/decimal256-array-builder.rb +61 -0
  15. data/lib/arrow/decimal256-array.rb +25 -0
  16. data/lib/arrow/decimal256-data-type.rb +73 -0
  17. data/lib/arrow/decimal256.rb +60 -0
  18. data/lib/arrow/dense-union-data-type.rb +2 -2
  19. data/lib/arrow/dictionary-array.rb +24 -0
  20. data/lib/arrow/dictionary-data-type.rb +2 -2
  21. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  22. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  23. data/lib/arrow/loader.rb +18 -0
  24. data/lib/arrow/raw-table-converter.rb +47 -0
  25. data/lib/arrow/record-batch-iterator.rb +22 -0
  26. data/lib/arrow/record-batch.rb +9 -1
  27. data/lib/arrow/sort-key.rb +193 -0
  28. data/lib/arrow/sort-options.rb +109 -0
  29. data/lib/arrow/sparse-union-data-type.rb +2 -2
  30. data/lib/arrow/struct-array-builder.rb +13 -7
  31. data/lib/arrow/table-saver.rb +6 -6
  32. data/lib/arrow/table.rb +5 -24
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +1 -0
  38. data/test/raw-records/test-basic-arrays.rb +17 -0
  39. data/test/raw-records/test-dense-union-array.rb +15 -34
  40. data/test/raw-records/test-list-array.rb +20 -0
  41. data/test/raw-records/test-sparse-union-array.rb +15 -33
  42. data/test/raw-records/test-struct-array.rb +15 -0
  43. data/test/test-array.rb +122 -2
  44. data/test/test-bigdecimal.rb +20 -3
  45. data/test/test-buffer.rb +11 -0
  46. data/test/test-decimal128-array-builder.rb +18 -1
  47. data/test/test-decimal128-data-type.rb +2 -2
  48. data/test/test-decimal128.rb +38 -0
  49. data/test/test-decimal256-array-builder.rb +112 -0
  50. data/test/test-decimal256-array.rb +38 -0
  51. data/test/test-decimal256-data-type.rb +31 -0
  52. data/test/test-decimal256.rb +102 -0
  53. data/test/test-dense-union-data-type.rb +2 -2
  54. data/test/test-dictionary-array.rb +41 -0
  55. data/test/test-feather.rb +1 -1
  56. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  57. data/test/test-fixed-size-binary-array.rb +36 -0
  58. data/test/test-orc.rb +19 -23
  59. data/test/test-record-batch-iterator.rb +37 -0
  60. data/test/test-record-batch.rb +14 -0
  61. data/test/test-sort-indices.rb +40 -0
  62. data/test/test-sort-key.rb +81 -0
  63. data/test/test-sort-options.rb +58 -0
  64. data/test/test-sparse-union-data-type.rb +2 -2
  65. data/test/test-struct-array-builder.rb +16 -12
  66. data/test/test-struct-array.rb +2 -2
  67. data/test/values/test-basic-arrays.rb +11 -0
  68. data/test/values/test-dense-union-array.rb +15 -34
  69. data/test/values/test-list-array.rb +18 -0
  70. data/test/values/test-sparse-union-array.rb +15 -33
  71. data/test/values/test-struct-array.rb +15 -0
  72. metadata +107 -59
@@ -379,6 +379,26 @@ module RawRecordsListArrayTests
379
379
  assert_equal(records, target.raw_records)
380
380
  end
381
381
 
382
+ def test_decimal256
383
+ records = [
384
+ [
385
+ [
386
+ BigDecimal("92.92"),
387
+ nil,
388
+ BigDecimal("29.29"),
389
+ ],
390
+ ],
391
+ [nil],
392
+ ]
393
+ target = build({
394
+ type: :decimal256,
395
+ precision: 38,
396
+ scale: 2,
397
+ },
398
+ records)
399
+ assert_equal(records, target.raw_records)
400
+ end
401
+
382
402
  def test_list
383
403
  records = [
384
404
  [
@@ -51,9 +51,7 @@ module RawRecordsSparseUnionArrayTests
51
51
  end
52
52
  records.each do |record|
53
53
  column = record[0]
54
- if column.nil?
55
- type_ids << nil
56
- elsif column.key?("0")
54
+ if column.key?("0")
57
55
  type_ids << type_codes[0]
58
56
  elsif column.key?("1")
59
57
  type_ids << type_codes[1]
@@ -71,7 +69,6 @@ module RawRecordsSparseUnionArrayTests
71
69
  def test_null
72
70
  records = [
73
71
  [{"0" => nil}],
74
- [nil],
75
72
  ]
76
73
  target = build(:null, records)
77
74
  assert_equal(records, target.raw_records)
@@ -80,7 +77,6 @@ module RawRecordsSparseUnionArrayTests
80
77
  def test_boolean
81
78
  records = [
82
79
  [{"0" => true}],
83
- [nil],
84
80
  [{"1" => nil}],
85
81
  ]
86
82
  target = build(:boolean, records)
@@ -90,7 +86,6 @@ module RawRecordsSparseUnionArrayTests
90
86
  def test_int8
91
87
  records = [
92
88
  [{"0" => -(2 ** 7)}],
93
- [nil],
94
89
  [{"1" => nil}],
95
90
  ]
96
91
  target = build(:int8, records)
@@ -100,7 +95,6 @@ module RawRecordsSparseUnionArrayTests
100
95
  def test_uint8
101
96
  records = [
102
97
  [{"0" => (2 ** 8) - 1}],
103
- [nil],
104
98
  [{"1" => nil}],
105
99
  ]
106
100
  target = build(:uint8, records)
@@ -110,7 +104,6 @@ module RawRecordsSparseUnionArrayTests
110
104
  def test_int16
111
105
  records = [
112
106
  [{"0" => -(2 ** 15)}],
113
- [nil],
114
107
  [{"1" => nil}],
115
108
  ]
116
109
  target = build(:int16, records)
@@ -120,7 +113,6 @@ module RawRecordsSparseUnionArrayTests
120
113
  def test_uint16
121
114
  records = [
122
115
  [{"0" => (2 ** 16) - 1}],
123
- [nil],
124
116
  [{"1" => nil}],
125
117
  ]
126
118
  target = build(:uint16, records)
@@ -130,7 +122,6 @@ module RawRecordsSparseUnionArrayTests
130
122
  def test_int32
131
123
  records = [
132
124
  [{"0" => -(2 ** 31)}],
133
- [nil],
134
125
  [{"1" => nil}],
135
126
  ]
136
127
  target = build(:int32, records)
@@ -140,7 +131,6 @@ module RawRecordsSparseUnionArrayTests
140
131
  def test_uint32
141
132
  records = [
142
133
  [{"0" => (2 ** 32) - 1}],
143
- [nil],
144
134
  [{"1" => nil}],
145
135
  ]
146
136
  target = build(:uint32, records)
@@ -150,7 +140,6 @@ module RawRecordsSparseUnionArrayTests
150
140
  def test_int64
151
141
  records = [
152
142
  [{"0" => -(2 ** 63)}],
153
- [nil],
154
143
  [{"1" => nil}],
155
144
  ]
156
145
  target = build(:int64, records)
@@ -160,7 +149,6 @@ module RawRecordsSparseUnionArrayTests
160
149
  def test_uint64
161
150
  records = [
162
151
  [{"0" => (2 ** 64) - 1}],
163
- [nil],
164
152
  [{"1" => nil}],
165
153
  ]
166
154
  target = build(:uint64, records)
@@ -170,7 +158,6 @@ module RawRecordsSparseUnionArrayTests
170
158
  def test_float
171
159
  records = [
172
160
  [{"0" => -1.0}],
173
- [nil],
174
161
  [{"1" => nil}],
175
162
  ]
176
163
  target = build(:float, records)
@@ -180,7 +167,6 @@ module RawRecordsSparseUnionArrayTests
180
167
  def test_double
181
168
  records = [
182
169
  [{"0" => -1.0}],
183
- [nil],
184
170
  [{"1" => nil}],
185
171
  ]
186
172
  target = build(:double, records)
@@ -190,7 +176,6 @@ module RawRecordsSparseUnionArrayTests
190
176
  def test_binary
191
177
  records = [
192
178
  [{"0" => "\xff".b}],
193
- [nil],
194
179
  [{"1" => nil}],
195
180
  ]
196
181
  target = build(:binary, records)
@@ -200,7 +185,6 @@ module RawRecordsSparseUnionArrayTests
200
185
  def test_string
201
186
  records = [
202
187
  [{"0" => "Ruby"}],
203
- [nil],
204
188
  [{"1" => nil}],
205
189
  ]
206
190
  target = build(:string, records)
@@ -210,7 +194,6 @@ module RawRecordsSparseUnionArrayTests
210
194
  def test_date32
211
195
  records = [
212
196
  [{"0" => Date.new(1960, 1, 1)}],
213
- [nil],
214
197
  [{"1" => nil}],
215
198
  ]
216
199
  target = build(:date32, records)
@@ -220,7 +203,6 @@ module RawRecordsSparseUnionArrayTests
220
203
  def test_date64
221
204
  records = [
222
205
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
223
- [nil],
224
206
  [{"1" => nil}],
225
207
  ]
226
208
  target = build(:date64, records)
@@ -230,7 +212,6 @@ module RawRecordsSparseUnionArrayTests
230
212
  def test_timestamp_second
231
213
  records = [
232
214
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
233
- [nil],
234
215
  [{"1" => nil}],
235
216
  ]
236
217
  target = build({
@@ -244,7 +225,6 @@ module RawRecordsSparseUnionArrayTests
244
225
  def test_timestamp_milli
245
226
  records = [
246
227
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
247
- [nil],
248
228
  [{"1" => nil}],
249
229
  ]
250
230
  target = build({
@@ -258,7 +238,6 @@ module RawRecordsSparseUnionArrayTests
258
238
  def test_timestamp_micro
259
239
  records = [
260
240
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
261
- [nil],
262
241
  [{"1" => nil}],
263
242
  ]
264
243
  target = build({
@@ -272,7 +251,6 @@ module RawRecordsSparseUnionArrayTests
272
251
  def test_timestamp_nano
273
252
  records = [
274
253
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
275
- [nil],
276
254
  [{"1" => nil}],
277
255
  ]
278
256
  target = build({
@@ -288,7 +266,6 @@ module RawRecordsSparseUnionArrayTests
288
266
  records = [
289
267
  # 00:10:00
290
268
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
291
- [nil],
292
269
  [{"1" => nil}],
293
270
  ]
294
271
  target = build({
@@ -304,7 +281,6 @@ module RawRecordsSparseUnionArrayTests
304
281
  records = [
305
282
  # 00:10:00.123
306
283
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
307
- [nil],
308
284
  [{"1" => nil}],
309
285
  ]
310
286
  target = build({
@@ -320,7 +296,6 @@ module RawRecordsSparseUnionArrayTests
320
296
  records = [
321
297
  # 00:10:00.123456
322
298
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
323
- [nil],
324
299
  [{"1" => nil}],
325
300
  ]
326
301
  target = build({
@@ -336,7 +311,6 @@ module RawRecordsSparseUnionArrayTests
336
311
  records = [
337
312
  # 00:10:00.123456789
338
313
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
339
- [nil],
340
314
  [{"1" => nil}],
341
315
  ]
342
316
  target = build({
@@ -350,7 +324,6 @@ module RawRecordsSparseUnionArrayTests
350
324
  def test_decimal128
351
325
  records = [
352
326
  [{"0" => BigDecimal("92.92")}],
353
- [nil],
354
327
  [{"1" => nil}],
355
328
  ]
356
329
  target = build({
@@ -362,10 +335,23 @@ module RawRecordsSparseUnionArrayTests
362
335
  assert_equal(records, target.raw_records)
363
336
  end
364
337
 
338
+ def test_decimal256
339
+ records = [
340
+ [{"0" => BigDecimal("92.92")}],
341
+ [{"1" => nil}],
342
+ ]
343
+ target = build({
344
+ type: :decimal256,
345
+ precision: 38,
346
+ scale: 2,
347
+ },
348
+ records)
349
+ assert_equal(records, target.raw_records)
350
+ end
351
+
365
352
  def test_list
366
353
  records = [
367
354
  [{"0" => [true, nil, false]}],
368
- [nil],
369
355
  [{"1" => nil}],
370
356
  ]
371
357
  target = build({
@@ -382,7 +368,6 @@ module RawRecordsSparseUnionArrayTests
382
368
  def test_struct
383
369
  records = [
384
370
  [{"0" => {"sub_field" => true}}],
385
- [nil],
386
371
  [{"1" => nil}],
387
372
  [{"0" => {"sub_field" => nil}}],
388
373
  ]
@@ -403,7 +388,6 @@ module RawRecordsSparseUnionArrayTests
403
388
  omit("Need to add support for SparseUnionArrayBuilder")
404
389
  records = [
405
390
  [{"0" => {"field1" => true}}],
406
- [nil],
407
391
  [{"1" => nil}],
408
392
  [{"0" => {"field2" => nil}}],
409
393
  ]
@@ -429,7 +413,6 @@ module RawRecordsSparseUnionArrayTests
429
413
  omit("Need to add support for DenseUnionArrayBuilder")
430
414
  records = [
431
415
  [{"0" => {"field1" => true}}],
432
- [nil],
433
416
  [{"1" => nil}],
434
417
  [{"0" => {"field2" => nil}}],
435
418
  ]
@@ -455,7 +438,6 @@ module RawRecordsSparseUnionArrayTests
455
438
  omit("Need to add support for DictionaryArrayBuilder")
456
439
  records = [
457
440
  [{"0" => "Ruby"}],
458
- [nil],
459
441
  [{"1" => nil}],
460
442
  [{"0" => "GLib"}],
461
443
  ]
@@ -329,6 +329,21 @@ module RawRecordsStructArrayTests
329
329
  assert_equal(records, target.raw_records)
330
330
  end
331
331
 
332
+ def test_decimal256
333
+ records = [
334
+ [{"field" => BigDecimal("92.92")}],
335
+ [nil],
336
+ [{"field" => nil}],
337
+ ]
338
+ target = build({
339
+ type: :decimal256,
340
+ precision: 38,
341
+ scale: 2,
342
+ },
343
+ records)
344
+ assert_equal(records, target.raw_records)
345
+ end
346
+
332
347
  def test_list
333
348
  records = [
334
349
  [{"field" => [true, nil, false]}],
data/test/test-array.rb CHANGED
@@ -160,12 +160,132 @@ class ArrayTest < Test::Unit::TestCase
160
160
 
161
161
  test("Arrow::ChunkedArray") do
162
162
  chunks = [
163
- Arrow::Int16Array.new([1, 0]),
164
- Arrow::Int16Array.new([1, 0, 3])
163
+ Arrow::Int16Array.new([1, 4]),
164
+ Arrow::Int16Array.new([0, 3])
165
165
  ]
166
166
  right = Arrow::ChunkedArray.new(chunks)
167
167
  assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
168
168
  @array.is_in(right))
169
169
  end
170
170
  end
171
+
172
+ sub_test_case("#concatenate") do
173
+ test("Arrow::Array: same") do
174
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
175
+ Arrow::Int32Array.new([1, 2, nil]).
176
+ concatenate(Arrow::Int32Array.new([4, 5]),
177
+ Arrow::Int32Array.new([6])))
178
+ end
179
+
180
+ test("Arrow::Array: castable") do
181
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
182
+ Arrow::Int32Array.new([1, 2, nil]).
183
+ concatenate(Arrow::Int8Array.new([4, 5]),
184
+ Arrow::UInt32Array.new([6])))
185
+ end
186
+
187
+ test("Arrow::Array: non-castable") do
188
+ assert_raise(Arrow::Error::Invalid) do
189
+ Arrow::Int32Array.new([1, 2, nil]).
190
+ concatenate(Arrow::StringArray.new(["X"]))
191
+ end
192
+ end
193
+
194
+ test("Array") do
195
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
196
+ Arrow::Int32Array.new([1, 2, nil]).
197
+ concatenate([4, nil],
198
+ [6]))
199
+ end
200
+
201
+ test("invalid") do
202
+ message = "[array][resolve] can't build int32 array: 4"
203
+ assert_raise(ArgumentError.new(message)) do
204
+ Arrow::Int32Array.new([1, 2, nil]).
205
+ concatenate(4)
206
+ end
207
+ end
208
+ end
209
+
210
+ sub_test_case("#+") do
211
+ test("Arrow::Array: same") do
212
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
213
+ Arrow::Int32Array.new([1, 2, nil]) +
214
+ Arrow::Int32Array.new([4, 5, 6]))
215
+ end
216
+
217
+ test("Arrow::Array: castable") do
218
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,5, 6]),
219
+ Arrow::Int32Array.new([1, 2, nil]) +
220
+ Arrow::Int8Array.new([4, 5, 6]))
221
+ end
222
+
223
+ test("Arrow::Array: non-castable") do
224
+ assert_raise(Arrow::Error::Invalid) do
225
+ Arrow::Int32Array.new([1, 2, nil]) +
226
+ Arrow::StringArray.new(["X"])
227
+ end
228
+ end
229
+
230
+ test("Array") do
231
+ assert_equal(Arrow::Int32Array.new([1, 2, nil, 4 ,nil, 6]),
232
+ Arrow::Int32Array.new([1, 2, nil]) +
233
+ [4, nil, 6])
234
+ end
235
+
236
+ test("invalid") do
237
+ message = "[array][resolve] can't build int32 array: 4"
238
+ assert_raise(ArgumentError.new(message)) do
239
+ Arrow::Int32Array.new([1, 2, nil]) + 4
240
+ end
241
+ end
242
+ end
243
+
244
+ sub_test_case("#resolve") do
245
+ test("Arrow::Array: same") do
246
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
247
+ Arrow::Int32Array.new([]).
248
+ resolve(Arrow::Int32Array.new([1, 2, nil])))
249
+ end
250
+
251
+ test("Arrow::Array: castable") do
252
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
253
+ Arrow::Int32Array.new([]).
254
+ resolve(Arrow::Int8Array.new([1, 2, nil])))
255
+ end
256
+
257
+ test("Arrow::Array: non-castable") do
258
+ assert_raise(Arrow::Error::Invalid) do
259
+ Arrow::Int32Array.new([]) +
260
+ Arrow::StringArray.new(["X"])
261
+ end
262
+ end
263
+
264
+ test("Array: non-parametric") do
265
+ assert_equal(Arrow::Int32Array.new([1, 2, nil]),
266
+ Arrow::Int32Array.new([]).
267
+ resolve([1, 2, nil]))
268
+ end
269
+
270
+ test("Array: parametric") do
271
+ list_data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
272
+ list_array = Arrow::ListArray.new(list_data_type, [])
273
+ assert_equal(Arrow::ListArray.new(list_data_type,
274
+ [
275
+ [true, false],
276
+ nil,
277
+ ]),
278
+ list_array.resolve([
279
+ [true, false],
280
+ nil,
281
+ ]))
282
+ end
283
+
284
+ test("invalid") do
285
+ message = "[array][resolve] can't build int32 array: 4"
286
+ assert_raise(ArgumentError.new(message)) do
287
+ Arrow::Int32Array.new([]).resolve(4)
288
+ end
289
+ end
290
+ end
171
291
  end
@@ -16,8 +16,25 @@
16
16
  # under the License.
17
17
 
18
18
  class BigDecimalTest < Test::Unit::TestCase
19
- test("#to_arrow") do
20
- assert_equal(Arrow::Decimal128.new("3.14"),
21
- BigDecimal("3.14").to_arrow)
19
+ sub_test_case("#to_arrow") do
20
+ def test_128_positive
21
+ assert_equal(Arrow::Decimal128.new("0.1e38"),
22
+ BigDecimal("0.1e38").to_arrow)
23
+ end
24
+
25
+ def test_128_negative
26
+ assert_equal(Arrow::Decimal128.new("-0.1e38"),
27
+ BigDecimal("-0.1e38").to_arrow)
28
+ end
29
+
30
+ def test_256_positive
31
+ assert_equal(Arrow::Decimal256.new("0.1e39"),
32
+ BigDecimal("0.1e39").to_arrow)
33
+ end
34
+
35
+ def test_256_negative
36
+ assert_equal(Arrow::Decimal256.new("-0.1e39"),
37
+ BigDecimal("-0.1e39").to_arrow)
38
+ end
22
39
  end
23
40
  end