red-arrow 10.0.0 → 16.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
data/test/test-slicer.rb CHANGED
@@ -45,15 +45,11 @@ class SlicerTest < Test::Unit::TestCase
45
45
  slicer.visible
46
46
  end
47
47
  assert_equal(<<-TABLE, sliced_table.to_s)
48
- count visible
49
- 0 (null) (null)
50
- 1 1 true
51
- 2 (null) (null)
52
- 3 8 true
53
- 4 16 true
54
- 5 (null) (null)
55
- 6 (null) (null)
56
- 7 256 true
48
+ count visible
49
+ 0 1 true
50
+ 1 8 true
51
+ 2 16 true
52
+ 3 256 true
57
53
  TABLE
58
54
  end
59
55
 
@@ -62,16 +58,15 @@ class SlicerTest < Test::Unit::TestCase
62
58
  slicer.count
63
59
  end
64
60
  assert_equal(<<-TABLE, sliced_table.to_s)
65
- count visible
66
- 0 1 true
67
- 1 2 false
68
- 2 4 (null)
69
- 3 8 true
70
- 4 16 true
71
- 5 32 false
72
- 6 64 (null)
73
- 7 (null) (null)
74
- 8 256 true
61
+ count visible
62
+ 0 1 true
63
+ 1 2 false
64
+ 2 4 (null)
65
+ 3 8 true
66
+ 4 16 true
67
+ 5 32 false
68
+ 6 64 (null)
69
+ 7 256 true
75
70
  TABLE
76
71
  end
77
72
  end
@@ -82,13 +77,9 @@ class SlicerTest < Test::Unit::TestCase
82
77
  !slicer.visible
83
78
  end
84
79
  assert_equal(<<-TABLE, sliced_table.to_s)
85
- count visible
86
- 0 (null) (null)
87
- 1 2 false
88
- 2 (null) (null)
89
- 3 32 false
90
- 4 (null) (null)
91
- 5 (null) (null)
80
+ count visible
81
+ 0 2 false
82
+ 1 32 false
92
83
  TABLE
93
84
  end
94
85
 
@@ -97,9 +88,8 @@ class SlicerTest < Test::Unit::TestCase
97
88
  !slicer.count
98
89
  end
99
90
  assert_equal(<<-TABLE, sliced_table.to_s)
100
- count visible
101
- 0 0 (null)
102
- 1 (null) (null)
91
+ count visible
92
+ 0 0 (null)
103
93
  TABLE
104
94
  end
105
95
  end
@@ -151,15 +141,11 @@ class SlicerTest < Test::Unit::TestCase
151
141
  slicer.visible == true
152
142
  end
153
143
  assert_equal(<<-TABLE, sliced_table.to_s)
154
- count visible
155
- 0 (null) (null)
156
- 1 1 true
157
- 2 (null) (null)
158
- 3 8 true
159
- 4 16 true
160
- 5 (null) (null)
161
- 6 (null) (null)
162
- 7 256 true
144
+ count visible
145
+ 0 1 true
146
+ 1 8 true
147
+ 2 16 true
148
+ 3 256 true
163
149
  TABLE
164
150
  end
165
151
  end
@@ -185,13 +171,9 @@ class SlicerTest < Test::Unit::TestCase
185
171
  !(slicer.visible == true)
186
172
  end
187
173
  assert_equal(<<-TABLE, sliced_table.to_s)
188
- count visible
189
- 0 (null) (null)
190
- 1 2 false
191
- 2 (null) (null)
192
- 3 32 false
193
- 4 (null) (null)
194
- 5 (null) (null)
174
+ count visible
175
+ 0 2 false
176
+ 1 32 false
195
177
  TABLE
196
178
  end
197
179
  end
@@ -217,13 +199,9 @@ class SlicerTest < Test::Unit::TestCase
217
199
  slicer.visible != true
218
200
  end
219
201
  assert_equal(<<-TABLE, sliced_table.to_s)
220
- count visible
221
- 0 (null) (null)
222
- 1 2 false
223
- 2 (null) (null)
224
- 3 32 false
225
- 4 (null) (null)
226
- 5 (null) (null)
202
+ count visible
203
+ 0 2 false
204
+ 1 32 false
227
205
  TABLE
228
206
  end
229
207
  end
@@ -233,13 +211,12 @@ class SlicerTest < Test::Unit::TestCase
233
211
  slicer.count < 16
234
212
  end
235
213
  assert_equal(<<-TABLE, sliced_table.to_s)
236
- count visible
237
- 0 0 (null)
238
- 1 1 true
239
- 2 2 false
240
- 3 4 (null)
241
- 4 8 true
242
- 5 (null) (null)
214
+ count visible
215
+ 0 0 (null)
216
+ 1 1 true
217
+ 2 2 false
218
+ 3 4 (null)
219
+ 4 8 true
243
220
  TABLE
244
221
  end
245
222
 
@@ -248,12 +225,11 @@ class SlicerTest < Test::Unit::TestCase
248
225
  !(slicer.count < 16)
249
226
  end
250
227
  assert_equal(<<-TABLE, sliced_table.to_s)
251
- count visible
252
- 0 16 true
253
- 1 32 false
254
- 2 64 (null)
255
- 3 (null) (null)
256
- 4 256 true
228
+ count visible
229
+ 0 16 true
230
+ 1 32 false
231
+ 2 64 (null)
232
+ 3 256 true
257
233
  TABLE
258
234
  end
259
235
 
@@ -262,14 +238,13 @@ class SlicerTest < Test::Unit::TestCase
262
238
  slicer.count <= 16
263
239
  end
264
240
  assert_equal(<<-TABLE, sliced_table.to_s)
265
- count visible
266
- 0 0 (null)
267
- 1 1 true
268
- 2 2 false
269
- 3 4 (null)
270
- 4 8 true
271
- 5 16 true
272
- 6 (null) (null)
241
+ count visible
242
+ 0 0 (null)
243
+ 1 1 true
244
+ 2 2 false
245
+ 3 4 (null)
246
+ 4 8 true
247
+ 5 16 true
273
248
  TABLE
274
249
  end
275
250
 
@@ -278,11 +253,10 @@ class SlicerTest < Test::Unit::TestCase
278
253
  !(slicer.count <= 16)
279
254
  end
280
255
  assert_equal(<<-TABLE, sliced_table.to_s)
281
- count visible
282
- 0 32 false
283
- 1 64 (null)
284
- 2 (null) (null)
285
- 3 256 true
256
+ count visible
257
+ 0 32 false
258
+ 1 64 (null)
259
+ 2 256 true
286
260
  TABLE
287
261
  end
288
262
 
@@ -291,11 +265,10 @@ class SlicerTest < Test::Unit::TestCase
291
265
  slicer.count > 16
292
266
  end
293
267
  assert_equal(<<-TABLE, sliced_table.to_s)
294
- count visible
295
- 0 32 false
296
- 1 64 (null)
297
- 2 (null) (null)
298
- 3 256 true
268
+ count visible
269
+ 0 32 false
270
+ 1 64 (null)
271
+ 2 256 true
299
272
  TABLE
300
273
  end
301
274
 
@@ -304,14 +277,13 @@ class SlicerTest < Test::Unit::TestCase
304
277
  !(slicer.count > 16)
305
278
  end
306
279
  assert_equal(<<-TABLE, sliced_table.to_s)
307
- count visible
308
- 0 0 (null)
309
- 1 1 true
310
- 2 2 false
311
- 3 4 (null)
312
- 4 8 true
313
- 5 16 true
314
- 6 (null) (null)
280
+ count visible
281
+ 0 0 (null)
282
+ 1 1 true
283
+ 2 2 false
284
+ 3 4 (null)
285
+ 4 8 true
286
+ 5 16 true
315
287
  TABLE
316
288
  end
317
289
 
@@ -320,12 +292,11 @@ class SlicerTest < Test::Unit::TestCase
320
292
  slicer.count >= 16
321
293
  end
322
294
  assert_equal(<<-TABLE, sliced_table.to_s)
323
- count visible
324
- 0 16 true
325
- 1 32 false
326
- 2 64 (null)
327
- 3 (null) (null)
328
- 4 256 true
295
+ count visible
296
+ 0 16 true
297
+ 1 32 false
298
+ 2 64 (null)
299
+ 3 256 true
329
300
  TABLE
330
301
  end
331
302
 
@@ -334,13 +305,12 @@ class SlicerTest < Test::Unit::TestCase
334
305
  !(slicer.count >= 16)
335
306
  end
336
307
  assert_equal(<<-TABLE, sliced_table.to_s)
337
- count visible
338
- 0 0 (null)
339
- 1 1 true
340
- 2 2 false
341
- 3 4 (null)
342
- 4 8 true
343
- 5 (null) (null)
308
+ count visible
309
+ 0 0 (null)
310
+ 1 1 true
311
+ 2 2 false
312
+ 3 4 (null)
313
+ 4 8 true
344
314
  TABLE
345
315
  end
346
316
 
@@ -377,13 +347,9 @@ class SlicerTest < Test::Unit::TestCase
377
347
  slicer.visible & (slicer.count >= 16)
378
348
  end
379
349
  assert_equal(<<-TABLE, sliced_table.to_s)
380
- count visible
381
- 0 (null) (null)
382
- 1 (null) (null)
383
- 2 16 true
384
- 3 (null) (null)
385
- 4 (null) (null)
386
- 5 256 true
350
+ count visible
351
+ 0 16 true
352
+ 1 256 true
387
353
  TABLE
388
354
  end
389
355
 
@@ -392,16 +358,12 @@ class SlicerTest < Test::Unit::TestCase
392
358
  slicer.visible | (slicer.count >= 16)
393
359
  end
394
360
  assert_equal(<<-TABLE, sliced_table.to_s)
395
- count visible
396
- 0 (null) (null)
397
- 1 1 true
398
- 2 (null) (null)
399
- 3 8 true
400
- 4 16 true
401
- 5 32 false
402
- 6 (null) (null)
403
- 7 (null) (null)
404
- 8 256 true
361
+ count visible
362
+ 0 1 true
363
+ 1 8 true
364
+ 2 16 true
365
+ 3 32 false
366
+ 4 256 true
405
367
  TABLE
406
368
  end
407
369
 
@@ -410,14 +372,10 @@ class SlicerTest < Test::Unit::TestCase
410
372
  slicer.visible ^ (slicer.count >= 16)
411
373
  end
412
374
  assert_equal(<<-TABLE, sliced_table.to_s)
413
- count visible
414
- 0 (null) (null)
415
- 1 1 true
416
- 2 (null) (null)
417
- 3 8 true
418
- 4 32 false
419
- 5 (null) (null)
420
- 6 (null) (null)
375
+ count visible
376
+ 0 1 true
377
+ 1 8 true
378
+ 2 32 false
421
379
  TABLE
422
380
  end
423
381
 
@@ -484,4 +442,111 @@ class SlicerTest < Test::Unit::TestCase
484
442
  7 256 true
485
443
  TABLE
486
444
  end
445
+
446
+ sub_test_case "MatchSubstringOptions family" do
447
+ def setup
448
+ @table = Arrow::Table.new(
449
+ string: ["array", "Arrow", "carrot", nil, "window"]
450
+ )
451
+ end
452
+
453
+ test("end_with?") do
454
+ sliced_table = @table.slice do |slicer|
455
+ slicer.string.end_with?("ow")
456
+ end
457
+ assert_equal(<<~TABLE, sliced_table.to_s)
458
+ string
459
+ 0 Arrow
460
+ 1 window
461
+ TABLE
462
+ end
463
+
464
+ test("match_like?") do
465
+ sliced_table = @table.slice do |slicer|
466
+ slicer.string.match_like?("_rr%")
467
+ end
468
+ assert_equal(<<~TABLE, sliced_table.to_s)
469
+ string
470
+ 0 array
471
+ 1 Arrow
472
+ TABLE
473
+ end
474
+
475
+ test("match_substring?") do
476
+ sliced_table = @table.slice do |slicer|
477
+ slicer.string.match_substring?("arr")
478
+ end
479
+ assert_equal(<<~TABLE, sliced_table.to_s)
480
+ string
481
+ 0 array
482
+ 1 carrot
483
+ TABLE
484
+ end
485
+
486
+ test("match_substring?(ignore_case:)") do
487
+ sliced_table = @table.slice do |slicer|
488
+ slicer.string.match_substring?("arr", ignore_case: true)
489
+ end
490
+ assert_equal(<<~TABLE, sliced_table.to_s)
491
+ string
492
+ 0 array
493
+ 1 Arrow
494
+ 2 carrot
495
+ TABLE
496
+ end
497
+
498
+ test("!match_substring?") do
499
+ sliced_table = @table.slice do |slicer|
500
+ !slicer.string.match_substring?("arr")
501
+ end
502
+ assert_equal(<<~TABLE, sliced_table.to_s)
503
+ string
504
+ 0 Arrow
505
+ 1 window
506
+ TABLE
507
+ end
508
+
509
+ test("match_substring?(Regexp)") do
510
+ sliced_table = @table.slice do |slicer|
511
+ slicer.string.match_substring?(/[dr]ow/)
512
+ end
513
+ assert_equal(<<~TABLE, sliced_table.to_s)
514
+ string
515
+ 0 Arrow
516
+ 1 window
517
+ TABLE
518
+ end
519
+
520
+ test("match_substring?(/String/i)") do
521
+ sliced_table = @table.slice do |slicer|
522
+ slicer.string.match_substring?(/arr/i)
523
+ end
524
+ assert_equal(<<~TABLE, sliced_table.to_s)
525
+ string
526
+ 0 array
527
+ 1 Arrow
528
+ 2 carrot
529
+ TABLE
530
+ end
531
+
532
+ test("match_substring? - invalid") do
533
+ message =
534
+ 'pattern must be either String or Regexp: ["arr"]'
535
+ assert_raise(ArgumentError.new(message)) do
536
+ @table.slice do |slicer|
537
+ slicer.string.match_substring?(["arr"])
538
+ end
539
+ end
540
+ end
541
+
542
+ test("start_with?") do
543
+ sliced_table = @table.slice do |slicer|
544
+ slicer.string.start_with?("ca")
545
+ end
546
+ assert_equal(<<~TABLE, sliced_table.to_s)
547
+ string
548
+ 0 carrot
549
+ TABLE
550
+ end
551
+ end
487
552
  end
data/test/test-sparse-union-array.rb ADDED
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class SparseUnionArrayTest < Test::Unit::TestCase
19
+ def setup
20
+ data_type_fields = [
21
+ Arrow::Field.new("number", :int16),
22
+ Arrow::Field.new("text", :string),
23
+ ]
24
+ type_codes = [11, 13]
25
+ @data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes)
26
+ type_ids = Arrow::Int8Array.new([11, 13, 11, 13, 11])
27
+ fields = [
28
+ Arrow::Int16Array.new([1, nil, nil, nil, 5]),
29
+ Arrow::StringArray.new([nil, "b", nil, "d", nil]),
30
+ ]
31
+ @array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields)
32
+ end
33
+
34
+ def test_get_value
35
+ assert_equal([1, "b", nil, "d", 5],
36
+ @array.length.times.collect {|i| @array[i]})
37
+ end
38
+ end