red-arrow 10.0.0 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -76,12 +76,25 @@ module RawRecordsDenseUnionArrayTests
76
76
  [union_array])
77
77
  end
78
78
 
79
# NOTE(review): annotation added during review; not part of the package diff.
# Test helper: builds the expected raw_records value from the union-style
# input records. Each record is mapped to a new array in which every
# non-nil column is replaced by column.values[0] (the first value of the
# column); nil columns are passed through unchanged. The tests in this
# module write each column as a single-pair hash such as {"0" => nil}, so
# this drops the key and keeps only the value. Some call sites apply it
# twice to unwrap a nested union value.
+ def remove_field_names(records)
80
+ records.collect do |record|
81
+ record.collect do |column|
82
+ if column.nil?
83
+ column
84
+ else
85
+ column.values[0]
86
+ end
87
+ end
88
+ end
89
+ end
90
+
79
91
  def test_null
80
92
  records = [
81
93
  [{"0" => nil}],
82
94
  ]
83
95
  target = build(:null, records)
84
- assert_equal(records, target.raw_records)
96
+ assert_equal(remove_field_names(records),
97
+ target.raw_records)
85
98
  end
86
99
 
87
100
  def test_boolean
@@ -90,7 +103,8 @@ module RawRecordsDenseUnionArrayTests
90
103
  [{"1" => nil}],
91
104
  ]
92
105
  target = build(:boolean, records)
93
- assert_equal(records, target.raw_records)
106
+ assert_equal(remove_field_names(records),
107
+ target.raw_records)
94
108
  end
95
109
 
96
110
  def test_int8
@@ -99,7 +113,8 @@ module RawRecordsDenseUnionArrayTests
99
113
  [{"1" => nil}],
100
114
  ]
101
115
  target = build(:int8, records)
102
- assert_equal(records, target.raw_records)
116
+ assert_equal(remove_field_names(records),
117
+ target.raw_records)
103
118
  end
104
119
 
105
120
  def test_uint8
@@ -108,7 +123,8 @@ module RawRecordsDenseUnionArrayTests
108
123
  [{"1" => nil}],
109
124
  ]
110
125
  target = build(:uint8, records)
111
- assert_equal(records, target.raw_records)
126
+ assert_equal(remove_field_names(records),
127
+ target.raw_records)
112
128
  end
113
129
 
114
130
  def test_int16
@@ -117,7 +133,8 @@ module RawRecordsDenseUnionArrayTests
117
133
  [{"1" => nil}],
118
134
  ]
119
135
  target = build(:int16, records)
120
- assert_equal(records, target.raw_records)
136
+ assert_equal(remove_field_names(records),
137
+ target.raw_records)
121
138
  end
122
139
 
123
140
  def test_uint16
@@ -126,7 +143,8 @@ module RawRecordsDenseUnionArrayTests
126
143
  [{"1" => nil}],
127
144
  ]
128
145
  target = build(:uint16, records)
129
- assert_equal(records, target.raw_records)
146
+ assert_equal(remove_field_names(records),
147
+ target.raw_records)
130
148
  end
131
149
 
132
150
  def test_int32
@@ -135,7 +153,8 @@ module RawRecordsDenseUnionArrayTests
135
153
  [{"1" => nil}],
136
154
  ]
137
155
  target = build(:int32, records)
138
- assert_equal(records, target.raw_records)
156
+ assert_equal(remove_field_names(records),
157
+ target.raw_records)
139
158
  end
140
159
 
141
160
  def test_uint32
@@ -144,7 +163,8 @@ module RawRecordsDenseUnionArrayTests
144
163
  [{"1" => nil}],
145
164
  ]
146
165
  target = build(:uint32, records)
147
- assert_equal(records, target.raw_records)
166
+ assert_equal(remove_field_names(records),
167
+ target.raw_records)
148
168
  end
149
169
 
150
170
  def test_int64
@@ -153,7 +173,8 @@ module RawRecordsDenseUnionArrayTests
153
173
  [{"1" => nil}],
154
174
  ]
155
175
  target = build(:int64, records)
156
- assert_equal(records, target.raw_records)
176
+ assert_equal(remove_field_names(records),
177
+ target.raw_records)
157
178
  end
158
179
 
159
180
  def test_uint64
@@ -162,7 +183,8 @@ module RawRecordsDenseUnionArrayTests
162
183
  [{"1" => nil}],
163
184
  ]
164
185
  target = build(:uint64, records)
165
- assert_equal(records, target.raw_records)
186
+ assert_equal(remove_field_names(records),
187
+ target.raw_records)
166
188
  end
167
189
 
168
190
  def test_float
@@ -171,7 +193,8 @@ module RawRecordsDenseUnionArrayTests
171
193
  [{"1" => nil}],
172
194
  ]
173
195
  target = build(:float, records)
174
- assert_equal(records, target.raw_records)
196
+ assert_equal(remove_field_names(records),
197
+ target.raw_records)
175
198
  end
176
199
 
177
200
  def test_double
@@ -180,7 +203,8 @@ module RawRecordsDenseUnionArrayTests
180
203
  [{"1" => nil}],
181
204
  ]
182
205
  target = build(:double, records)
183
- assert_equal(records, target.raw_records)
206
+ assert_equal(remove_field_names(records),
207
+ target.raw_records)
184
208
  end
185
209
 
186
210
  def test_binary
@@ -189,7 +213,8 @@ module RawRecordsDenseUnionArrayTests
189
213
  [{"1" => nil}],
190
214
  ]
191
215
  target = build(:binary, records)
192
- assert_equal(records, target.raw_records)
216
+ assert_equal(remove_field_names(records),
217
+ target.raw_records)
193
218
  end
194
219
 
195
220
  def test_string
@@ -198,7 +223,8 @@ module RawRecordsDenseUnionArrayTests
198
223
  [{"1" => nil}],
199
224
  ]
200
225
  target = build(:string, records)
201
- assert_equal(records, target.raw_records)
226
+ assert_equal(remove_field_names(records),
227
+ target.raw_records)
202
228
  end
203
229
 
204
230
  def test_date32
@@ -207,7 +233,8 @@ module RawRecordsDenseUnionArrayTests
207
233
  [{"1" => nil}],
208
234
  ]
209
235
  target = build(:date32, records)
210
- assert_equal(records, target.raw_records)
236
+ assert_equal(remove_field_names(records),
237
+ target.raw_records)
211
238
  end
212
239
 
213
240
  def test_date64
@@ -216,7 +243,8 @@ module RawRecordsDenseUnionArrayTests
216
243
  [{"1" => nil}],
217
244
  ]
218
245
  target = build(:date64, records)
219
- assert_equal(records, target.raw_records)
246
+ assert_equal(remove_field_names(records),
247
+ target.raw_records)
220
248
  end
221
249
 
222
250
  def test_timestamp_second
@@ -229,7 +257,8 @@ module RawRecordsDenseUnionArrayTests
229
257
  unit: :second,
230
258
  },
231
259
  records)
232
- assert_equal(records, target.raw_records)
260
+ assert_equal(remove_field_names(records),
261
+ target.raw_records)
233
262
  end
234
263
 
235
264
  def test_timestamp_milli
@@ -242,7 +271,8 @@ module RawRecordsDenseUnionArrayTests
242
271
  unit: :milli,
243
272
  },
244
273
  records)
245
- assert_equal(records, target.raw_records)
274
+ assert_equal(remove_field_names(records),
275
+ target.raw_records)
246
276
  end
247
277
 
248
278
  def test_timestamp_micro
@@ -255,7 +285,8 @@ module RawRecordsDenseUnionArrayTests
255
285
  unit: :micro,
256
286
  },
257
287
  records)
258
- assert_equal(records, target.raw_records)
288
+ assert_equal(remove_field_names(records),
289
+ target.raw_records)
259
290
  end
260
291
 
261
292
  def test_timestamp_nano
@@ -268,7 +299,8 @@ module RawRecordsDenseUnionArrayTests
268
299
  unit: :nano,
269
300
  },
270
301
  records)
271
- assert_equal(records, target.raw_records)
302
+ assert_equal(remove_field_names(records),
303
+ target.raw_records)
272
304
  end
273
305
 
274
306
  def test_time32_second
@@ -283,7 +315,8 @@ module RawRecordsDenseUnionArrayTests
283
315
  unit: :second,
284
316
  },
285
317
  records)
286
- assert_equal(records, target.raw_records)
318
+ assert_equal(remove_field_names(records),
319
+ target.raw_records)
287
320
  end
288
321
 
289
322
  def test_time32_milli
@@ -298,7 +331,8 @@ module RawRecordsDenseUnionArrayTests
298
331
  unit: :milli,
299
332
  },
300
333
  records)
301
- assert_equal(records, target.raw_records)
334
+ assert_equal(remove_field_names(records),
335
+ target.raw_records)
302
336
  end
303
337
 
304
338
  def test_time64_micro
@@ -313,7 +347,8 @@ module RawRecordsDenseUnionArrayTests
313
347
  unit: :micro,
314
348
  },
315
349
  records)
316
- assert_equal(records, target.raw_records)
350
+ assert_equal(remove_field_names(records),
351
+ target.raw_records)
317
352
  end
318
353
 
319
354
  def test_time64_nano
@@ -328,7 +363,8 @@ module RawRecordsDenseUnionArrayTests
328
363
  unit: :nano,
329
364
  },
330
365
  records)
331
- assert_equal(records, target.raw_records)
366
+ assert_equal(remove_field_names(records),
367
+ target.raw_records)
332
368
  end
333
369
 
334
370
  def test_decimal128
@@ -342,7 +378,8 @@ module RawRecordsDenseUnionArrayTests
342
378
  scale: 2,
343
379
  },
344
380
  records)
345
- assert_equal(records, target.raw_records)
381
+ assert_equal(remove_field_names(records),
382
+ target.raw_records)
346
383
  end
347
384
 
348
385
  def test_decimal256
@@ -356,7 +393,8 @@ module RawRecordsDenseUnionArrayTests
356
393
  scale: 2,
357
394
  },
358
395
  records)
359
- assert_equal(records, target.raw_records)
396
+ assert_equal(remove_field_names(records),
397
+ target.raw_records)
360
398
  end
361
399
 
362
400
  def test_month_interval
@@ -365,7 +403,8 @@ module RawRecordsDenseUnionArrayTests
365
403
  [{"1" => nil}],
366
404
  ]
367
405
  target = build(:month_interval, records)
368
- assert_equal(records, target.raw_records)
406
+ assert_equal(remove_field_names(records),
407
+ target.raw_records)
369
408
  end
370
409
 
371
410
  def test_day_time_interval
@@ -374,7 +413,8 @@ module RawRecordsDenseUnionArrayTests
374
413
  [{"1" => nil}],
375
414
  ]
376
415
  target = build(:day_time_interval, records)
377
- assert_equal(records, target.raw_records)
416
+ assert_equal(remove_field_names(records),
417
+ target.raw_records)
378
418
  end
379
419
 
380
420
  def test_month_day_nano_interval
@@ -383,7 +423,8 @@ module RawRecordsDenseUnionArrayTests
383
423
  [{"1" => nil}],
384
424
  ]
385
425
  target = build(:month_day_nano_interval, records)
386
- assert_equal(records, target.raw_records)
426
+ assert_equal(remove_field_names(records),
427
+ target.raw_records)
387
428
  end
388
429
 
389
430
  def test_list
@@ -399,7 +440,8 @@ module RawRecordsDenseUnionArrayTests
399
440
  },
400
441
  },
401
442
  records)
402
- assert_equal(records, target.raw_records)
443
+ assert_equal(remove_field_names(records),
444
+ target.raw_records)
403
445
  end
404
446
 
405
447
  def test_struct
@@ -418,7 +460,8 @@ module RawRecordsDenseUnionArrayTests
418
460
  ],
419
461
  },
420
462
  records)
421
- assert_equal(records, target.raw_records)
463
+ assert_equal(remove_field_names(records),
464
+ target.raw_records)
422
465
  end
423
466
 
424
467
  def test_map
@@ -432,14 +475,15 @@ module RawRecordsDenseUnionArrayTests
432
475
  item: :boolean,
433
476
  },
434
477
  records)
435
- assert_equal(records, target.raw_records)
478
+ assert_equal(remove_field_names(records),
479
+ target.raw_records)
436
480
  end
437
481
 
438
482
  def test_sparse_union
439
- omit("Need to add support for SparseUnionArrayBuilder")
440
483
  records = [
441
484
  [{"0" => {"field1" => true}}],
442
485
  [{"1" => nil}],
486
+ [{"0" => {"field2" => 29}}],
443
487
  [{"0" => {"field2" => nil}}],
444
488
  ]
445
489
  target = build({
@@ -457,14 +501,15 @@ module RawRecordsDenseUnionArrayTests
457
501
  type_codes: [0, 1],
458
502
  },
459
503
  records)
460
- assert_equal(records, target.raw_records)
504
+ assert_equal(remove_field_names(remove_field_names(records)),
505
+ target.raw_records)
461
506
  end
462
507
 
463
508
  def test_dense_union
464
- omit("Need to add support for DenseUnionArrayBuilder")
465
509
  records = [
466
510
  [{"0" => {"field1" => true}}],
467
511
  [{"1" => nil}],
512
+ [{"0" => {"field2" => 29}}],
468
513
  [{"0" => {"field2" => nil}}],
469
514
  ]
470
515
  target = build({
@@ -482,25 +527,25 @@ module RawRecordsDenseUnionArrayTests
482
527
  type_codes: [0, 1],
483
528
  },
484
529
  records)
485
- assert_equal(records, target.raw_records)
530
+ assert_equal(remove_field_names(remove_field_names(records)),
531
+ target.raw_records)
486
532
  end
487
533
 
488
534
  def test_dictionary
489
- omit("Need to add support for DictionaryArrayBuilder")
490
535
  records = [
491
536
  [{"0" => "Ruby"}],
492
537
  [{"1" => nil}],
493
538
  [{"0" => "GLib"}],
494
539
  ]
495
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
496
540
  target = build({
497
- type: :dictionary,
498
- index_data_type: :int8,
499
- dictionary: dictionary,
500
- ordered: true,
501
- },
502
- records)
503
- assert_equal(records, target.raw_records)
541
+ type: :dictionary,
542
+ index_data_type: :int8,
543
+ value_data_type: :string,
544
+ ordered: false,
545
+ },
546
+ records)
547
+ assert_equal(remove_field_names(records),
548
+ target.raw_records)
504
549
  end
505
550
  end
506
551
 
@@ -509,13 +509,31 @@ module RawRecordsListArrayTests
509
509
  assert_equal(records, target.raw_records)
510
510
  end
511
511
 
512
- def test_sparse
513
- omit("Need to add support for SparseUnionArrayBuilder")
512
# NOTE(review): annotation added during review; not part of the package diff.
# Test helper for list columns: returns new records in which each non-nil
# column is mapped element by element, replacing every non-nil element
# with element.values[0]. nil columns and nil elements are kept as nil.
# The union tests in this module write elements as single-pair hashes such
# as {"field1" => true}, so this keeps only the value of each element.
+ def remove_union_field_names(records)
513
+ records.collect do |record|
514
+ record.collect do |column|
515
+ if column.nil?
516
+ column
517
+ else
518
+ column.collect do |value|
519
+ if value.nil?
520
+ value
521
+ else
522
+ value.values[0]
523
+ end
524
+ end
525
+ end
526
+ end
527
+ end
528
+ end
529
+
530
+ def test_sparse_union
514
531
  records = [
515
532
  [
516
533
  [
517
534
  {"field1" => true},
518
535
  nil,
536
+ {"field2" => 29},
519
537
  {"field2" => nil},
520
538
  ],
521
539
  ],
@@ -536,16 +554,17 @@ module RawRecordsListArrayTests
536
554
  type_codes: [0, 1],
537
555
  },
538
556
  records)
539
- assert_equal(records, target.raw_records)
557
+ assert_equal(remove_union_field_names(records),
558
+ target.raw_records)
540
559
  end
541
560
 
542
- def test_dense
543
- omit("Need to add support for DenseUnionArrayBuilder")
561
+ def test_dense_union
544
562
  records = [
545
563
  [
546
564
  [
547
565
  {"field1" => true},
548
566
  nil,
567
+ {"field2" => 29},
549
568
  {"field2" => nil},
550
569
  ],
551
570
  ],
@@ -566,11 +585,11 @@ module RawRecordsListArrayTests
566
585
  type_codes: [0, 1],
567
586
  },
568
587
  records)
569
- assert_equal(records, target.raw_records)
588
+ assert_equal(remove_union_field_names(records),
589
+ target.raw_records)
570
590
  end
571
591
 
572
592
  def test_dictionary
573
- omit("Need to add support for DictionaryArrayBuilder")
574
593
  records = [
575
594
  [
576
595
  [
@@ -581,12 +600,11 @@ module RawRecordsListArrayTests
581
600
  ],
582
601
  [nil],
583
602
  ]
584
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
585
603
  target = build({
586
604
  type: :dictionary,
587
605
  index_data_type: :int8,
588
- dictionary: dictionary,
589
- ordered: true,
606
+ value_data_type: :string,
607
+ ordered: false,
590
608
  },
591
609
  records)
592
610
  assert_equal(records, target.raw_records)
@@ -395,10 +395,33 @@ module RawRecordsMapArrayTests
395
395
  assert_equal(records, target.raw_records)
396
396
  end
397
397
 
398
# NOTE(review): annotation added during review; not part of the package diff.
# Test helper for map columns: returns new records in which each non-nil
# column is rebuilt as a fresh hash with the same keys, where every
# non-nil value v is replaced by v.values[0]. nil columns and nil values
# are kept as nil. The union tests in this module write map values as
# single-pair hashes such as {"field1" => true}, so this keeps only the
# inner value under each map key.
+ def remove_union_field_names(records)
399
+ records.collect do |record|
400
+ record.collect do |column|
401
+ if column.nil?
402
+ column
403
+ else
404
+ value = {}
405
+ column.each do |k, v|
406
+ v = v.values[0] unless v.nil?
407
+ value[k] = v
408
+ end
409
+ value
410
+ end
411
+ end
412
+ end
413
+ end
414
+
398
415
  def test_sparse_union
399
- omit("Need to add support for SparseUnionArrayBuilder")
400
416
  records = [
401
- [{"key1" => {"field" => true, "key2" => nil, "key3" => {"field" => nil}}}],
417
+ [
418
+ {
419
+ "key1" => {"field1" => true},
420
+ "key2" => nil,
421
+ "key3" => {"field2" => 29},
422
+ "key4" => {"field2" => nil},
423
+ },
424
+ ],
402
425
  [nil],
403
426
  ]
404
427
  target = build({
@@ -416,13 +439,20 @@ module RawRecordsMapArrayTests
416
439
  type_codes: [0, 1],
417
440
  },
418
441
  records)
419
- assert_equal(records, target.raw_records)
442
+ assert_equal(remove_union_field_names(records),
443
+ target.raw_records)
420
444
  end
421
445
 
422
446
  def test_dense_union
423
- omit("Need to add support for DenseUnionArrayBuilder")
424
447
  records = [
425
- [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
448
+ [
449
+ {
450
+ "key1" => {"field1" => true},
451
+ "key2" => nil,
452
+ "key3" => {"field2" => 29},
453
+ "key4" => {"field2" => nil},
454
+ },
455
+ ],
426
456
  [nil],
427
457
  ]
428
458
  target = build({
@@ -440,21 +470,20 @@ module RawRecordsMapArrayTests
440
470
  type_codes: [0, 1],
441
471
  },
442
472
  records)
443
- assert_equal(records, target.raw_records)
473
+ assert_equal(remove_union_field_names(records),
474
+ target.raw_records)
444
475
  end
445
476
 
446
477
  def test_dictionary
447
- omit("Need to add support for DictionaryArrayBuilder")
448
478
  records = [
449
479
  [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
450
480
  [nil],
451
481
  ]
452
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
453
482
  target = build({
454
483
  type: :dictionary,
455
484
  index_data_type: :int8,
456
- dictionary: dictionary,
457
- ordered: true,
485
+ value_data_type: :string,
486
+ ordered: false,
458
487
  },
459
488
  records)
460
489
  assert_equal(records, target.raw_records)