red-arrow 10.0.0 → 16.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -76,12 +76,25 @@ module RawRecordsDenseUnionArrayTests
76
76
  [union_array])
77
77
  end
78
78
 
79
+ def remove_field_names(records)
80
+ records.collect do |record|
81
+ record.collect do |column|
82
+ if column.nil?
83
+ column
84
+ else
85
+ column.values[0]
86
+ end
87
+ end
88
+ end
89
+ end
90
+
79
91
  def test_null
80
92
  records = [
81
93
  [{"0" => nil}],
82
94
  ]
83
95
  target = build(:null, records)
84
- assert_equal(records, target.raw_records)
96
+ assert_equal(remove_field_names(records),
97
+ target.raw_records)
85
98
  end
86
99
 
87
100
  def test_boolean
@@ -90,7 +103,8 @@ module RawRecordsDenseUnionArrayTests
90
103
  [{"1" => nil}],
91
104
  ]
92
105
  target = build(:boolean, records)
93
- assert_equal(records, target.raw_records)
106
+ assert_equal(remove_field_names(records),
107
+ target.raw_records)
94
108
  end
95
109
 
96
110
  def test_int8
@@ -99,7 +113,8 @@ module RawRecordsDenseUnionArrayTests
99
113
  [{"1" => nil}],
100
114
  ]
101
115
  target = build(:int8, records)
102
- assert_equal(records, target.raw_records)
116
+ assert_equal(remove_field_names(records),
117
+ target.raw_records)
103
118
  end
104
119
 
105
120
  def test_uint8
@@ -108,7 +123,8 @@ module RawRecordsDenseUnionArrayTests
108
123
  [{"1" => nil}],
109
124
  ]
110
125
  target = build(:uint8, records)
111
- assert_equal(records, target.raw_records)
126
+ assert_equal(remove_field_names(records),
127
+ target.raw_records)
112
128
  end
113
129
 
114
130
  def test_int16
@@ -117,7 +133,8 @@ module RawRecordsDenseUnionArrayTests
117
133
  [{"1" => nil}],
118
134
  ]
119
135
  target = build(:int16, records)
120
- assert_equal(records, target.raw_records)
136
+ assert_equal(remove_field_names(records),
137
+ target.raw_records)
121
138
  end
122
139
 
123
140
  def test_uint16
@@ -126,7 +143,8 @@ module RawRecordsDenseUnionArrayTests
126
143
  [{"1" => nil}],
127
144
  ]
128
145
  target = build(:uint16, records)
129
- assert_equal(records, target.raw_records)
146
+ assert_equal(remove_field_names(records),
147
+ target.raw_records)
130
148
  end
131
149
 
132
150
  def test_int32
@@ -135,7 +153,8 @@ module RawRecordsDenseUnionArrayTests
135
153
  [{"1" => nil}],
136
154
  ]
137
155
  target = build(:int32, records)
138
- assert_equal(records, target.raw_records)
156
+ assert_equal(remove_field_names(records),
157
+ target.raw_records)
139
158
  end
140
159
 
141
160
  def test_uint32
@@ -144,7 +163,8 @@ module RawRecordsDenseUnionArrayTests
144
163
  [{"1" => nil}],
145
164
  ]
146
165
  target = build(:uint32, records)
147
- assert_equal(records, target.raw_records)
166
+ assert_equal(remove_field_names(records),
167
+ target.raw_records)
148
168
  end
149
169
 
150
170
  def test_int64
@@ -153,7 +173,8 @@ module RawRecordsDenseUnionArrayTests
153
173
  [{"1" => nil}],
154
174
  ]
155
175
  target = build(:int64, records)
156
- assert_equal(records, target.raw_records)
176
+ assert_equal(remove_field_names(records),
177
+ target.raw_records)
157
178
  end
158
179
 
159
180
  def test_uint64
@@ -162,7 +183,8 @@ module RawRecordsDenseUnionArrayTests
162
183
  [{"1" => nil}],
163
184
  ]
164
185
  target = build(:uint64, records)
165
- assert_equal(records, target.raw_records)
186
+ assert_equal(remove_field_names(records),
187
+ target.raw_records)
166
188
  end
167
189
 
168
190
  def test_float
@@ -171,7 +193,8 @@ module RawRecordsDenseUnionArrayTests
171
193
  [{"1" => nil}],
172
194
  ]
173
195
  target = build(:float, records)
174
- assert_equal(records, target.raw_records)
196
+ assert_equal(remove_field_names(records),
197
+ target.raw_records)
175
198
  end
176
199
 
177
200
  def test_double
@@ -180,7 +203,8 @@ module RawRecordsDenseUnionArrayTests
180
203
  [{"1" => nil}],
181
204
  ]
182
205
  target = build(:double, records)
183
- assert_equal(records, target.raw_records)
206
+ assert_equal(remove_field_names(records),
207
+ target.raw_records)
184
208
  end
185
209
 
186
210
  def test_binary
@@ -189,7 +213,8 @@ module RawRecordsDenseUnionArrayTests
189
213
  [{"1" => nil}],
190
214
  ]
191
215
  target = build(:binary, records)
192
- assert_equal(records, target.raw_records)
216
+ assert_equal(remove_field_names(records),
217
+ target.raw_records)
193
218
  end
194
219
 
195
220
  def test_string
@@ -198,7 +223,8 @@ module RawRecordsDenseUnionArrayTests
198
223
  [{"1" => nil}],
199
224
  ]
200
225
  target = build(:string, records)
201
- assert_equal(records, target.raw_records)
226
+ assert_equal(remove_field_names(records),
227
+ target.raw_records)
202
228
  end
203
229
 
204
230
  def test_date32
@@ -207,7 +233,8 @@ module RawRecordsDenseUnionArrayTests
207
233
  [{"1" => nil}],
208
234
  ]
209
235
  target = build(:date32, records)
210
- assert_equal(records, target.raw_records)
236
+ assert_equal(remove_field_names(records),
237
+ target.raw_records)
211
238
  end
212
239
 
213
240
  def test_date64
@@ -216,7 +243,8 @@ module RawRecordsDenseUnionArrayTests
216
243
  [{"1" => nil}],
217
244
  ]
218
245
  target = build(:date64, records)
219
- assert_equal(records, target.raw_records)
246
+ assert_equal(remove_field_names(records),
247
+ target.raw_records)
220
248
  end
221
249
 
222
250
  def test_timestamp_second
@@ -229,7 +257,8 @@ module RawRecordsDenseUnionArrayTests
229
257
  unit: :second,
230
258
  },
231
259
  records)
232
- assert_equal(records, target.raw_records)
260
+ assert_equal(remove_field_names(records),
261
+ target.raw_records)
233
262
  end
234
263
 
235
264
  def test_timestamp_milli
@@ -242,7 +271,8 @@ module RawRecordsDenseUnionArrayTests
242
271
  unit: :milli,
243
272
  },
244
273
  records)
245
- assert_equal(records, target.raw_records)
274
+ assert_equal(remove_field_names(records),
275
+ target.raw_records)
246
276
  end
247
277
 
248
278
  def test_timestamp_micro
@@ -255,7 +285,8 @@ module RawRecordsDenseUnionArrayTests
255
285
  unit: :micro,
256
286
  },
257
287
  records)
258
- assert_equal(records, target.raw_records)
288
+ assert_equal(remove_field_names(records),
289
+ target.raw_records)
259
290
  end
260
291
 
261
292
  def test_timestamp_nano
@@ -268,7 +299,8 @@ module RawRecordsDenseUnionArrayTests
268
299
  unit: :nano,
269
300
  },
270
301
  records)
271
- assert_equal(records, target.raw_records)
302
+ assert_equal(remove_field_names(records),
303
+ target.raw_records)
272
304
  end
273
305
 
274
306
  def test_time32_second
@@ -283,7 +315,8 @@ module RawRecordsDenseUnionArrayTests
283
315
  unit: :second,
284
316
  },
285
317
  records)
286
- assert_equal(records, target.raw_records)
318
+ assert_equal(remove_field_names(records),
319
+ target.raw_records)
287
320
  end
288
321
 
289
322
  def test_time32_milli
@@ -298,7 +331,8 @@ module RawRecordsDenseUnionArrayTests
298
331
  unit: :milli,
299
332
  },
300
333
  records)
301
- assert_equal(records, target.raw_records)
334
+ assert_equal(remove_field_names(records),
335
+ target.raw_records)
302
336
  end
303
337
 
304
338
  def test_time64_micro
@@ -313,7 +347,8 @@ module RawRecordsDenseUnionArrayTests
313
347
  unit: :micro,
314
348
  },
315
349
  records)
316
- assert_equal(records, target.raw_records)
350
+ assert_equal(remove_field_names(records),
351
+ target.raw_records)
317
352
  end
318
353
 
319
354
  def test_time64_nano
@@ -328,7 +363,8 @@ module RawRecordsDenseUnionArrayTests
328
363
  unit: :nano,
329
364
  },
330
365
  records)
331
- assert_equal(records, target.raw_records)
366
+ assert_equal(remove_field_names(records),
367
+ target.raw_records)
332
368
  end
333
369
 
334
370
  def test_decimal128
@@ -342,7 +378,8 @@ module RawRecordsDenseUnionArrayTests
342
378
  scale: 2,
343
379
  },
344
380
  records)
345
- assert_equal(records, target.raw_records)
381
+ assert_equal(remove_field_names(records),
382
+ target.raw_records)
346
383
  end
347
384
 
348
385
  def test_decimal256
@@ -356,7 +393,8 @@ module RawRecordsDenseUnionArrayTests
356
393
  scale: 2,
357
394
  },
358
395
  records)
359
- assert_equal(records, target.raw_records)
396
+ assert_equal(remove_field_names(records),
397
+ target.raw_records)
360
398
  end
361
399
 
362
400
  def test_month_interval
@@ -365,7 +403,8 @@ module RawRecordsDenseUnionArrayTests
365
403
  [{"1" => nil}],
366
404
  ]
367
405
  target = build(:month_interval, records)
368
- assert_equal(records, target.raw_records)
406
+ assert_equal(remove_field_names(records),
407
+ target.raw_records)
369
408
  end
370
409
 
371
410
  def test_day_time_interval
@@ -374,7 +413,8 @@ module RawRecordsDenseUnionArrayTests
374
413
  [{"1" => nil}],
375
414
  ]
376
415
  target = build(:day_time_interval, records)
377
- assert_equal(records, target.raw_records)
416
+ assert_equal(remove_field_names(records),
417
+ target.raw_records)
378
418
  end
379
419
 
380
420
  def test_month_day_nano_interval
@@ -383,7 +423,8 @@ module RawRecordsDenseUnionArrayTests
383
423
  [{"1" => nil}],
384
424
  ]
385
425
  target = build(:month_day_nano_interval, records)
386
- assert_equal(records, target.raw_records)
426
+ assert_equal(remove_field_names(records),
427
+ target.raw_records)
387
428
  end
388
429
 
389
430
  def test_list
@@ -399,7 +440,8 @@ module RawRecordsDenseUnionArrayTests
399
440
  },
400
441
  },
401
442
  records)
402
- assert_equal(records, target.raw_records)
443
+ assert_equal(remove_field_names(records),
444
+ target.raw_records)
403
445
  end
404
446
 
405
447
  def test_struct
@@ -418,7 +460,8 @@ module RawRecordsDenseUnionArrayTests
418
460
  ],
419
461
  },
420
462
  records)
421
- assert_equal(records, target.raw_records)
463
+ assert_equal(remove_field_names(records),
464
+ target.raw_records)
422
465
  end
423
466
 
424
467
  def test_map
@@ -432,14 +475,15 @@ module RawRecordsDenseUnionArrayTests
432
475
  item: :boolean,
433
476
  },
434
477
  records)
435
- assert_equal(records, target.raw_records)
478
+ assert_equal(remove_field_names(records),
479
+ target.raw_records)
436
480
  end
437
481
 
438
482
  def test_sparse_union
439
- omit("Need to add support for SparseUnionArrayBuilder")
440
483
  records = [
441
484
  [{"0" => {"field1" => true}}],
442
485
  [{"1" => nil}],
486
+ [{"0" => {"field2" => 29}}],
443
487
  [{"0" => {"field2" => nil}}],
444
488
  ]
445
489
  target = build({
@@ -457,14 +501,15 @@ module RawRecordsDenseUnionArrayTests
457
501
  type_codes: [0, 1],
458
502
  },
459
503
  records)
460
- assert_equal(records, target.raw_records)
504
+ assert_equal(remove_field_names(remove_field_names(records)),
505
+ target.raw_records)
461
506
  end
462
507
 
463
508
  def test_dense_union
464
- omit("Need to add support for DenseUnionArrayBuilder")
465
509
  records = [
466
510
  [{"0" => {"field1" => true}}],
467
511
  [{"1" => nil}],
512
+ [{"0" => {"field2" => 29}}],
468
513
  [{"0" => {"field2" => nil}}],
469
514
  ]
470
515
  target = build({
@@ -482,25 +527,25 @@ module RawRecordsDenseUnionArrayTests
482
527
  type_codes: [0, 1],
483
528
  },
484
529
  records)
485
- assert_equal(records, target.raw_records)
530
+ assert_equal(remove_field_names(remove_field_names(records)),
531
+ target.raw_records)
486
532
  end
487
533
 
488
534
  def test_dictionary
489
- omit("Need to add support for DictionaryArrayBuilder")
490
535
  records = [
491
536
  [{"0" => "Ruby"}],
492
537
  [{"1" => nil}],
493
538
  [{"0" => "GLib"}],
494
539
  ]
495
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
496
540
  target = build({
497
- type: :dictionary,
498
- index_data_type: :int8,
499
- dictionary: dictionary,
500
- ordered: true,
501
- },
502
- records)
503
- assert_equal(records, target.raw_records)
541
+ type: :dictionary,
542
+ index_data_type: :int8,
543
+ value_data_type: :string,
544
+ ordered: false,
545
+ },
546
+ records)
547
+ assert_equal(remove_field_names(records),
548
+ target.raw_records)
504
549
  end
505
550
  end
506
551
 
@@ -509,13 +509,31 @@ module RawRecordsListArrayTests
509
509
  assert_equal(records, target.raw_records)
510
510
  end
511
511
 
512
- def test_sparse
513
- omit("Need to add support for SparseUnionArrayBuilder")
512
+ def remove_union_field_names(records)
513
+ records.collect do |record|
514
+ record.collect do |column|
515
+ if column.nil?
516
+ column
517
+ else
518
+ column.collect do |value|
519
+ if value.nil?
520
+ value
521
+ else
522
+ value.values[0]
523
+ end
524
+ end
525
+ end
526
+ end
527
+ end
528
+ end
529
+
530
+ def test_sparse_union
514
531
  records = [
515
532
  [
516
533
  [
517
534
  {"field1" => true},
518
535
  nil,
536
+ {"field2" => 29},
519
537
  {"field2" => nil},
520
538
  ],
521
539
  ],
@@ -536,16 +554,17 @@ module RawRecordsListArrayTests
536
554
  type_codes: [0, 1],
537
555
  },
538
556
  records)
539
- assert_equal(records, target.raw_records)
557
+ assert_equal(remove_union_field_names(records),
558
+ target.raw_records)
540
559
  end
541
560
 
542
- def test_dense
543
- omit("Need to add support for DenseUnionArrayBuilder")
561
+ def test_dense_union
544
562
  records = [
545
563
  [
546
564
  [
547
565
  {"field1" => true},
548
566
  nil,
567
+ {"field2" => 29},
549
568
  {"field2" => nil},
550
569
  ],
551
570
  ],
@@ -566,11 +585,11 @@ module RawRecordsListArrayTests
566
585
  type_codes: [0, 1],
567
586
  },
568
587
  records)
569
- assert_equal(records, target.raw_records)
588
+ assert_equal(remove_union_field_names(records),
589
+ target.raw_records)
570
590
  end
571
591
 
572
592
  def test_dictionary
573
- omit("Need to add support for DictionaryArrayBuilder")
574
593
  records = [
575
594
  [
576
595
  [
@@ -581,12 +600,11 @@ module RawRecordsListArrayTests
581
600
  ],
582
601
  [nil],
583
602
  ]
584
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
585
603
  target = build({
586
604
  type: :dictionary,
587
605
  index_data_type: :int8,
588
- dictionary: dictionary,
589
- ordered: true,
606
+ value_data_type: :string,
607
+ ordered: false,
590
608
  },
591
609
  records)
592
610
  assert_equal(records, target.raw_records)
@@ -395,10 +395,33 @@ module RawRecordsMapArrayTests
395
395
  assert_equal(records, target.raw_records)
396
396
  end
397
397
 
398
+ def remove_union_field_names(records)
399
+ records.collect do |record|
400
+ record.collect do |column|
401
+ if column.nil?
402
+ column
403
+ else
404
+ value = {}
405
+ column.each do |k, v|
406
+ v = v.values[0] unless v.nil?
407
+ value[k] = v
408
+ end
409
+ value
410
+ end
411
+ end
412
+ end
413
+ end
414
+
398
415
  def test_sparse_union
399
- omit("Need to add support for SparseUnionArrayBuilder")
400
416
  records = [
401
- [{"key1" => {"field" => true, "key2" => nil, "key3" => {"field" => nil}}}],
417
+ [
418
+ {
419
+ "key1" => {"field1" => true},
420
+ "key2" => nil,
421
+ "key3" => {"field2" => 29},
422
+ "key4" => {"field2" => nil},
423
+ },
424
+ ],
402
425
  [nil],
403
426
  ]
404
427
  target = build({
@@ -416,13 +439,20 @@ module RawRecordsMapArrayTests
416
439
  type_codes: [0, 1],
417
440
  },
418
441
  records)
419
- assert_equal(records, target.raw_records)
442
+ assert_equal(remove_union_field_names(records),
443
+ target.raw_records)
420
444
  end
421
445
 
422
446
  def test_dense_union
423
- omit("Need to add support for DenseUnionArrayBuilder")
424
447
  records = [
425
- [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
448
+ [
449
+ {
450
+ "key1" => {"field1" => true},
451
+ "key2" => nil,
452
+ "key3" => {"field2" => 29},
453
+ "key4" => {"field2" => nil},
454
+ },
455
+ ],
426
456
  [nil],
427
457
  ]
428
458
  target = build({
@@ -440,21 +470,20 @@ module RawRecordsMapArrayTests
440
470
  type_codes: [0, 1],
441
471
  },
442
472
  records)
443
- assert_equal(records, target.raw_records)
473
+ assert_equal(remove_union_field_names(records),
474
+ target.raw_records)
444
475
  end
445
476
 
446
477
  def test_dictionary
447
- omit("Need to add support for DictionaryArrayBuilder")
448
478
  records = [
449
479
  [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
450
480
  [nil],
451
481
  ]
452
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
453
482
  target = build({
454
483
  type: :dictionary,
455
484
  index_data_type: :int8,
456
- dictionary: dictionary,
457
- ordered: true,
485
+ value_data_type: :string,
486
+ ordered: false,
458
487
  },
459
488
  records)
460
489
  assert_equal(records, target.raw_records)