red-arrow 10.0.0 → 16.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -64,12 +64,23 @@ module ValuesDenseUnionArrayTests
64
64
  arrays)
65
65
  end
66
66
 
67
+ def remove_field_names(values)
68
+ values.collect do |value|
69
+ if value.nil?
70
+ value
71
+ else
72
+ value.values[0]
73
+ end
74
+ end
75
+ end
76
+
67
77
  def test_null
68
78
  values = [
69
79
  {"0" => nil},
70
80
  ]
71
81
  target = build(:null, values)
72
- assert_equal(values, target.values)
82
+ assert_equal(remove_field_names(values),
83
+ target.values)
73
84
  end
74
85
 
75
86
  def test_boolean
@@ -78,7 +89,8 @@ module ValuesDenseUnionArrayTests
78
89
  {"1" => nil},
79
90
  ]
80
91
  target = build(:boolean, values)
81
- assert_equal(values, target.values)
92
+ assert_equal(remove_field_names(values),
93
+ target.values)
82
94
  end
83
95
 
84
96
  def test_int8
@@ -87,7 +99,8 @@ module ValuesDenseUnionArrayTests
87
99
  {"1" => nil},
88
100
  ]
89
101
  target = build(:int8, values)
90
- assert_equal(values, target.values)
102
+ assert_equal(remove_field_names(values),
103
+ target.values)
91
104
  end
92
105
 
93
106
  def test_uint8
@@ -96,7 +109,8 @@ module ValuesDenseUnionArrayTests
96
109
  {"1" => nil},
97
110
  ]
98
111
  target = build(:uint8, values)
99
- assert_equal(values, target.values)
112
+ assert_equal(remove_field_names(values),
113
+ target.values)
100
114
  end
101
115
 
102
116
  def test_int16
@@ -105,7 +119,8 @@ module ValuesDenseUnionArrayTests
105
119
  {"1" => nil},
106
120
  ]
107
121
  target = build(:int16, values)
108
- assert_equal(values, target.values)
122
+ assert_equal(remove_field_names(values),
123
+ target.values)
109
124
  end
110
125
 
111
126
  def test_uint16
@@ -114,7 +129,8 @@ module ValuesDenseUnionArrayTests
114
129
  {"1" => nil},
115
130
  ]
116
131
  target = build(:uint16, values)
117
- assert_equal(values, target.values)
132
+ assert_equal(remove_field_names(values),
133
+ target.values)
118
134
  end
119
135
 
120
136
  def test_int32
@@ -123,7 +139,8 @@ module ValuesDenseUnionArrayTests
123
139
  {"1" => nil},
124
140
  ]
125
141
  target = build(:int32, values)
126
- assert_equal(values, target.values)
142
+ assert_equal(remove_field_names(values),
143
+ target.values)
127
144
  end
128
145
 
129
146
  def test_uint32
@@ -132,7 +149,8 @@ module ValuesDenseUnionArrayTests
132
149
  {"1" => nil},
133
150
  ]
134
151
  target = build(:uint32, values)
135
- assert_equal(values, target.values)
152
+ assert_equal(remove_field_names(values),
153
+ target.values)
136
154
  end
137
155
 
138
156
  def test_int64
@@ -141,7 +159,8 @@ module ValuesDenseUnionArrayTests
141
159
  {"1" => nil},
142
160
  ]
143
161
  target = build(:int64, values)
144
- assert_equal(values, target.values)
162
+ assert_equal(remove_field_names(values),
163
+ target.values)
145
164
  end
146
165
 
147
166
  def test_uint64
@@ -150,7 +169,8 @@ module ValuesDenseUnionArrayTests
150
169
  {"1" => nil},
151
170
  ]
152
171
  target = build(:uint64, values)
153
- assert_equal(values, target.values)
172
+ assert_equal(remove_field_names(values),
173
+ target.values)
154
174
  end
155
175
 
156
176
  def test_float
@@ -159,7 +179,8 @@ module ValuesDenseUnionArrayTests
159
179
  {"1" => nil},
160
180
  ]
161
181
  target = build(:float, values)
162
- assert_equal(values, target.values)
182
+ assert_equal(remove_field_names(values),
183
+ target.values)
163
184
  end
164
185
 
165
186
  def test_double
@@ -168,7 +189,8 @@ module ValuesDenseUnionArrayTests
168
189
  {"1" => nil},
169
190
  ]
170
191
  target = build(:double, values)
171
- assert_equal(values, target.values)
192
+ assert_equal(remove_field_names(values),
193
+ target.values)
172
194
  end
173
195
 
174
196
  def test_binary
@@ -177,7 +199,8 @@ module ValuesDenseUnionArrayTests
177
199
  {"1" => nil},
178
200
  ]
179
201
  target = build(:binary, values)
180
- assert_equal(values, target.values)
202
+ assert_equal(remove_field_names(values),
203
+ target.values)
181
204
  end
182
205
 
183
206
  def test_string
@@ -186,7 +209,8 @@ module ValuesDenseUnionArrayTests
186
209
  {"1" => nil},
187
210
  ]
188
211
  target = build(:string, values)
189
- assert_equal(values, target.values)
212
+ assert_equal(remove_field_names(values),
213
+ target.values)
190
214
  end
191
215
 
192
216
  def test_date32
@@ -195,7 +219,8 @@ module ValuesDenseUnionArrayTests
195
219
  {"1" => nil},
196
220
  ]
197
221
  target = build(:date32, values)
198
- assert_equal(values, target.values)
222
+ assert_equal(remove_field_names(values),
223
+ target.values)
199
224
  end
200
225
 
201
226
  def test_date64
@@ -204,7 +229,8 @@ module ValuesDenseUnionArrayTests
204
229
  {"1" => nil},
205
230
  ]
206
231
  target = build(:date64, values)
207
- assert_equal(values, target.values)
232
+ assert_equal(remove_field_names(values),
233
+ target.values)
208
234
  end
209
235
 
210
236
  def test_timestamp_second
@@ -217,7 +243,8 @@ module ValuesDenseUnionArrayTests
217
243
  unit: :second,
218
244
  },
219
245
  values)
220
- assert_equal(values, target.values)
246
+ assert_equal(remove_field_names(values),
247
+ target.values)
221
248
  end
222
249
 
223
250
  def test_timestamp_milli
@@ -230,7 +257,8 @@ module ValuesDenseUnionArrayTests
230
257
  unit: :milli,
231
258
  },
232
259
  values)
233
- assert_equal(values, target.values)
260
+ assert_equal(remove_field_names(values),
261
+ target.values)
234
262
  end
235
263
 
236
264
  def test_timestamp_micro
@@ -243,7 +271,8 @@ module ValuesDenseUnionArrayTests
243
271
  unit: :micro,
244
272
  },
245
273
  values)
246
- assert_equal(values, target.values)
274
+ assert_equal(remove_field_names(values),
275
+ target.values)
247
276
  end
248
277
 
249
278
  def test_timestamp_nano
@@ -256,7 +285,8 @@ module ValuesDenseUnionArrayTests
256
285
  unit: :nano,
257
286
  },
258
287
  values)
259
- assert_equal(values, target.values)
288
+ assert_equal(remove_field_names(values),
289
+ target.values)
260
290
  end
261
291
 
262
292
  def test_time32_second
@@ -271,7 +301,8 @@ module ValuesDenseUnionArrayTests
271
301
  unit: :second,
272
302
  },
273
303
  values)
274
- assert_equal(values, target.values)
304
+ assert_equal(remove_field_names(values),
305
+ target.values)
275
306
  end
276
307
 
277
308
  def test_time32_milli
@@ -286,7 +317,8 @@ module ValuesDenseUnionArrayTests
286
317
  unit: :milli,
287
318
  },
288
319
  values)
289
- assert_equal(values, target.values)
320
+ assert_equal(remove_field_names(values),
321
+ target.values)
290
322
  end
291
323
 
292
324
  def test_time64_micro
@@ -301,7 +333,8 @@ module ValuesDenseUnionArrayTests
301
333
  unit: :micro,
302
334
  },
303
335
  values)
304
- assert_equal(values, target.values)
336
+ assert_equal(remove_field_names(values),
337
+ target.values)
305
338
  end
306
339
 
307
340
  def test_time64_nano
@@ -316,7 +349,8 @@ module ValuesDenseUnionArrayTests
316
349
  unit: :nano,
317
350
  },
318
351
  values)
319
- assert_equal(values, target.values)
352
+ assert_equal(remove_field_names(values),
353
+ target.values)
320
354
  end
321
355
 
322
356
  def test_decimal128
@@ -330,7 +364,8 @@ module ValuesDenseUnionArrayTests
330
364
  scale: 2,
331
365
  },
332
366
  values)
333
- assert_equal(values, target.values)
367
+ assert_equal(remove_field_names(values),
368
+ target.values)
334
369
  end
335
370
 
336
371
  def test_decimal256
@@ -344,7 +379,8 @@ module ValuesDenseUnionArrayTests
344
379
  scale: 2,
345
380
  },
346
381
  values)
347
- assert_equal(values, target.values)
382
+ assert_equal(remove_field_names(values),
383
+ target.values)
348
384
  end
349
385
 
350
386
  def test_month_interval
@@ -353,7 +389,8 @@ module ValuesDenseUnionArrayTests
353
389
  {"1" => nil},
354
390
  ]
355
391
  target = build(:month_interval, values)
356
- assert_equal(values, target.values)
392
+ assert_equal(remove_field_names(values),
393
+ target.values)
357
394
  end
358
395
 
359
396
  def test_day_time_interval
@@ -362,7 +399,8 @@ module ValuesDenseUnionArrayTests
362
399
  {"1" => nil},
363
400
  ]
364
401
  target = build(:day_time_interval, values)
365
- assert_equal(values, target.values)
402
+ assert_equal(remove_field_names(values),
403
+ target.values)
366
404
  end
367
405
 
368
406
  def test_month_day_nano_interval
@@ -371,7 +409,8 @@ module ValuesDenseUnionArrayTests
371
409
  {"1" => nil},
372
410
  ]
373
411
  target = build(:month_day_nano_interval, values)
374
- assert_equal(values, target.values)
412
+ assert_equal(remove_field_names(values),
413
+ target.values)
375
414
  end
376
415
 
377
416
  def test_list
@@ -387,7 +426,8 @@ module ValuesDenseUnionArrayTests
387
426
  },
388
427
  },
389
428
  values)
390
- assert_equal(values, target.values)
429
+ assert_equal(remove_field_names(values),
430
+ target.values)
391
431
  end
392
432
 
393
433
  def test_struct
@@ -406,7 +446,8 @@ module ValuesDenseUnionArrayTests
406
446
  ],
407
447
  },
408
448
  values)
409
- assert_equal(values, target.values)
449
+ assert_equal(remove_field_names(values),
450
+ target.values)
410
451
  end
411
452
 
412
453
  def test_map
@@ -420,14 +461,15 @@ module ValuesDenseUnionArrayTests
420
461
  item: :boolean,
421
462
  },
422
463
  values)
423
- assert_equal(values, target.values)
464
+ assert_equal(remove_field_names(values),
465
+ target.values)
424
466
  end
425
467
 
426
468
  def test_sparse_union
427
- omit("Need to add support for SparseUnionArrayBuilder")
428
469
  values = [
429
470
  {"0" => {"field1" => true}},
430
471
  {"1" => nil},
472
+ {"0" => {"field2" => 29}},
431
473
  {"0" => {"field2" => nil}},
432
474
  ]
433
475
  target = build({
@@ -445,14 +487,15 @@ module ValuesDenseUnionArrayTests
445
487
  type_codes: [0, 1],
446
488
  },
447
489
  values)
448
- assert_equal(values, target.values)
490
+ assert_equal(remove_field_names(remove_field_names(values)),
491
+ target.values)
449
492
  end
450
493
 
451
494
  def test_dense_union
452
- omit("Need to add support for DenseUnionArrayBuilder")
453
495
  values = [
454
496
  {"0" => {"field1" => true}},
455
497
  {"1" => nil},
498
+ {"0" => {"field2" => 29}},
456
499
  {"0" => {"field2" => nil}},
457
500
  ]
458
501
  target = build({
@@ -470,25 +513,25 @@ module ValuesDenseUnionArrayTests
470
513
  type_codes: [0, 1],
471
514
  },
472
515
  values)
473
- assert_equal(values, target.values)
516
+ assert_equal(remove_field_names(remove_field_names(values)),
517
+ target.values)
474
518
  end
475
519
 
476
520
  def test_dictionary
477
- omit("Need to add support for DictionaryArrayBuilder")
478
521
  values = [
479
522
  {"0" => "Ruby"},
480
523
  {"1" => nil},
481
524
  {"0" => "GLib"},
482
525
  ]
483
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
484
526
  target = build({
485
- type: :dictionary,
486
- index_data_type: :int8,
487
- dictionary: dictionary,
488
- ordered: true,
489
- },
490
- values)
491
- assert_equal(values, target.values)
527
+ type: :dictionary,
528
+ index_data_type: :int8,
529
+ value_data_type: :string,
530
+ ordered: false,
531
+ },
532
+ values)
533
+ assert_equal(remove_field_names(values),
534
+ target.values)
492
535
  end
493
536
  end
494
537
 
@@ -476,12 +476,28 @@ module ValuesListArrayTests
476
476
  assert_equal(values, target.values)
477
477
  end
478
478
 
479
- def test_sparse
480
- omit("Need to add support for SparseUnionArrayBuilder")
479
+ def remove_union_field_names(values)
480
+ values.collect do |value|
481
+ if value.nil?
482
+ value
483
+ else
484
+ value.collect do |v|
485
+ if v.nil?
486
+ v
487
+ else
488
+ v.values[0]
489
+ end
490
+ end
491
+ end
492
+ end
493
+ end
494
+
495
+ def test_sparse_union
481
496
  values = [
482
497
  [
483
498
  {"field1" => true},
484
499
  nil,
500
+ {"field2" => 29},
485
501
  {"field2" => nil},
486
502
  ],
487
503
  nil,
@@ -501,15 +517,16 @@ module ValuesListArrayTests
501
517
  type_codes: [0, 1],
502
518
  },
503
519
  values)
504
- assert_equal(values, target.values)
520
+ assert_equal(remove_union_field_names(values),
521
+ target.values)
505
522
  end
506
523
 
507
- def test_dense
508
- omit("Need to add support for DenseUnionArrayBuilder")
524
+ def test_dense_union
509
525
  values = [
510
526
  [
511
527
  {"field1" => true},
512
528
  nil,
529
+ {"field2" => 29},
513
530
  {"field2" => nil},
514
531
  ],
515
532
  nil,
@@ -529,11 +546,11 @@ module ValuesListArrayTests
529
546
  type_codes: [0, 1],
530
547
  },
531
548
  values)
532
- assert_equal(values, target.values)
549
+ assert_equal(remove_union_field_names(values),
550
+ target.values)
533
551
  end
534
552
 
535
553
  def test_dictionary
536
- omit("Need to add support for DictionaryArrayBuilder")
537
554
  values = [
538
555
  [
539
556
  "Ruby",
@@ -542,12 +559,11 @@ module ValuesListArrayTests
542
559
  ],
543
560
  nil,
544
561
  ]
545
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
546
562
  target = build({
547
563
  type: :dictionary,
548
564
  index_data_type: :int8,
549
- dictionary: dictionary,
550
- ordered: true,
565
+ value_data_type: :string,
566
+ ordered: false,
551
567
  },
552
568
  values)
553
569
  assert_equal(values, target.values)
@@ -383,10 +383,29 @@ module ValuesMapArrayTests
383
383
  assert_equal(values, target.values)
384
384
  end
385
385
 
386
+ def remove_union_field_names(values)
387
+ values.collect do |value|
388
+ if value.nil?
389
+ value
390
+ else
391
+ val = {}
392
+ value.each do |k, v|
393
+ v = v.values[0] unless v.nil?
394
+ val[k] = v
395
+ end
396
+ val
397
+ end
398
+ end
399
+ end
400
+
386
401
  def test_sparse_union
387
- omit("Need to add support for SparseUnionArrayBuilder")
388
402
  values = [
389
- {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
403
+ {
404
+ "key1" => {"field1" => true},
405
+ "key2" => nil,
406
+ "key3" => {"field2" => 29},
407
+ "key4" => {"field2" => nil},
408
+ },
390
409
  nil,
391
410
  ]
392
411
  target = build({
@@ -404,13 +423,18 @@ module ValuesMapArrayTests
404
423
  type_codes: [0, 1],
405
424
  },
406
425
  values)
407
- assert_equal(values, target.values)
426
+ assert_equal(remove_union_field_names(values),
427
+ target.values)
408
428
  end
409
429
 
410
430
  def test_dense_union
411
- omit("Need to add support for DenseUnionArrayBuilder")
412
431
  values = [
413
- {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
432
+ {
433
+ "key1" => {"field1" => true},
434
+ "key2" => nil,
435
+ "key3" => {"field2" => 29},
436
+ "key4" => {"field2" => nil},
437
+ },
414
438
  nil,
415
439
  ]
416
440
  target = build({
@@ -428,21 +452,20 @@ module ValuesMapArrayTests
428
452
  type_codes: [0, 1],
429
453
  },
430
454
  values)
431
- assert_equal(values, target.values)
455
+ assert_equal(remove_union_field_names(values),
456
+ target.values)
432
457
  end
433
458
 
434
459
  def test_dictionary
435
- omit("Need to add support for DictionaryArrayBuilder")
436
460
  values = [
437
461
  {"key1" => "Ruby", "key2" => nil, "key3" => "GLib"},
438
462
  nil,
439
463
  ]
440
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
441
464
  target = build({
442
465
  type: :dictionary,
443
466
  index_data_type: :int8,
444
- dictionary: dictionary,
445
- ordered: true,
467
+ value_data_type: :string,
468
+ ordered: false,
446
469
  },
447
470
  values)
448
471
  assert_equal(values, target.values)