red-arrow 10.0.0 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -0,0 +1,528 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases for #each_raw_record on a single sparse union column.
#
# Including classes must define build(type, records), which returns the
# object under test (anything that responds to #each_raw_record).
#
# Every input record is a one-element Array whose element is a Hash with
# a single key, "0" or "1", naming the union child that holds the value.
module EachRawRecordSparseUnionArrayTests
  # Returns a schema description with one sparse union column named
  # "column". The union has two children, "0" and "1", both of the
  # given type.
  #
  # type may be a data type name (e.g. :boolean) or a Hash that
  # describes a parameterized data type (e.g. {type: :timestamp,
  # unit: :second}).
  def build_schema(type, type_codes)
    field_description = type.is_a?(Hash) ? type : {type: type}
    {
      column: {
        type: :sparse_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Returns one type code per record, in record order.
  #
  # A record whose column has the "0" key selects type_codes[0] and one
  # with the "1" key selects type_codes[1]. A nil column also selects
  # type_codes[0]: build_record_batch stores nil in every child for such
  # a record, so whichever child is selected holds a nil slot.
  #
  # Raises ArgumentError for a column with neither key. Skipping it
  # would leave the type IDs shorter than the child arrays.
  def build_type_ids(records, type_codes)
    records.collect do |record|
      column = record[0]
      if column.nil? or column.key?("0")
        type_codes[0]
      elsif column.key?("1")
        type_codes[1]
      else
        message = "sparse union record must use \"0\" or \"1\" as key: "
        message += record.inspect
        raise ArgumentError, message
      end
    end
  end

  # Builds a record batch that has one sparse union column from records.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    schema = Arrow::Schema.new(build_schema(type, type_codes))
    union_data_type = schema.fields[0].data_type
    # One child array per union field. Each child has a slot for every
    # record; the slot is nil when the record doesn't use that field.
    arrays = union_data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = records.collect do |record|
        [record[0].nil? ? nil : record[0][field.name]]
      end
      sub_record_batch = Arrow::RecordBatch.new(sub_schema,
                                                sub_records)
      sub_record_batch.columns[0].data
    end
    type_ids = build_type_ids(records, type_codes)
    union_array = Arrow::SparseUnionArray.new(union_data_type,
                                              Arrow::Int8Array.new(type_ids),
                                              arrays)
    schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(schema,
                           records.size,
                           [union_array])
  end

  # Replaces each {"field name" => value} column with its first value.
  # nil columns are kept as is.
  def remove_field_names(records)
    records.collect do |record|
      record.collect do |column|
        if column.nil?
          column
        else
          column.values[0]
        end
      end
    end
  end

  def test_null
    records = [
      [{"0" => nil}],
    ]
    target = build(:null, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [{"1" => nil}],
    ]
    target = build(:boolean, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [{"1" => nil}],
    ]
    target = build(:int8, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint8, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [{"1" => nil}],
    ]
    target = build(:int16, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint16, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [{"1" => nil}],
    ]
    target = build(:int32, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint32, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [{"1" => nil}],
    ]
    target = build(:int64, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint64, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    target = build(:float, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    target = build(:double, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [{"1" => nil}],
    ]
    target = build(:binary, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
    ]
    target = build(:string, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [{"1" => nil}],
    ]
    target = build(:date32, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [{"1" => nil}],
    ]
    target = build(:date64, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :second,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :milli,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :micro,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :nano,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"0" => Arrow::Time.new(unit, 60 * 10)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :second,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :milli,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :micro,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :nano,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal128,
                     precision: 8,
                     scale: 2,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_decimal256
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal256,
                     precision: 38,
                     scale: 2,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_month_interval
    records = [
      [{"0" => 1}],
      [{"1" => nil}],
    ]
    target = build(:month_interval, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_day_time_interval
    records = [
      [{"0" => {day: 1, millisecond: 100}}],
      [{"1" => nil}],
    ]
    target = build(:day_time_interval, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_month_day_nano_interval
    records = [
      [{"0" => {month: 1, day: 1, nanosecond: 100}}],
      [{"1" => nil}],
    ]
    target = build(:month_day_nano_interval, records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :sub_element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :sub_field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_map
    records = [
      [{"0" => {"key1" => true, "key2" => nil}}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :map,
                     key: :string,
                     item: :boolean,
                   },
                   records)
    assert_equal(remove_field_names(records), target.each_raw_record.to_a)
  end

  def test_sparse_union
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => 29}}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    # The values are unions too, so field names are removed twice:
    # once for the outer union and once for the inner union.
    assert_equal(remove_field_names(remove_field_names(records)),
                 target.each_raw_record.to_a)
  end

  def test_dense_union
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => 29}}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    # The values are unions too, so field names are removed twice:
    # once for the outer union and once for the inner union.
    assert_equal(remove_field_names(remove_field_names(records)),
                 target.each_raw_record.to_a)
  end

  def test_dictionary
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    iterated_records = []
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     value_data_type: :string,
                     ordered: false,
                   },
                   records)
    # Unlike the other tests, this one calls #each_raw_record with a
    # block instead of converting the returned value with #to_a.
    target.each_raw_record do |record|
      iterated_records << record
    end
    assert_equal(remove_field_names(records), iterated_records)
  end
end
513
+
514
# Runs the shared sparse union #each_raw_record tests against a record
# batch.
class EachRawRecordRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordSparseUnionArrayTests

  # Returns the object under test: the record batch built from records.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch
  end
end
521
+
522
# Runs the shared sparse union #each_raw_record tests against a table.
class EachRawRecordTableSparseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordSparseUnionArrayTests

  # Returns the object under test: the record batch built from records,
  # converted with #to_table.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch.to_table
  end
end