red-arrow 13.0.0 → 14.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,566 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases that check `each_raw_record` on a single dense union
# column named "column".
#
# The including class must define `build(type, records)`, which returns the
# object under test (anything that responds to `each_raw_record`).
#
# Every record is a one-element Array whose element is a Hash with exactly
# one entry. The key ("0" or "1") names the union child that stores the
# value for that row.
module EachRawRecordDenseUnionArrayTests
  # Builds the schema description of a dense union column whose two
  # children, named "0" and "1", share the same value type.
  #
  # type: a data type name (for example :boolean) or a Hash that describes
  #       the child type in full (for example {type: :timestamp, unit: :second}).
  # type_codes: the union type codes, one per child.
  #
  # Returns a Hash in the form passed to Arrow::Schema.new by
  # build_record_batch.
  def build_schema(type, type_codes)
    # Copy a Hash description so the caller's Hash is never shared with the
    # result.
    field_description = type.is_a?(Hash) ? {}.merge(type) : {type: type}
    {
      column: {
        type: :dense_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Computes the type ids and offsets of a dense union column.
  #
  # For each record the selected child is the first one, in child order,
  # whose name ("0", "1", ... by position in type_codes) is a key of the
  # record's Hash. The offset of a row is the number of earlier rows that
  # use the same type code, which is the row's position in the child array
  # assembled by build_record_batch.
  #
  # Returns [type_ids, offsets]; both Arrays have one entry per record.
  #
  # Raises ArgumentError when a record's column is nil or names no child.
  # Dropping such a row would leave the layout shorter than `records`.
  def build_dense_union_layout(records, type_codes)
    child_names = type_codes.each_index.collect(&:to_s)
    # A running counter per type code replaces re-counting type_ids for
    # every row.
    next_offsets = Hash.new(0)
    type_ids = []
    offsets = []
    records.each_with_index do |record, i|
      column = record[0]
      child_index = nil
      unless column.nil?
        child_index = child_names.index do |name|
          column.key?(name)
        end
      end
      if child_index.nil?
        message = "record #{i} does not select a dense union child " +
                  "(expected a Hash keyed by one of " +
                  "#{child_names.inspect}): #{record.inspect}"
        raise ArgumentError, message
      end
      type_id = type_codes[child_index]
      type_ids << type_id
      offsets << next_offsets[type_id]
      next_offsets[type_id] += 1
    end
    [type_ids, offsets]
  end

  # Builds a one-column Arrow::RecordBatch that holds `records` as a dense
  # union array.
  #
  # Raises ArgumentError (from build_dense_union_layout) when a record does
  # not select a child.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    # Validate the rows and compute the layout first so that a malformed
    # record fails before any Arrow object is built.
    type_ids, offsets = build_dense_union_layout(records, type_codes)
    schema = Arrow::Schema.new(build_schema(type, type_codes))
    union_data_type = schema.fields[0].data_type
    columns = records.collect do |record|
      record[0]
    end
    # One child array per union field, holding only the rows that name it.
    arrays = union_data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = []
      columns.each do |column|
        next unless column.key?(field.name)
        sub_records << [column[field.name]]
      end
      sub_record_batch = Arrow::RecordBatch.new(sub_schema,
                                                sub_records)
      sub_record_batch.columns[0].data
    end
    union_array = Arrow::DenseUnionArray.new(union_data_type,
                                             Arrow::Int8Array.new(type_ids),
                                             Arrow::Int32Array.new(offsets),
                                             arrays)
    schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(schema,
                           records.size,
                           [union_array])
  end

  # Replaces every non-nil column Hash with its first value, dropping the
  # key that names the union child. nil columns are kept as nil.
  #
  # The tests use the result as the expected `each_raw_record` output.
  def remove_field_names(records)
    records.collect do |record|
      record.collect do |column|
        column.nil? ? column : column.values[0]
      end
    end
  end

  def test_null
    records = [
      [{"0" => nil}],
    ]
    target = build(:null, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [{"1" => nil}],
    ]
    target = build(:boolean, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [{"1" => nil}],
    ]
    target = build(:int8, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint8, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [{"1" => nil}],
    ]
    target = build(:int16, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint16, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [{"1" => nil}],
    ]
    target = build(:int32, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint32, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [{"1" => nil}],
    ]
    target = build(:int64, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint64, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    target = build(:float, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    target = build(:double, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [{"1" => nil}],
    ]
    target = build(:binary, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
    ]
    target = build(:string, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [{"1" => nil}],
    ]
    target = build(:date32, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [{"1" => nil}],
    ]
    target = build(:date64, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :second,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :milli,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :micro,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :nano,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"0" => Arrow::Time.new(unit, 60 * 10)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :second,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :milli,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :micro,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :nano,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal128,
                     precision: 8,
                     scale: 2,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_decimal256
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal256,
                     precision: 38,
                     scale: 2,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_month_interval
    records = [
      [{"0" => 1}],
      [{"1" => nil}],
    ]
    target = build(:month_interval, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_day_time_interval
    records = [
      [{"0" => {day: 1, millisecond: 100}}],
      [{"1" => nil}],
    ]
    target = build(:day_time_interval, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_month_day_nano_interval
    records = [
      [{"0" => {month: 1, day: 1, nanosecond: 100}}],
      [{"1" => nil}],
    ]
    target = build(:month_day_nano_interval, records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :sub_element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :sub_field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_map
    records = [
      [{"0" => {"key1" => true, "key2" => nil}}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :map,
                     key: :string,
                     item: :boolean,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_sparse_union
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => 29}}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    # Field names are removed twice: once for the outer dense union and
    # once for the nested union value.
    assert_equal(remove_field_names(remove_field_names(records)),
                 target.each_raw_record.to_a)
  end

  def test_dense_union
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => 29}}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    # Field names are removed twice: once for the outer dense union and
    # once for the nested union value.
    assert_equal(remove_field_names(remove_field_names(records)),
                 target.each_raw_record.to_a)
  end

  def test_dictionary
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     value_data_type: :string,
                     ordered: false,
                   },
                   records)
    assert_equal(remove_field_names(records),
                 target.each_raw_record.to_a)
  end
end
551
+
552
# Runs the shared dense union test cases with a record batch as the object
# under test.
class EachRawRecordRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordDenseUnionArrayTests

  # Returns the object whose `each_raw_record` the shared tests call: the
  # result of build_record_batch, unchanged.
  def build(type, records)
    build_record_batch(type, records)
  end
end
559
+
560
# Runs the shared dense union test cases with a table as the object under
# test.
class EachRawRecordTableDenseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordDenseUnionArrayTests

  # Returns the object whose `each_raw_record` the shared tests call: the
  # result of build_record_batch converted with `to_table`.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch.to_table
  end
end