red-arrow 13.0.0 → 14.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,566 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases that check #each_raw_record for a single dense union
# column.
#
# Including classes must provide build(type, records), which returns the
# object under test (the classes in this file return a record batch or a
# table).
#
# Every record is a one-element Array whose element is a single-entry
# Hash: the key ("0" or "1") names the union child that holds the value.
module EachRawRecordDenseUnionArrayTests
  # Builds the schema description for a "column" field whose type is a
  # dense union with two children, named "0" and "1", of the same type.
  #
  # type is either a type name (e.g. :boolean) or a Hash that describes
  # the type (e.g. {type: :timestamp, unit: :second}). type_codes is
  # stored as is under :type_codes.
  def build_schema(type, type_codes)
    field_description = type.is_a?(Hash) ? type.dup : {type: type}
    {
      column: {
        type: :dense_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Computes the type ids and offsets that describe records as a dense
  # union whose children are named "0" and "1".
  #
  # For each record, the type id is the element of type_codes at the
  # position of the selected child, and the offset is the number of
  # earlier records that got the same type id. A running counter per
  # type id is used so that the work is linear in the number of records.
  #
  # Returns [type_ids, offsets], two Arrays of Integers with one element
  # per record.
  #
  # Raises ArgumentError when a record's column is not a Hash-like object
  # that has exactly one of the "0" and "1" keys: such a record can't be
  # mapped to exactly one child.
  def build_type_ids_and_offsets(records, type_codes)
    field_names = ["0", "1"]
    next_offsets = Hash.new(0)
    type_ids = []
    offsets = []
    records.each_with_index do |record, i|
      column = record[0]
      selected_names = []
      if column.respond_to?(:key?)
        selected_names = field_names.select {|name| column.key?(name)}
      end
      unless selected_names.size == 1
        message = "record #{i} must have exactly one of the " +
                  "\"0\" and \"1\" keys: #{column.inspect}"
        raise ArgumentError, message
      end
      type_id = type_codes[field_names.index(selected_names[0])]
      type_ids << type_id
      offsets << next_offsets[type_id]
      next_offsets[type_id] += 1
    end
    [type_ids, offsets]
  end

  # Builds a record batch that has one dense union column, "column", from
  # records.
  #
  # Raises ArgumentError (from build_type_ids_and_offsets) when a record
  # doesn't select exactly one child.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    schema = Arrow::Schema.new(build_schema(type, type_codes))
    union_data_type = schema.fields[0].data_type
    # Validate the records first: the child arrays below can then rely on
    # every column being a Hash that selects exactly one child.
    type_ids, offsets = build_type_ids_and_offsets(records, type_codes)
    arrays = union_data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = []
      records.each do |record|
        column = record[0]
        next unless column.key?(field.name)
        sub_records << [column[field.name]]
      end
      sub_record_batch = Arrow::RecordBatch.new(sub_schema,
                                                sub_records)
      sub_record_batch.columns[0].data
    end
    union_array = Arrow::DenseUnionArray.new(union_data_type,
                                             Arrow::Int8Array.new(type_ids),
                                             Arrow::Int32Array.new(offsets),
                                             arrays)
    schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(schema,
                           records.size,
                           [union_array])
  end

  # Replaces each {field_name => value} column with its first value.
  # nil columns are kept as nil. Returns new Arrays; records isn't
  # changed.
  def remove_field_names(records)
    records.collect do |record|
      record.collect do |column|
        if column.nil?
          column
        else
          column.values[0]
        end
      end
    end
  end

  # Builds records as a dense union column of type with build and asserts
  # that #each_raw_record yields expected. By default, expected is records
  # without the union field names.
  def assert_each_raw_record(type,
                             records,
                             expected: remove_field_names(records))
    target = build(type, records)
    assert_equal(expected,
                 target.each_raw_record.to_a)
  end

  def test_null
    records = [
      [{"0" => nil}],
    ]
    assert_each_raw_record(:null, records)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:boolean, records)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:int8, records)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:uint8, records)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:int16, records)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:uint16, records)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:int32, records)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:uint32, records)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:int64, records)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:uint64, records)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:float, records)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:double, records)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:binary, records)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:string, records)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:date32, records)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:date64, records)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [{"1" => nil}],
    ]
    type = {
      type: :timestamp,
      unit: :second,
    }
    assert_each_raw_record(type, records)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [{"1" => nil}],
    ]
    type = {
      type: :timestamp,
      unit: :milli,
    }
    assert_each_raw_record(type, records)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [{"1" => nil}],
    ]
    type = {
      type: :timestamp,
      unit: :micro,
    }
    assert_each_raw_record(type, records)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [{"1" => nil}],
    ]
    type = {
      type: :timestamp,
      unit: :nano,
    }
    assert_each_raw_record(type, records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"0" => Arrow::Time.new(unit, 60 * 10)}],
      [{"1" => nil}],
    ]
    type = {
      type: :time32,
      unit: :second,
    }
    assert_each_raw_record(type, records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [{"1" => nil}],
    ]
    type = {
      type: :time32,
      unit: :milli,
    }
    assert_each_raw_record(type, records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [{"1" => nil}],
    ]
    type = {
      type: :time64,
      unit: :micro,
    }
    assert_each_raw_record(type, records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [{"1" => nil}],
    ]
    type = {
      type: :time64,
      unit: :nano,
    }
    assert_each_raw_record(type, records)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    type = {
      type: :decimal128,
      precision: 8,
      scale: 2,
    }
    assert_each_raw_record(type, records)
  end

  def test_decimal256
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    type = {
      type: :decimal256,
      precision: 38,
      scale: 2,
    }
    assert_each_raw_record(type, records)
  end

  def test_month_interval
    records = [
      [{"0" => 1}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:month_interval, records)
  end

  def test_day_time_interval
    records = [
      [{"0" => {day: 1, millisecond: 100}}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:day_time_interval, records)
  end

  def test_month_day_nano_interval
    records = [
      [{"0" => {month: 1, day: 1, nanosecond: 100}}],
      [{"1" => nil}],
    ]
    assert_each_raw_record(:month_day_nano_interval, records)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [{"1" => nil}],
    ]
    type = {
      type: :list,
      field: {
        name: :sub_element,
        type: :boolean,
      },
    }
    assert_each_raw_record(type, records)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    type = {
      type: :struct,
      fields: [
        {
          name: :sub_field,
          type: :boolean,
        },
      ],
    }
    assert_each_raw_record(type, records)
  end

  def test_map
    records = [
      [{"0" => {"key1" => true, "key2" => nil}}],
      [{"1" => nil}],
    ]
    type = {
      type: :map,
      key: :string,
      item: :boolean,
    }
    assert_each_raw_record(type, records)
  end

  def test_sparse_union
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => 29}}],
      [{"0" => {"field2" => nil}}],
    ]
    type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    # The values are unions too: remove the field names of both levels.
    expected = remove_field_names(remove_field_names(records))
    assert_each_raw_record(type, records, expected: expected)
  end

  def test_dense_union
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => 29}}],
      [{"0" => {"field2" => nil}}],
    ]
    type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    # The values are unions too: remove the field names of both levels.
    expected = remove_field_names(remove_field_names(records))
    assert_each_raw_record(type, records, expected: expected)
  end

  def test_dictionary
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    type = {
      type: :dictionary,
      index_data_type: :int8,
      value_data_type: :string,
      ordered: false,
    }
    assert_each_raw_record(type, records)
  end
end
551
+
552
# Runs the shared dense union test cases against a record batch.
class EachRawRecordRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordDenseUnionArrayTests

  # Returns the object under test: the record batch that the shared
  # helper builds from records, unchanged.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch
  end
end
559
+
560
# Runs the shared dense union test cases against a table.
class EachRawRecordTableDenseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordDenseUnionArrayTests

  # Returns the object under test: the record batch that the shared
  # helper builds from records, converted with #to_table.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch.to_table
  end
end