red-arrow 13.0.0 → 14.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,528 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module EachRawRecordSparseUnionArrayTests
19
+ def build_schema(type, type_codes)
20
+ field_description = {}
21
+ if type.is_a?(Hash)
22
+ field_description = field_description.merge(type)
23
+ else
24
+ field_description[:type] = type
25
+ end
26
+ {
27
+ column: {
28
+ type: :sparse_union,
29
+ fields: [
30
+ field_description.merge(name: "0"),
31
+ field_description.merge(name: "1"),
32
+ ],
33
+ type_codes: type_codes,
34
+ },
35
+ }
36
+ end
37
+
38
+ # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
39
+ def build_record_batch(type, records)
40
+ type_codes = [0, 1]
41
+ schema = Arrow::Schema.new(build_schema(type, type_codes))
42
+ type_ids = []
43
+ arrays = schema.fields[0].data_type.fields.collect do |field|
44
+ sub_schema = Arrow::Schema.new([field])
45
+ sub_records = records.collect do |record|
46
+ [record[0].nil? ? nil : record[0][field.name]]
47
+ end
48
+ sub_record_batch = Arrow::RecordBatch.new(sub_schema,
49
+ sub_records)
50
+ sub_record_batch.columns[0].data
51
+ end
52
+ records.each do |record|
53
+ column = record[0]
54
+ if column.key?("0")
55
+ type_ids << type_codes[0]
56
+ elsif column.key?("1")
57
+ type_ids << type_codes[1]
58
+ end
59
+ end
60
+ union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
61
+ Arrow::Int8Array.new(type_ids),
62
+ arrays)
63
+ schema = Arrow::Schema.new(column: union_array.value_data_type)
64
+ Arrow::RecordBatch.new(schema,
65
+ records.size,
66
+ [union_array])
67
+ end
68
+
69
+ def remove_field_names(records)
70
+ records.collect do |record|
71
+ record.collect do |column|
72
+ if column.nil?
73
+ column
74
+ else
75
+ column.values[0]
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ def test_null
82
+ records = [
83
+ [{"0" => nil}],
84
+ ]
85
+ target = build(:null, records)
86
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
87
+ end
88
+
89
+ def test_boolean
90
+ records = [
91
+ [{"0" => true}],
92
+ [{"1" => nil}],
93
+ ]
94
+ target = build(:boolean, records)
95
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
96
+ end
97
+
98
+ def test_int8
99
+ records = [
100
+ [{"0" => -(2 ** 7)}],
101
+ [{"1" => nil}],
102
+ ]
103
+ target = build(:int8, records)
104
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
105
+ end
106
+
107
+ def test_uint8
108
+ records = [
109
+ [{"0" => (2 ** 8) - 1}],
110
+ [{"1" => nil}],
111
+ ]
112
+ target = build(:uint8, records)
113
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
114
+ end
115
+
116
+ def test_int16
117
+ records = [
118
+ [{"0" => -(2 ** 15)}],
119
+ [{"1" => nil}],
120
+ ]
121
+ target = build(:int16, records)
122
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
123
+ end
124
+
125
+ def test_uint16
126
+ records = [
127
+ [{"0" => (2 ** 16) - 1}],
128
+ [{"1" => nil}],
129
+ ]
130
+ target = build(:uint16, records)
131
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
132
+ end
133
+
134
+ def test_int32
135
+ records = [
136
+ [{"0" => -(2 ** 31)}],
137
+ [{"1" => nil}],
138
+ ]
139
+ target = build(:int32, records)
140
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
141
+ end
142
+
143
+ def test_uint32
144
+ records = [
145
+ [{"0" => (2 ** 32) - 1}],
146
+ [{"1" => nil}],
147
+ ]
148
+ target = build(:uint32, records)
149
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
150
+ end
151
+
152
+ def test_int64
153
+ records = [
154
+ [{"0" => -(2 ** 63)}],
155
+ [{"1" => nil}],
156
+ ]
157
+ target = build(:int64, records)
158
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
159
+ end
160
+
161
+ def test_uint64
162
+ records = [
163
+ [{"0" => (2 ** 64) - 1}],
164
+ [{"1" => nil}],
165
+ ]
166
+ target = build(:uint64, records)
167
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
168
+ end
169
+
170
+ def test_float
171
+ records = [
172
+ [{"0" => -1.0}],
173
+ [{"1" => nil}],
174
+ ]
175
+ target = build(:float, records)
176
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
177
+ end
178
+
179
+ def test_double
180
+ records = [
181
+ [{"0" => -1.0}],
182
+ [{"1" => nil}],
183
+ ]
184
+ target = build(:double, records)
185
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
186
+ end
187
+
188
+ def test_binary
189
+ records = [
190
+ [{"0" => "\xff".b}],
191
+ [{"1" => nil}],
192
+ ]
193
+ target = build(:binary, records)
194
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
195
+ end
196
+
197
+ def test_string
198
+ records = [
199
+ [{"0" => "Ruby"}],
200
+ [{"1" => nil}],
201
+ ]
202
+ target = build(:string, records)
203
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
204
+ end
205
+
206
+ def test_date32
207
+ records = [
208
+ [{"0" => Date.new(1960, 1, 1)}],
209
+ [{"1" => nil}],
210
+ ]
211
+ target = build(:date32, records)
212
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
213
+ end
214
+
215
+ def test_date64
216
+ records = [
217
+ [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
218
+ [{"1" => nil}],
219
+ ]
220
+ target = build(:date64, records)
221
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
222
+ end
223
+
224
+ def test_timestamp_second
225
+ records = [
226
+ [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
227
+ [{"1" => nil}],
228
+ ]
229
+ target = build({
230
+ type: :timestamp,
231
+ unit: :second,
232
+ },
233
+ records)
234
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
235
+ end
236
+
237
+ def test_timestamp_milli
238
+ records = [
239
+ [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
240
+ [{"1" => nil}],
241
+ ]
242
+ target = build({
243
+ type: :timestamp,
244
+ unit: :milli,
245
+ },
246
+ records)
247
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
248
+
249
+ end
250
+
251
+ def test_timestamp_micro
252
+ records = [
253
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
254
+ [{"1" => nil}],
255
+ ]
256
+ target = build({
257
+ type: :timestamp,
258
+ unit: :micro,
259
+ },
260
+ records)
261
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
262
+ end
263
+
264
+ def test_timestamp_nano
265
+ records = [
266
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
267
+ [{"1" => nil}],
268
+ ]
269
+ target = build({
270
+ type: :timestamp,
271
+ unit: :nano,
272
+ },
273
+ records)
274
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
275
+ end
276
+
277
+ def test_time32_second
278
+ unit = Arrow::TimeUnit::SECOND
279
+ records = [
280
+ # 00:10:00
281
+ [{"0" => Arrow::Time.new(unit, 60 * 10)}],
282
+ [{"1" => nil}],
283
+ ]
284
+ target = build({
285
+ type: :time32,
286
+ unit: :second,
287
+ },
288
+ records)
289
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
290
+ end
291
+
292
+ def test_time32_milli
293
+ unit = Arrow::TimeUnit::MILLI
294
+ records = [
295
+ # 00:10:00.123
296
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
297
+ [{"1" => nil}],
298
+ ]
299
+ target = build({
300
+ type: :time32,
301
+ unit: :milli,
302
+ },
303
+ records)
304
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
305
+ end
306
+
307
+ def test_time64_micro
308
+ unit = Arrow::TimeUnit::MICRO
309
+ records = [
310
+ # 00:10:00.123456
311
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
312
+ [{"1" => nil}],
313
+ ]
314
+ target = build({
315
+ type: :time64,
316
+ unit: :micro,
317
+ },
318
+ records)
319
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
320
+ end
321
+
322
+ def test_time64_nano
323
+ unit = Arrow::TimeUnit::NANO
324
+ records = [
325
+ # 00:10:00.123456789
326
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
327
+ [{"1" => nil}],
328
+ ]
329
+ target = build({
330
+ type: :time64,
331
+ unit: :nano,
332
+ },
333
+ records)
334
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
335
+ end
336
+
337
+ def test_decimal128
338
+ records = [
339
+ [{"0" => BigDecimal("92.92")}],
340
+ [{"1" => nil}],
341
+ ]
342
+ target = build({
343
+ type: :decimal128,
344
+ precision: 8,
345
+ scale: 2,
346
+ },
347
+ records)
348
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
349
+ end
350
+
351
+ def test_decimal256
352
+ records = [
353
+ [{"0" => BigDecimal("92.92")}],
354
+ [{"1" => nil}],
355
+ ]
356
+ target = build({
357
+ type: :decimal256,
358
+ precision: 38,
359
+ scale: 2,
360
+ },
361
+ records)
362
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
363
+ end
364
+
365
+ def test_month_interval
366
+ records = [
367
+ [{"0" => 1}],
368
+ [{"1" => nil}],
369
+ ]
370
+ target = build(:month_interval, records)
371
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
372
+ end
373
+
374
+ def test_day_time_interval
375
+ records = [
376
+ [{"0" => {day: 1, millisecond: 100}}],
377
+ [{"1" => nil}],
378
+ ]
379
+ target = build(:day_time_interval, records)
380
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
381
+ end
382
+
383
+ def test_month_day_nano_interval
384
+ records = [
385
+ [{"0" => {month: 1, day: 1, nanosecond: 100}}],
386
+ [{"1" => nil}],
387
+ ]
388
+ target = build(:month_day_nano_interval, records)
389
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
390
+ end
391
+
392
+ def test_list
393
+ records = [
394
+ [{"0" => [true, nil, false]}],
395
+ [{"1" => nil}],
396
+ ]
397
+ target = build({
398
+ type: :list,
399
+ field: {
400
+ name: :sub_element,
401
+ type: :boolean,
402
+ },
403
+ },
404
+ records)
405
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
406
+ end
407
+
408
+ def test_struct
409
+ records = [
410
+ [{"0" => {"sub_field" => true}}],
411
+ [{"1" => nil}],
412
+ [{"0" => {"sub_field" => nil}}],
413
+ ]
414
+ target = build({
415
+ type: :struct,
416
+ fields: [
417
+ {
418
+ name: :sub_field,
419
+ type: :boolean,
420
+ },
421
+ ],
422
+ },
423
+ records)
424
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
425
+ end
426
+
427
+ def test_map
428
+ records = [
429
+ [{"0" => {"key1" => true, "key2" => nil}}],
430
+ [{"1" => nil}],
431
+ ]
432
+ target = build({
433
+ type: :map,
434
+ key: :string,
435
+ item: :boolean,
436
+ },
437
+ records)
438
+ assert_equal(remove_field_names(records), target.each_raw_record.to_a)
439
+ end
440
+
441
+ def test_sparse_union
442
+ records = [
443
+ [{"0" => {"field1" => true}}],
444
+ [{"1" => nil}],
445
+ [{"0" => {"field2" => 29}}],
446
+ [{"0" => {"field2" => nil}}],
447
+ ]
448
+ target = build({
449
+ type: :sparse_union,
450
+ fields: [
451
+ {
452
+ name: :field1,
453
+ type: :boolean,
454
+ },
455
+ {
456
+ name: :field2,
457
+ type: :uint8,
458
+ },
459
+ ],
460
+ type_codes: [0, 1],
461
+ },
462
+ records)
463
+ assert_equal(remove_field_names(remove_field_names(records)),
464
+ target.each_raw_record.to_a)
465
+ end
466
+
467
+ def test_dense_union
468
+ records = [
469
+ [{"0" => {"field1" => true}}],
470
+ [{"1" => nil}],
471
+ [{"0" => {"field2" => 29}}],
472
+ [{"0" => {"field2" => nil}}],
473
+ ]
474
+ target = build({
475
+ type: :dense_union,
476
+ fields: [
477
+ {
478
+ name: :field1,
479
+ type: :boolean,
480
+ },
481
+ {
482
+ name: :field2,
483
+ type: :uint8,
484
+ },
485
+ ],
486
+ type_codes: [0, 1],
487
+ },
488
+ records)
489
+ assert_equal(remove_field_names(remove_field_names(records)),
490
+ target.each_raw_record.to_a)
491
+ end
492
+
493
+ def test_dictionary
494
+ records = [
495
+ [{"0" => "Ruby"}],
496
+ [{"1" => nil}],
497
+ [{"0" => "GLib"}],
498
+ ]
499
+ iterated_records = []
500
+ target = build({
501
+ type: :dictionary,
502
+ index_data_type: :int8,
503
+ value_data_type: :string,
504
+ ordered: false,
505
+ },
506
+ records)
507
+ target.each_raw_record do |record|
508
+ iterated_records << record
509
+ end
510
+ assert_equal(remove_field_names(records), iterated_records)
511
+ end
512
+ end
513
+
514
# Runs the shared sparse union cases against the record batch returned by
# build_record_batch.
class EachRawRecordRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordSparseUnionArrayTests

  # Returns the object under test: the record batch itself.
  def build(type, records)
    build_record_batch(type, records)
  end
end
521
+
522
# Runs the shared sparse union cases against the result of calling #to_table
# on the record batch returned by build_record_batch.
class EachRawRecordTableSparseUnionArrayTest < Test::Unit::TestCase
  include EachRawRecordSparseUnionArrayTests

  # Returns the object under test: the record batch converted with #to_table.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch.to_table
  end
end