red-arrow 13.0.0 → 14.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,507 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
17
+
18
# Shared test cases asserting that #each_raw_record returns the records that
# were used to build a single map column. The map keys are strings and the map
# items are of the data type under test.
#
# Every case uses two records: one holding a map (usually with a nil item)
# and one holding nil in place of the map.
#
# Including classes must define build(type, records), returning the object
# whose #each_raw_record is exercised.
module EachRawRecordMapArrayTests
  # Builds the schema description for the single column named :column.
  #
  # type - item data type description: a Symbol such as :int8, or a Hash such
  #        as {type: :timestamp, unit: :second}.
  #
  # Returns a Hash describing a map column with string keys and `type` items.
  def build_schema(type)
    {
      column: {
        type: :map,
        key: :string,
        item: type
      },
    }
  end

  def test_null
    records = [
      [{"key1" => nil}],
      [nil],
    ]
    target = build(:null, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_boolean
    records = [
      [{"key1" => true, "key2" => nil}],
      [nil],
    ]
    target = build(:boolean, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  # Integer cases use the minimum value of each signed type and the maximum
  # value of each unsigned type.

  def test_int8
    records = [
      [{"key1" => -(2 ** 7), "key2" => nil}],
      [nil],
    ]
    target = build(:int8, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_uint8
    records = [
      [{"key1" => (2 ** 8) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint8, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_int16
    records = [
      [{"key1" => -(2 ** 15), "key2" => nil}],
      [nil],
    ]
    target = build(:int16, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_uint16
    records = [
      [{"key1" => (2 ** 16) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint16, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_int32
    records = [
      [{"key1" => -(2 ** 31), "key2" => nil}],
      [nil],
    ]
    target = build(:int32, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_uint32
    records = [
      [{"key1" => (2 ** 32) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint32, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_int64
    records = [
      [{"key1" => -(2 ** 63), "key2" => nil}],
      [nil],
    ]
    target = build(:int64, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_uint64
    records = [
      [{"key1" => (2 ** 64) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint64, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_float
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    target = build(:float, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_double
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    target = build(:double, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_binary
    records = [
      [{"key1" => "\xff".b, "key2" => nil}],
      [nil],
    ]
    target = build(:binary, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_string
    records = [
      [{"key1" => "Ruby", "key2" => nil}],
      [nil],
    ]
    target = build(:string, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_date32
    records = [
      [{"key1" => Date.new(1960, 1, 1), "key2" => nil}],
      [nil],
    ]
    target = build(:date32, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_date64
    records = [
      [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}],
      [nil],
    ]
    target = build(:date64, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_timestamp_second
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :timestamp,
      unit: :second,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_timestamp_milli
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :timestamp,
      unit: :milli,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_timestamp_micro
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :timestamp,
      unit: :micro,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_timestamp_nano
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :timestamp,
      unit: :nano,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :time32,
      unit: :second,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :time32,
      unit: :milli,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :time64,
      unit: :micro,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :time64,
      unit: :nano,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_decimal128
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :decimal128,
      precision: 8,
      scale: 2,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_decimal256
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    type = {
      type: :decimal256,
      precision: 38,
      scale: 2,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_month_interval
    records = [
      [{"key1" => 1, "key2" => nil}],
      [nil],
    ]
    target = build(:month_interval, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_day_time_interval
    records = [
      [
        {
          "key1" => {day: 1, millisecond: 100},
          "key2" => nil,
        },
      ],
      [nil],
    ]
    target = build(:day_time_interval, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_month_day_nano_interval
    records = [
      [
        {
          "key1" => {month: 1, day: 1, nanosecond: 100},
          "key2" => nil,
        },
      ],
      [nil],
    ]
    target = build(:month_day_nano_interval, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_list
    records = [
      [{"key1" => [true, nil, false], "key2" => nil}],
      [nil],
    ]
    type = {
      type: :list,
      field: {
        name: :element,
        type: :boolean,
      },
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_struct
    records = [
      [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}],
      [nil],
    ]
    type = {
      type: :struct,
      fields: [
        {
          name: :field,
          type: :boolean,
        },
      ],
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_map
    records = [
      [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}],
      [nil],
    ]
    type = {
      type: :map,
      key: :string,
      item: :boolean,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  # Converts union test input into the form the union tests expect back from
  # #each_raw_record: every non-nil map item, written as
  # {"field_name" => value}, is replaced by its first (only) value.
  #
  # records - Array of records; each record is an Array whose columns are
  #           either nil or a Hash mapping map keys to nil or a one-entry Hash.
  #
  # Returns a new nested Array; the input is not modified.
  def remove_union_field_names(records)
    records.collect do |record|
      record.collect do |column|
        # nil columns (null maps) and nil items pass through unchanged.
        column&.transform_values do |item|
          item&.values&.first
        end
      end
    end
  end

  def test_sparse_union
    records = [
      [
        {
          "key1" => {"field1" => true},
          "key2" => nil,
          "key3" => {"field2" => 29},
          "key4" => {"field2" => nil},
        },
      ],
      [nil],
    ]
    type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    target = build(type, records)
    # Raw records are compared against the input with field names stripped.
    assert_equal(remove_union_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_dense_union
    records = [
      [
        {
          "key1" => {"field1" => true},
          "key2" => nil,
          "key3" => {"field2" => 29},
          "key4" => {"field2" => nil},
        },
      ],
      [nil],
    ]
    type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    target = build(type, records)
    # Raw records are compared against the input with field names stripped.
    assert_equal(remove_union_field_names(records),
                 target.each_raw_record.to_a)
  end

  def test_dictionary
    records = [
      [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
      [nil],
    ]
    type = {
      type: :dictionary,
      index_data_type: :int8,
      value_data_type: :string,
      ordered: false,
    }
    target = build(type, records)
    assert_equal(records, target.each_raw_record.to_a)
  end
end
492
+
493
# Runs the shared EachRawRecordMapArrayTests cases against Arrow::RecordBatch.
class EachRawRecordRecordBatchMapArrayTest < Test::Unit::TestCase
  include EachRawRecordMapArrayTests

  # Builds the record batch under test.
  #
  # type    - item data type description passed on to build_schema.
  # records - Array of records used as the batch contents.
  def build(type, records)
    Arrow::RecordBatch.new(build_schema(type), records)
  end
end
500
+
501
# Runs the shared EachRawRecordMapArrayTests cases against Arrow::Table.
class EachRawRecordTableMapArrayTest < Test::Unit::TestCase
  include EachRawRecordMapArrayTests

  # Builds the table under test.
  #
  # type    - item data type description passed on to build_schema.
  # records - Array of records used as the table contents.
  def build(type, records)
    Arrow::Table.new(build_schema(type), records)
  end
end
@@ -0,0 +1,72 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
17
+
18
# Shared test cases asserting that #each_raw_record returns the records that
# were used to build a target with several columns of different types
# (boolean, uint8, string and, in the four-column case, int64).
#
# Including classes must define build(schema, records), returning the object
# whose #each_raw_record is exercised.
module EachRawRecordMultipleColumnsTests
  def test_3_elements
    records = [
      [true, nil, "Ruby"],
      [nil, 0, "GLib"],
      [false, 2 ** 8 - 1, nil],
    ]
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
    ]
    target = build(schema, records)
    assert_equal(records, target.each_raw_record.to_a)
  end

  def test_4_elements
    records = [
      [true, nil, "Ruby", -(2 ** 63)],
      [nil, 0, "GLib", nil],
      [false, 2 ** 8 - 1, nil, (2 ** 63) - 1],
    ]
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
      {name: :column3, type: :int64},
    ]
    target = build(schema, records)
    assert_equal(records, target.each_raw_record.to_a)
  end
end
50
+
51
# Runs the shared EachRawRecordMultipleColumnsTests cases against
# Arrow::RecordBatch.
class EachRawRecordRecordBatchMultipleColumnsTest < Test::Unit::TestCase
  include EachRawRecordMultipleColumnsTests

  # Builds the record batch under test.
  #
  # schema  - Array of column descriptions ({name:, type:}).
  # records - Array of records used as the batch contents.
  def build(schema, records)
    Arrow::RecordBatch.new(schema, records)
  end
end
58
+
59
# Runs the shared EachRawRecordMultipleColumnsTests cases against an
# Arrow::Table assembled from several record batches.
class EachRawRecordTableMultipleColumnsTest < Test::Unit::TestCase
  include EachRawRecordMultipleColumnsTests

  # Builds the table under test from three slices of a single record batch.
  #
  # schema  - Array of column descriptions ({name:, type:}).
  # records - Array of records used as the table contents.
  #
  # NOTE(review): slice arguments look like (offset, length), which would
  # make the slices "first two rows", "no rows" and "the remaining rows" -
  # confirm against the Arrow::RecordBatch#slice documentation.
  def build(schema, records)
    record_batch = Arrow::RecordBatch.new(schema, records)
    record_batches = [
      record_batch.slice(0, 2),
      record_batch.slice(2, 0),
      record_batch.slice(2, record_batch.length - 2),
    ]

    Arrow::Table.new(schema, record_batches)
  end
end