red-arrow 0.13.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

@@ -1,486 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
19
# Describes a schema with a single :dense_union column named :column.
#
# type       - a Symbol naming the child data type, or a Hash that already
#              holds a field description (for example type plus unit).
# type_codes - passed through unchanged as the union's :type_codes.
#
# Returns a Hash. Both union children share the same description and
# differ only in their :name ("0" and "1").
def fields(type, type_codes)
  # Hash#merge below returns a new Hash, so a caller-supplied Hash is
  # never modified.
  base = type.is_a?(Hash) ? type : {type: type}
  children = ["0", "1"].map { |child_name| base.merge(name: child_name) }
  {
    column: {
      type: :dense_union,
      fields: children,
      type_codes: type_codes,
    },
  }
end
37
-
38
# Builds a record batch with one dense-union column from +records+.
#
# type    - Symbol or Hash describing the data type of the union children;
#           forwarded to #fields.
# records - Array of rows. Only the first element of each row is read. It
#           is either nil (a null union slot) or a Hash keyed by the name
#           of the selected union child and mapping to that child's value.
#
# Returns an Arrow::RecordBatch with records.size rows.
# Raises ArgumentError when a row's Hash names none of the union children;
# such a row used to be skipped silently, leaving the union array shorter
# than the row count passed to the record batch.
#
# TODO: Use Arrow::RecordBatch.new(fields(type), records)
def build_record_batch(type, records)
  type_codes = [0, 1]
  union_fields =
    Arrow::Schema.new(fields(type, type_codes)).fields[0].data_type.fields

  # One child array per union field. Each holds only the values of the rows
  # that selected that field, in row order, and is built through a
  # single-field record batch.
  arrays = union_fields.collect do |field|
    sub_records = []
    records.each do |record|
      column = record[0]
      next if column.nil?
      next unless column.key?(field.name)
      sub_records << [column[field.name]]
    end
    Arrow::RecordBatch.new(Arrow::Schema.new([field]), sub_records).columns[0]
  end

  type_ids = []
  offsets = []
  # Next free position in each child array, keyed by type code. This
  # replaces re-counting type_ids for every row.
  next_offsets = Hash.new(0)
  records.each_with_index do |record, row|
    column = record[0]
    if column.nil?
      type_ids << nil
      offsets << 0
      next
    end
    # The first union field whose name is a key of the row's Hash wins,
    # matching the previous "0"-before-"1" check order.
    field_index = union_fields.find_index { |field| column.key?(field.name) }
    if field_index.nil?
      raise ArgumentError,
            "record #{row}: no union field matches #{column.keys.inspect}"
    end
    type_id = type_codes[field_index]
    type_ids << type_id
    offsets << next_offsets[type_id]
    next_offsets[type_id] += 1
  end

  # TODO
  # union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
  #                                          Arrow::Int8Array.new(type_ids),
  #                                          Arrow::Int32Array.new(offsets),
  #                                          arrays)
  union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids),
                                           Arrow::Int32Array.new(offsets),
                                           arrays)
  schema = Arrow::Schema.new(column: union_array.value_data_type)
  Arrow::RecordBatch.new(schema, records.size, [union_array])
end
85
-
86
# Round trip for :null: raw_records must equal the rows the batch was
# built from.
test("NullArray") do
  expected = [
    [{"0" => nil}],
    [nil],
  ]
  actual = build_record_batch(:null, expected).raw_records
  assert_equal(expected, actual)
end
94
-
95
# Round trip for :boolean: raw_records must equal the rows the batch was
# built from.
test("BooleanArray") do
  expected = [
    [{"0" => true}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:boolean, expected).raw_records
  assert_equal(expected, actual)
end
104
-
105
# Round trip for :int8 using the smallest signed 8-bit value.
test("Int8Array") do
  int8_min = -(2 ** 7)
  expected = [
    [{"0" => int8_min}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:int8, expected).raw_records
  assert_equal(expected, actual)
end
114
-
115
# Round trip for :uint8 using the largest unsigned 8-bit value.
test("UInt8Array") do
  uint8_max = (2 ** 8) - 1
  expected = [
    [{"0" => uint8_max}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:uint8, expected).raw_records
  assert_equal(expected, actual)
end
124
-
125
# Round trip for :int16 using the smallest signed 16-bit value.
test("Int16Array") do
  int16_min = -(2 ** 15)
  expected = [
    [{"0" => int16_min}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:int16, expected).raw_records
  assert_equal(expected, actual)
end
134
-
135
# Round trip for :uint16 using the largest unsigned 16-bit value.
test("UInt16Array") do
  uint16_max = (2 ** 16) - 1
  expected = [
    [{"0" => uint16_max}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:uint16, expected).raw_records
  assert_equal(expected, actual)
end
144
-
145
# Round trip for :int32 using the smallest signed 32-bit value.
test("Int32Array") do
  int32_min = -(2 ** 31)
  expected = [
    [{"0" => int32_min}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:int32, expected).raw_records
  assert_equal(expected, actual)
end
154
-
155
# Round trip for :uint32 using the largest unsigned 32-bit value.
test("UInt32Array") do
  uint32_max = (2 ** 32) - 1
  expected = [
    [{"0" => uint32_max}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:uint32, expected).raw_records
  assert_equal(expected, actual)
end
164
-
165
# Round trip for :int64 using the smallest signed 64-bit value.
test("Int64Array") do
  int64_min = -(2 ** 63)
  expected = [
    [{"0" => int64_min}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:int64, expected).raw_records
  assert_equal(expected, actual)
end
174
-
175
# Round trip for :uint64 using the largest unsigned 64-bit value.
test("UInt64Array") do
  uint64_max = (2 ** 64) - 1
  expected = [
    [{"0" => uint64_max}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:uint64, expected).raw_records
  assert_equal(expected, actual)
end
184
-
185
# Round trip for :float: raw_records must equal the rows the batch was
# built from.
test("FloatArray") do
  expected = [
    [{"0" => -1.0}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:float, expected).raw_records
  assert_equal(expected, actual)
end
194
-
195
# Round trip for :double: raw_records must equal the rows the batch was
# built from.
test("DoubleArray") do
  expected = [
    [{"0" => -1.0}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:double, expected).raw_records
  assert_equal(expected, actual)
end
204
-
205
# Round trip for :binary using a single 0xff byte as the value.
test("BinaryArray") do
  expected = [
    [{"0" => "\xff".b}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:binary, expected).raw_records
  assert_equal(expected, actual)
end
214
-
215
# Round trip for :string: raw_records must equal the rows the batch was
# built from.
test("StringArray") do
  expected = [
    [{"0" => "Ruby"}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:string, expected).raw_records
  assert_equal(expected, actual)
end
224
-
225
# Round trip for :date32 with a Date value.
test("Date32Array") do
  day = Date.new(1960, 1, 1)
  expected = [
    [{"0" => day}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:date32, expected).raw_records
  assert_equal(expected, actual)
end
234
-
235
# Round trip for :date64 with a DateTime value.
test("Date64Array") do
  moment = DateTime.new(1960, 1, 1, 2, 9, 30)
  expected = [
    [{"0" => moment}],
    [nil],
    [{"1" => nil}],
  ]
  actual = build_record_batch(:date64, expected).raw_records
  assert_equal(expected, actual)
end
244
-
245
# Round trips for :timestamp, one test per unit. The Time value in each
# test carries as many fractional-second digits as the unit's name
# suggests.
sub_test_case("TimestampArray") do
  test("second") do
    expected = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :timestamp, unit: :second}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end

  test("milli") do
    expected = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :timestamp, unit: :milli}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end

  test("micro") do
    expected = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :timestamp, unit: :micro}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end

  test("nano") do
    expected = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :timestamp, unit: :nano}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end
end
302
-
303
# Round trips for :time32. Values are plain Integers counted in the
# test's unit.
sub_test_case("Time32Array") do
  test("second") do
    ten_minutes = 60 * 10 # 00:10:00
    expected = [
      [{"0" => ten_minutes}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :time32, unit: :second}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end

  test("milli") do
    time_of_day = (60 * 10) * 1000 + 123 # 00:10:00.123
    expected = [
      [{"0" => time_of_day}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :time32, unit: :milli}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end
end
332
-
333
# Round trips for :time64. Values are plain Integers counted in the
# test's unit.
sub_test_case("Time64Array") do
  test("micro") do
    time_of_day = (60 * 10) * 1_000_000 + 123_456 # 00:10:00.123456
    expected = [
      [{"0" => time_of_day}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :time64, unit: :micro}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end

  test("nano") do
    # 00:10:00.123456789
    time_of_day = (60 * 10) * 1_000_000_000 + 123_456_789
    expected = [
      [{"0" => time_of_day}],
      [nil],
      [{"1" => nil}],
    ]
    data_type = {type: :time64, unit: :nano}
    actual = build_record_batch(data_type, expected).raw_records
    assert_equal(expected, actual)
  end
end
363
-
364
# Round trip for :decimal128 declared with precision 8 and scale 2.
test("Decimal128Array") do
  expected = [
    [{"0" => BigDecimal("92.92")}],
    [nil],
    [{"1" => nil}],
  ]
  data_type = {type: :decimal128, precision: 8, scale: 2}
  actual = build_record_batch(data_type, expected).raw_records
  assert_equal(expected, actual)
end
378
-
379
# Round trip for a :list of booleans, including a nil list element.
test("ListArray") do
  expected = [
    [{"0" => [true, nil, false]}],
    [nil],
    [{"1" => nil}],
  ]
  element_field = {name: :sub_element, type: :boolean}
  data_type = {type: :list, field: element_field}
  actual = build_record_batch(data_type, expected).raw_records
  assert_equal(expected, actual)
end
395
-
396
# Round trip for a :struct with one boolean member, covering both a set
# and a nil member value.
test("StructArray") do
  expected = [
    [{"0" => {"sub_field" => true}}],
    [nil],
    [{"1" => nil}],
    [{"0" => {"sub_field" => nil}}],
  ]
  member_fields = [{name: :sub_field, type: :boolean}]
  data_type = {type: :struct, fields: member_fields}
  actual = build_record_batch(data_type, expected).raw_records
  assert_equal(expected, actual)
end
415
-
416
# Sparse union nested inside the dense union. The test calls omit first;
# NOTE(review): presumably nothing after omit runs -- confirm against
# test-unit.
test("SparseUnionArray") do
  omit("Need to add support for SparseUnionArrayBuilder")
  expected = [
    [{"0" => {"field1" => true}}],
    [nil],
    [{"1" => nil}],
    [{"0" => {"field2" => nil}}],
  ]
  member_fields = [
    {name: :field1, type: :boolean},
    {name: :field2, type: :uint8},
  ]
  data_type = {
    type: :sparse_union,
    fields: member_fields,
    type_codes: [0, 1],
  }
  actual = build_record_batch(data_type, expected).raw_records
  assert_equal(expected, actual)
end
441
-
442
# Dense union nested inside the dense union. The test calls omit first;
# NOTE(review): presumably nothing after omit runs -- confirm against
# test-unit.
test("DenseUnionArray") do
  omit("Need to add support for DenseUnionArrayBuilder")
  expected = [
    [{"0" => {"field1" => true}}],
    [nil],
    [{"1" => nil}],
    [{"0" => {"field2" => nil}}],
  ]
  member_fields = [
    {name: :field1, type: :boolean},
    {name: :field2, type: :uint8},
  ]
  data_type = {
    type: :dense_union,
    fields: member_fields,
    type_codes: [0, 1],
  }
  actual = build_record_batch(data_type, expected).raw_records
  assert_equal(expected, actual)
end
467
-
468
# Dictionary-encoded strings inside the dense union. The test calls omit
# first; NOTE(review): presumably nothing after omit runs -- confirm
# against test-unit.
test("DictionaryArray") do
  omit("Need to add support for DictionaryArrayBuilder")
  expected = [
    [{"0" => "Ruby"}],
    [nil],
    [{"1" => nil}],
    [{"0" => "GLib"}],
  ]
  dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
  data_type = {
    type: :dictionary,
    index_data_type: :int8,
    dictionary: dictionary,
    ordered: true,
  }
  actual = build_record_batch(data_type, expected).raw_records
  assert_equal(expected, actual)
end
486
- end