red-arrow 0.13.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

@@ -1,474 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
# Checks that Arrow::RecordBatch#raw_records returns the same Ruby values
# that were used to build a record batch whose single column is a sparse
# union.
#
# Every record in these tests is a one-element Array. The element is
# either nil or a single-entry Hash keyed by the name of the union child
# field ("0" or "1") that holds the value.
class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  # Builds the schema description for a single sparse union column named
  # "column" that has two child fields, "0" and "1", of the same type.
  #
  # type:: either a bare type (e.g. :int8) or a Hash field description
  #        (e.g. {type: :timestamp, unit: :second}).
  # type_codes:: the union type codes, passed through unchanged.
  #
  # Returns a Hash that is passed to Arrow::Schema.new.
  def fields(type, type_codes)
    field_description = type.is_a?(Hash) ? type : {type: type}
    {
      column: {
        type: :sparse_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Builds a record batch with one sparse union column from +records+.
  #
  # type:: child field type, in the form accepted by #fields.
  # records:: Array of one-element Arrays; each element is nil or a Hash
  #           with a "0" or "1" key.
  #
  # Raises ArgumentError when a non-nil column value has neither a "0"
  # nor a "1" key. Without this check no type id would be recorded for
  # that record and the type id array would be shorter than the child
  # arrays.
  #
  # TODO: Use Arrow::RecordBatch.new(fields(type), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    description_schema = Arrow::Schema.new(fields(type, type_codes))

    # One child array per union field. A record contributes its value to
    # the child named by its Hash key and nil to the other child.
    arrays = description_schema.fields[0].data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = records.collect do |record|
        column = record[0]
        [column.nil? ? nil : column[field.name]]
      end
      Arrow::RecordBatch.new(sub_schema, sub_records).columns[0]
    end

    # One type id per record: nil for a nil column, otherwise the type
    # code of the child selected by the Hash key.
    type_ids = records.collect do |record|
      column = record[0]
      if column.nil?
        nil
      elsif column.key?("0")
        type_codes[0]
      elsif column.key?("1")
        type_codes[1]
      else
        raise ArgumentError,
              "union value must have \"0\" or \"1\" key: #{column.inspect}"
      end
    end

    # TODO
    # union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
    #                                           Arrow::Int8Array.new(type_ids),
    #                                           arrays)
    union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids),
                                              arrays)
    # The record batch schema is taken from the array that was actually
    # built, not from the description used to build the child arrays.
    record_batch_schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(record_batch_schema,
                           records.size,
                           [union_array])
  end

  test("NullArray") do
    records = [
      [{"0" => nil}],
      [nil],
    ]
    record_batch = build_record_batch(:null, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("BooleanArray") do
    records = [
      [{"0" => true}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:boolean, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("Int8Array") do
    records = [
      [{"0" => -(2 ** 7)}], # smallest signed 8-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:int8, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("UInt8Array") do
    records = [
      [{"0" => (2 ** 8) - 1}], # largest unsigned 8-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:uint8, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("Int16Array") do
    records = [
      [{"0" => -(2 ** 15)}], # smallest signed 16-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:int16, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("UInt16Array") do
    records = [
      [{"0" => (2 ** 16) - 1}], # largest unsigned 16-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:uint16, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("Int32Array") do
    records = [
      [{"0" => -(2 ** 31)}], # smallest signed 32-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:int32, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("UInt32Array") do
    records = [
      [{"0" => (2 ** 32) - 1}], # largest unsigned 32-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:uint32, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("Int64Array") do
    records = [
      [{"0" => -(2 ** 63)}], # smallest signed 64-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:int64, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("UInt64Array") do
    records = [
      [{"0" => (2 ** 64) - 1}], # largest unsigned 64-bit value
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:uint64, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("FloatArray") do
    records = [
      [{"0" => -1.0}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:float, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("DoubleArray") do
    records = [
      [{"0" => -1.0}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:double, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("BinaryArray") do
    records = [
      [{"0" => "\xff".b}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:binary, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("StringArray") do
    records = [
      [{"0" => "Ruby"}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:string, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("Date32Array") do
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:date32, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("Date64Array") do
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [nil],
      [{"1" => nil}],
    ]
    record_batch = build_record_batch(:date64, records)
    assert_equal(records, record_batch.raw_records)
  end

  sub_test_case("TimestampArray") do
    test("second") do
      records = [
        [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :timestamp,
        unit: :second,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end

    test("milli") do
      records = [
        [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :timestamp,
        unit: :milli,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end

    test("micro") do
      records = [
        [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :timestamp,
        unit: :micro,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end

    test("nano") do
      records = [
        [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :timestamp,
        unit: :nano,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end
  end

  sub_test_case("Time32Array") do
    test("second") do
      records = [
        [{"0" => 60 * 10}], # 00:10:00
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :time32,
        unit: :second,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end

    test("milli") do
      records = [
        [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :time32,
        unit: :milli,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end
  end

  sub_test_case("Time64Array") do
    test("micro") do
      records = [
        [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :time64,
        unit: :micro,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end

    test("nano") do
      records = [
        # 00:10:00.123456789
        [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
        [nil],
        [{"1" => nil}],
      ]
      type = {
        type: :time64,
        unit: :nano,
      }
      record_batch = build_record_batch(type, records)
      assert_equal(records, record_batch.raw_records)
    end
  end

  test("Decimal128Array") do
    records = [
      [{"0" => BigDecimal("92.92")}],
      [nil],
      [{"1" => nil}],
    ]
    type = {
      type: :decimal128,
      precision: 8,
      scale: 2,
    }
    record_batch = build_record_batch(type, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("ListArray") do
    records = [
      [{"0" => [true, nil, false]}],
      [nil],
      [{"1" => nil}],
    ]
    type = {
      type: :list,
      field: {
        name: :sub_element,
        type: :boolean,
      },
    }
    record_batch = build_record_batch(type, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("StructArray") do
    records = [
      [{"0" => {"sub_field" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    type = {
      type: :struct,
      fields: [
        {
          name: :sub_field,
          type: :boolean,
        },
      ],
    }
    record_batch = build_record_batch(type, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("SparseUnionArray") do
    # Skipped before any Arrow object is built; see the omit message.
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    record_batch = build_record_batch(type, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("DenseUnionArray") do
    # Skipped before any Arrow object is built; see the omit message.
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    record_batch = build_record_batch(type, records)
    assert_equal(records, record_batch.raw_records)
  end

  test("DictionaryArray") do
    # Skipped before any Arrow object is built; see the omit message.
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"0" => "Ruby"}],
      [nil],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    record_batch = build_record_batch(type, records)
    assert_equal(records, record_batch.raw_records)
  end
end