red-arrow 0.13.0 → 0.14.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

@@ -1,486 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
19
- def fields(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- {
27
- column: {
28
- type: :dense_union,
29
- fields: [
30
- field_description.merge(name: "0"),
31
- field_description.merge(name: "1"),
32
- ],
33
- type_codes: type_codes,
34
- },
35
- }
36
- end
37
-
38
- # TODO: Use Arrow::RecordBatch.new(fields(type), records)
39
- def build_record_batch(type, records)
40
- type_codes = [0, 1]
41
- schema = Arrow::Schema.new(fields(type, type_codes))
42
- type_ids = []
43
- offsets = []
44
- arrays = schema.fields[0].data_type.fields.collect do |field|
45
- sub_schema = Arrow::Schema.new([field])
46
- sub_records = []
47
- records.each do |record|
48
- column = record[0]
49
- next if column.nil?
50
- next unless column.key?(field.name)
51
- sub_records << [column[field.name]]
52
- end
53
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
54
- sub_records)
55
- sub_record_batch.columns[0]
56
- end
57
- records.each do |record|
58
- column = record[0]
59
- if column.nil?
60
- type_ids << nil
61
- offsets << 0
62
- elsif column.key?("0")
63
- type_id = type_codes[0]
64
- type_ids << type_id
65
- offsets << (type_ids.count(type_id) - 1)
66
- elsif column.key?("1")
67
- type_id = type_codes[1]
68
- type_ids << type_id
69
- offsets << (type_ids.count(type_id) - 1)
70
- end
71
- end
72
- # TODO
73
- # union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
74
- # Arrow::Int8Array.new(type_ids),
75
- # Arrow::Int32Array.new(offsets),
76
- # arrays)
77
- union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids),
78
- Arrow::Int32Array.new(offsets),
79
- arrays)
80
- schema = Arrow::Schema.new(column: union_array.value_data_type)
81
- Arrow::RecordBatch.new(schema,
82
- records.size,
83
- [union_array])
84
- end
85
-
86
- test("NullArray") do
87
- records = [
88
- [{"0" => nil}],
89
- [nil],
90
- ]
91
- record_batch = build_record_batch(:null, records)
92
- assert_equal(records, record_batch.raw_records)
93
- end
94
-
95
- test("BooleanArray") do
96
- records = [
97
- [{"0" => true}],
98
- [nil],
99
- [{"1" => nil}],
100
- ]
101
- record_batch = build_record_batch(:boolean, records)
102
- assert_equal(records, record_batch.raw_records)
103
- end
104
-
105
- test("Int8Array") do
106
- records = [
107
- [{"0" => -(2 ** 7)}],
108
- [nil],
109
- [{"1" => nil}],
110
- ]
111
- record_batch = build_record_batch(:int8, records)
112
- assert_equal(records, record_batch.raw_records)
113
- end
114
-
115
- test("UInt8Array") do
116
- records = [
117
- [{"0" => (2 ** 8) - 1}],
118
- [nil],
119
- [{"1" => nil}],
120
- ]
121
- record_batch = build_record_batch(:uint8, records)
122
- assert_equal(records, record_batch.raw_records)
123
- end
124
-
125
- test("Int16Array") do
126
- records = [
127
- [{"0" => -(2 ** 15)}],
128
- [nil],
129
- [{"1" => nil}],
130
- ]
131
- record_batch = build_record_batch(:int16, records)
132
- assert_equal(records, record_batch.raw_records)
133
- end
134
-
135
- test("UInt16Array") do
136
- records = [
137
- [{"0" => (2 ** 16) - 1}],
138
- [nil],
139
- [{"1" => nil}],
140
- ]
141
- record_batch = build_record_batch(:uint16, records)
142
- assert_equal(records, record_batch.raw_records)
143
- end
144
-
145
- test("Int32Array") do
146
- records = [
147
- [{"0" => -(2 ** 31)}],
148
- [nil],
149
- [{"1" => nil}],
150
- ]
151
- record_batch = build_record_batch(:int32, records)
152
- assert_equal(records, record_batch.raw_records)
153
- end
154
-
155
- test("UInt32Array") do
156
- records = [
157
- [{"0" => (2 ** 32) - 1}],
158
- [nil],
159
- [{"1" => nil}],
160
- ]
161
- record_batch = build_record_batch(:uint32, records)
162
- assert_equal(records, record_batch.raw_records)
163
- end
164
-
165
- test("Int64Array") do
166
- records = [
167
- [{"0" => -(2 ** 63)}],
168
- [nil],
169
- [{"1" => nil}],
170
- ]
171
- record_batch = build_record_batch(:int64, records)
172
- assert_equal(records, record_batch.raw_records)
173
- end
174
-
175
- test("UInt64Array") do
176
- records = [
177
- [{"0" => (2 ** 64) - 1}],
178
- [nil],
179
- [{"1" => nil}],
180
- ]
181
- record_batch = build_record_batch(:uint64, records)
182
- assert_equal(records, record_batch.raw_records)
183
- end
184
-
185
- test("FloatArray") do
186
- records = [
187
- [{"0" => -1.0}],
188
- [nil],
189
- [{"1" => nil}],
190
- ]
191
- record_batch = build_record_batch(:float, records)
192
- assert_equal(records, record_batch.raw_records)
193
- end
194
-
195
- test("DoubleArray") do
196
- records = [
197
- [{"0" => -1.0}],
198
- [nil],
199
- [{"1" => nil}],
200
- ]
201
- record_batch = build_record_batch(:double, records)
202
- assert_equal(records, record_batch.raw_records)
203
- end
204
-
205
- test("BinaryArray") do
206
- records = [
207
- [{"0" => "\xff".b}],
208
- [nil],
209
- [{"1" => nil}],
210
- ]
211
- record_batch = build_record_batch(:binary, records)
212
- assert_equal(records, record_batch.raw_records)
213
- end
214
-
215
- test("StringArray") do
216
- records = [
217
- [{"0" => "Ruby"}],
218
- [nil],
219
- [{"1" => nil}],
220
- ]
221
- record_batch = build_record_batch(:string, records)
222
- assert_equal(records, record_batch.raw_records)
223
- end
224
-
225
- test("Date32Array") do
226
- records = [
227
- [{"0" => Date.new(1960, 1, 1)}],
228
- [nil],
229
- [{"1" => nil}],
230
- ]
231
- record_batch = build_record_batch(:date32, records)
232
- assert_equal(records, record_batch.raw_records)
233
- end
234
-
235
- test("Date64Array") do
236
- records = [
237
- [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
238
- [nil],
239
- [{"1" => nil}],
240
- ]
241
- record_batch = build_record_batch(:date64, records)
242
- assert_equal(records, record_batch.raw_records)
243
- end
244
-
245
- sub_test_case("TimestampArray") do
246
- test("second") do
247
- records = [
248
- [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
249
- [nil],
250
- [{"1" => nil}],
251
- ]
252
- record_batch = build_record_batch({
253
- type: :timestamp,
254
- unit: :second,
255
- },
256
- records)
257
- assert_equal(records, record_batch.raw_records)
258
- end
259
-
260
- test("milli") do
261
- records = [
262
- [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
263
- [nil],
264
- [{"1" => nil}],
265
- ]
266
- record_batch = build_record_batch({
267
- type: :timestamp,
268
- unit: :milli,
269
- },
270
- records)
271
- assert_equal(records, record_batch.raw_records)
272
- end
273
-
274
- test("micro") do
275
- records = [
276
- [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
277
- [nil],
278
- [{"1" => nil}],
279
- ]
280
- record_batch = build_record_batch({
281
- type: :timestamp,
282
- unit: :micro,
283
- },
284
- records)
285
- assert_equal(records, record_batch.raw_records)
286
- end
287
-
288
- test("nano") do
289
- records = [
290
- [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
291
- [nil],
292
- [{"1" => nil}],
293
- ]
294
- record_batch = build_record_batch({
295
- type: :timestamp,
296
- unit: :nano,
297
- },
298
- records)
299
- assert_equal(records, record_batch.raw_records)
300
- end
301
- end
302
-
303
- sub_test_case("Time32Array") do
304
- test("second") do
305
- records = [
306
- [{"0" => 60 * 10}], # 00:10:00
307
- [nil],
308
- [{"1" => nil}],
309
- ]
310
- record_batch = build_record_batch({
311
- type: :time32,
312
- unit: :second,
313
- },
314
- records)
315
- assert_equal(records, record_batch.raw_records)
316
- end
317
-
318
- test("milli") do
319
- records = [
320
- [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
321
- [nil],
322
- [{"1" => nil}],
323
- ]
324
- record_batch = build_record_batch({
325
- type: :time32,
326
- unit: :milli,
327
- },
328
- records)
329
- assert_equal(records, record_batch.raw_records)
330
- end
331
- end
332
-
333
- sub_test_case("Time64Array") do
334
- test("micro") do
335
- records = [
336
- [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
337
- [nil],
338
- [{"1" => nil}],
339
- ]
340
- record_batch = build_record_batch({
341
- type: :time64,
342
- unit: :micro,
343
- },
344
- records)
345
- assert_equal(records, record_batch.raw_records)
346
- end
347
-
348
- test("nano") do
349
- records = [
350
- # 00:10:00.123456789
351
- [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
352
- [nil],
353
- [{"1" => nil}],
354
- ]
355
- record_batch = build_record_batch({
356
- type: :time64,
357
- unit: :nano,
358
- },
359
- records)
360
- assert_equal(records, record_batch.raw_records)
361
- end
362
- end
363
-
364
- test("Decimal128Array") do
365
- records = [
366
- [{"0" => BigDecimal("92.92")}],
367
- [nil],
368
- [{"1" => nil}],
369
- ]
370
- record_batch = build_record_batch({
371
- type: :decimal128,
372
- precision: 8,
373
- scale: 2,
374
- },
375
- records)
376
- assert_equal(records, record_batch.raw_records)
377
- end
378
-
379
- test("ListArray") do
380
- records = [
381
- [{"0" => [true, nil, false]}],
382
- [nil],
383
- [{"1" => nil}],
384
- ]
385
- record_batch = build_record_batch({
386
- type: :list,
387
- field: {
388
- name: :sub_element,
389
- type: :boolean,
390
- },
391
- },
392
- records)
393
- assert_equal(records, record_batch.raw_records)
394
- end
395
-
396
- test("StructArray") do
397
- records = [
398
- [{"0" => {"sub_field" => true}}],
399
- [nil],
400
- [{"1" => nil}],
401
- [{"0" => {"sub_field" => nil}}],
402
- ]
403
- record_batch = build_record_batch({
404
- type: :struct,
405
- fields: [
406
- {
407
- name: :sub_field,
408
- type: :boolean,
409
- },
410
- ],
411
- },
412
- records)
413
- assert_equal(records, record_batch.raw_records)
414
- end
415
-
416
- test("SparseUnionArray") do
417
- omit("Need to add support for SparseUnionArrayBuilder")
418
- records = [
419
- [{"0" => {"field1" => true}}],
420
- [nil],
421
- [{"1" => nil}],
422
- [{"0" => {"field2" => nil}}],
423
- ]
424
- record_batch = build_record_batch({
425
- type: :sparse_union,
426
- fields: [
427
- {
428
- name: :field1,
429
- type: :boolean,
430
- },
431
- {
432
- name: :field2,
433
- type: :uint8,
434
- },
435
- ],
436
- type_codes: [0, 1],
437
- },
438
- records)
439
- assert_equal(records, record_batch.raw_records)
440
- end
441
-
442
- test("DenseUnionArray") do
443
- omit("Need to add support for DenseUnionArrayBuilder")
444
- records = [
445
- [{"0" => {"field1" => true}}],
446
- [nil],
447
- [{"1" => nil}],
448
- [{"0" => {"field2" => nil}}],
449
- ]
450
- record_batch = build_record_batch({
451
- type: :dense_union,
452
- fields: [
453
- {
454
- name: :field1,
455
- type: :boolean,
456
- },
457
- {
458
- name: :field2,
459
- type: :uint8,
460
- },
461
- ],
462
- type_codes: [0, 1],
463
- },
464
- records)
465
- assert_equal(records, record_batch.raw_records)
466
- end
467
-
468
- test("DictionaryArray") do
469
- omit("Need to add support for DictionaryArrayBuilder")
470
- records = [
471
- [{"0" => "Ruby"}],
472
- [nil],
473
- [{"1" => nil}],
474
- [{"0" => "GLib"}],
475
- ]
476
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
477
- record_batch = build_record_batch({
478
- type: :dictionary,
479
- index_data_type: :int8,
480
- dictionary: dictionary,
481
- ordered: true,
482
- },
483
- records)
484
- assert_equal(records, record_batch.raw_records)
485
- end
486
- end