red-arrow 0.13.0 → 0.14.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

@@ -1,474 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
19
- def fields(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- {
27
- column: {
28
- type: :sparse_union,
29
- fields: [
30
- field_description.merge(name: "0"),
31
- field_description.merge(name: "1"),
32
- ],
33
- type_codes: type_codes,
34
- },
35
- }
36
- end
37
-
38
- # TODO: Use Arrow::RecordBatch.new(fields(type), records)
39
- def build_record_batch(type, records)
40
- type_codes = [0, 1]
41
- schema = Arrow::Schema.new(fields(type, type_codes))
42
- type_ids = []
43
- arrays = schema.fields[0].data_type.fields.collect do |field|
44
- sub_schema = Arrow::Schema.new([field])
45
- sub_records = records.collect do |record|
46
- [record[0].nil? ? nil : record[0][field.name]]
47
- end
48
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
49
- sub_records)
50
- sub_record_batch.columns[0]
51
- end
52
- records.each do |record|
53
- column = record[0]
54
- if column.nil?
55
- type_ids << nil
56
- elsif column.key?("0")
57
- type_ids << type_codes[0]
58
- elsif column.key?("1")
59
- type_ids << type_codes[1]
60
- end
61
- end
62
- # TODO
63
- # union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
64
- # Arrow::Int8Array.new(type_ids),
65
- # arrays)
66
- union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids),
67
- arrays)
68
- schema = Arrow::Schema.new(column: union_array.value_data_type)
69
- Arrow::RecordBatch.new(schema,
70
- records.size,
71
- [union_array])
72
- end
73
-
74
- test("NullArray") do
75
- records = [
76
- [{"0" => nil}],
77
- [nil],
78
- ]
79
- record_batch = build_record_batch(:null, records)
80
- assert_equal(records, record_batch.raw_records)
81
- end
82
-
83
- test("BooleanArray") do
84
- records = [
85
- [{"0" => true}],
86
- [nil],
87
- [{"1" => nil}],
88
- ]
89
- record_batch = build_record_batch(:boolean, records)
90
- assert_equal(records, record_batch.raw_records)
91
- end
92
-
93
- test("Int8Array") do
94
- records = [
95
- [{"0" => -(2 ** 7)}],
96
- [nil],
97
- [{"1" => nil}],
98
- ]
99
- record_batch = build_record_batch(:int8, records)
100
- assert_equal(records, record_batch.raw_records)
101
- end
102
-
103
- test("UInt8Array") do
104
- records = [
105
- [{"0" => (2 ** 8) - 1}],
106
- [nil],
107
- [{"1" => nil}],
108
- ]
109
- record_batch = build_record_batch(:uint8, records)
110
- assert_equal(records, record_batch.raw_records)
111
- end
112
-
113
- test("Int16Array") do
114
- records = [
115
- [{"0" => -(2 ** 15)}],
116
- [nil],
117
- [{"1" => nil}],
118
- ]
119
- record_batch = build_record_batch(:int16, records)
120
- assert_equal(records, record_batch.raw_records)
121
- end
122
-
123
- test("UInt16Array") do
124
- records = [
125
- [{"0" => (2 ** 16) - 1}],
126
- [nil],
127
- [{"1" => nil}],
128
- ]
129
- record_batch = build_record_batch(:uint16, records)
130
- assert_equal(records, record_batch.raw_records)
131
- end
132
-
133
- test("Int32Array") do
134
- records = [
135
- [{"0" => -(2 ** 31)}],
136
- [nil],
137
- [{"1" => nil}],
138
- ]
139
- record_batch = build_record_batch(:int32, records)
140
- assert_equal(records, record_batch.raw_records)
141
- end
142
-
143
- test("UInt32Array") do
144
- records = [
145
- [{"0" => (2 ** 32) - 1}],
146
- [nil],
147
- [{"1" => nil}],
148
- ]
149
- record_batch = build_record_batch(:uint32, records)
150
- assert_equal(records, record_batch.raw_records)
151
- end
152
-
153
- test("Int64Array") do
154
- records = [
155
- [{"0" => -(2 ** 63)}],
156
- [nil],
157
- [{"1" => nil}],
158
- ]
159
- record_batch = build_record_batch(:int64, records)
160
- assert_equal(records, record_batch.raw_records)
161
- end
162
-
163
- test("UInt64Array") do
164
- records = [
165
- [{"0" => (2 ** 64) - 1}],
166
- [nil],
167
- [{"1" => nil}],
168
- ]
169
- record_batch = build_record_batch(:uint64, records)
170
- assert_equal(records, record_batch.raw_records)
171
- end
172
-
173
- test("FloatArray") do
174
- records = [
175
- [{"0" => -1.0}],
176
- [nil],
177
- [{"1" => nil}],
178
- ]
179
- record_batch = build_record_batch(:float, records)
180
- assert_equal(records, record_batch.raw_records)
181
- end
182
-
183
- test("DoubleArray") do
184
- records = [
185
- [{"0" => -1.0}],
186
- [nil],
187
- [{"1" => nil}],
188
- ]
189
- record_batch = build_record_batch(:double, records)
190
- assert_equal(records, record_batch.raw_records)
191
- end
192
-
193
- test("BinaryArray") do
194
- records = [
195
- [{"0" => "\xff".b}],
196
- [nil],
197
- [{"1" => nil}],
198
- ]
199
- record_batch = build_record_batch(:binary, records)
200
- assert_equal(records, record_batch.raw_records)
201
- end
202
-
203
- test("StringArray") do
204
- records = [
205
- [{"0" => "Ruby"}],
206
- [nil],
207
- [{"1" => nil}],
208
- ]
209
- record_batch = build_record_batch(:string, records)
210
- assert_equal(records, record_batch.raw_records)
211
- end
212
-
213
- test("Date32Array") do
214
- records = [
215
- [{"0" => Date.new(1960, 1, 1)}],
216
- [nil],
217
- [{"1" => nil}],
218
- ]
219
- record_batch = build_record_batch(:date32, records)
220
- assert_equal(records, record_batch.raw_records)
221
- end
222
-
223
- test("Date64Array") do
224
- records = [
225
- [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
226
- [nil],
227
- [{"1" => nil}],
228
- ]
229
- record_batch = build_record_batch(:date64, records)
230
- assert_equal(records, record_batch.raw_records)
231
- end
232
-
233
- sub_test_case("TimestampArray") do
234
- test("second") do
235
- records = [
236
- [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
237
- [nil],
238
- [{"1" => nil}],
239
- ]
240
- record_batch = build_record_batch({
241
- type: :timestamp,
242
- unit: :second,
243
- },
244
- records)
245
- assert_equal(records, record_batch.raw_records)
246
- end
247
-
248
- test("milli") do
249
- records = [
250
- [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
251
- [nil],
252
- [{"1" => nil}],
253
- ]
254
- record_batch = build_record_batch({
255
- type: :timestamp,
256
- unit: :milli,
257
- },
258
- records)
259
- assert_equal(records, record_batch.raw_records)
260
- end
261
-
262
- test("micro") do
263
- records = [
264
- [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
265
- [nil],
266
- [{"1" => nil}],
267
- ]
268
- record_batch = build_record_batch({
269
- type: :timestamp,
270
- unit: :micro,
271
- },
272
- records)
273
- assert_equal(records, record_batch.raw_records)
274
- end
275
-
276
- test("nano") do
277
- records = [
278
- [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
279
- [nil],
280
- [{"1" => nil}],
281
- ]
282
- record_batch = build_record_batch({
283
- type: :timestamp,
284
- unit: :nano,
285
- },
286
- records)
287
- assert_equal(records, record_batch.raw_records)
288
- end
289
- end
290
-
291
- sub_test_case("Time32Array") do
292
- test("second") do
293
- records = [
294
- [{"0" => 60 * 10}], # 00:10:00
295
- [nil],
296
- [{"1" => nil}],
297
- ]
298
- record_batch = build_record_batch({
299
- type: :time32,
300
- unit: :second,
301
- },
302
- records)
303
- assert_equal(records, record_batch.raw_records)
304
- end
305
-
306
- test("milli") do
307
- records = [
308
- [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
309
- [nil],
310
- [{"1" => nil}],
311
- ]
312
- record_batch = build_record_batch({
313
- type: :time32,
314
- unit: :milli,
315
- },
316
- records)
317
- assert_equal(records, record_batch.raw_records)
318
- end
319
- end
320
-
321
- sub_test_case("Time64Array") do
322
- test("micro") do
323
- records = [
324
- [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
325
- [nil],
326
- [{"1" => nil}],
327
- ]
328
- record_batch = build_record_batch({
329
- type: :time64,
330
- unit: :micro,
331
- },
332
- records)
333
- assert_equal(records, record_batch.raw_records)
334
- end
335
-
336
- test("nano") do
337
- records = [
338
- # 00:10:00.123456789
339
- [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
340
- [nil],
341
- [{"1" => nil}],
342
- ]
343
- record_batch = build_record_batch({
344
- type: :time64,
345
- unit: :nano,
346
- },
347
- records)
348
- assert_equal(records, record_batch.raw_records)
349
- end
350
- end
351
-
352
- test("Decimal128Array") do
353
- records = [
354
- [{"0" => BigDecimal("92.92")}],
355
- [nil],
356
- [{"1" => nil}],
357
- ]
358
- record_batch = build_record_batch({
359
- type: :decimal128,
360
- precision: 8,
361
- scale: 2,
362
- },
363
- records)
364
- assert_equal(records, record_batch.raw_records)
365
- end
366
-
367
- test("ListArray") do
368
- records = [
369
- [{"0" => [true, nil, false]}],
370
- [nil],
371
- [{"1" => nil}],
372
- ]
373
- record_batch = build_record_batch({
374
- type: :list,
375
- field: {
376
- name: :sub_element,
377
- type: :boolean,
378
- },
379
- },
380
- records)
381
- assert_equal(records, record_batch.raw_records)
382
- end
383
-
384
- test("StructArray") do
385
- records = [
386
- [{"0" => {"sub_field" => true}}],
387
- [nil],
388
- [{"1" => nil}],
389
- [{"0" => {"sub_field" => nil}}],
390
- ]
391
- record_batch = build_record_batch({
392
- type: :struct,
393
- fields: [
394
- {
395
- name: :sub_field,
396
- type: :boolean,
397
- },
398
- ],
399
- },
400
- records)
401
- assert_equal(records, record_batch.raw_records)
402
- end
403
-
404
- test("SparseUnionArray") do
405
- omit("Need to add support for SparseUnionArrayBuilder")
406
- records = [
407
- [{"0" => {"field1" => true}}],
408
- [nil],
409
- [{"1" => nil}],
410
- [{"0" => {"field2" => nil}}],
411
- ]
412
- record_batch = build_record_batch({
413
- type: :sparse_union,
414
- fields: [
415
- {
416
- name: :field1,
417
- type: :boolean,
418
- },
419
- {
420
- name: :field2,
421
- type: :uint8,
422
- },
423
- ],
424
- type_codes: [0, 1],
425
- },
426
- records)
427
- assert_equal(records, record_batch.raw_records)
428
- end
429
-
430
- test("DenseUnionArray") do
431
- omit("Need to add support for DenseUnionArrayBuilder")
432
- records = [
433
- [{"0" => {"field1" => true}}],
434
- [nil],
435
- [{"1" => nil}],
436
- [{"0" => {"field2" => nil}}],
437
- ]
438
- record_batch = build_record_batch({
439
- type: :dense_union,
440
- fields: [
441
- {
442
- name: :field1,
443
- type: :boolean,
444
- },
445
- {
446
- name: :field2,
447
- type: :uint8,
448
- },
449
- ],
450
- type_codes: [0, 1],
451
- },
452
- records)
453
- assert_equal(records, record_batch.raw_records)
454
- end
455
-
456
- test("DictionaryArray") do
457
- omit("Need to add support for DictionaryArrayBuilder")
458
- records = [
459
- [{"0" => "Ruby"}],
460
- [nil],
461
- [{"1" => nil}],
462
- [{"0" => "GLib"}],
463
- ]
464
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
465
- record_batch = build_record_batch({
466
- type: :dictionary,
467
- index_data_type: :int8,
468
- dictionary: dictionary,
469
- ordered: true,
470
- },
471
- records)
472
- assert_equal(records, record_batch.raw_records)
473
- end
474
- end