red-arrow 0.13.0 → 0.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. More details are available via the link on the original page.

@@ -15,35 +15,51 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
19
- test("3 elements") do
18
+ module RawRecordsMultipleColumnsTests
19
+ def test_3_elements
20
20
  records = [
21
21
  [true, nil, "Ruby"],
22
22
  [nil, 0, "GLib"],
23
23
  [false, 2 ** 8 - 1, nil],
24
24
  ]
25
- record_batch = Arrow::RecordBatch.new([
26
- {name: :column0, type: :boolean},
27
- {name: :column1, type: :uint8},
28
- {name: :column2, type: :string},
29
- ],
30
- records)
31
- assert_equal(records, record_batch.raw_records)
25
+ target = build([
26
+ {name: :column0, type: :boolean},
27
+ {name: :column1, type: :uint8},
28
+ {name: :column2, type: :string},
29
+ ],
30
+ records)
31
+ assert_equal(records, target.raw_records)
32
32
  end
33
33
 
34
- test("4 elements") do
34
+ def test_4_elements
35
35
  records = [
36
36
  [true, nil, "Ruby", -(2 ** 63)],
37
37
  [nil, 0, "GLib", nil],
38
38
  [false, 2 ** 8 - 1, nil, (2 ** 63) - 1],
39
39
  ]
40
- record_batch = Arrow::RecordBatch.new([
41
- {name: :column0, type: :boolean},
42
- {name: :column1, type: :uint8},
43
- {name: :column2, type: :string},
44
- {name: :column3, type: :int64},
45
- ],
46
- records)
47
- assert_equal(records, record_batch.raw_records)
40
+ target = build([
41
+ {name: :column0, type: :boolean},
42
+ {name: :column1, type: :uint8},
43
+ {name: :column2, type: :string},
44
+ {name: :column3, type: :int64},
45
+ ],
46
+ records)
47
+ assert_equal(records, target.raw_records)
48
+ end
49
+ end
50
+
51
+ class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
52
+ include RawRecordsMultipleColumnsTests
53
+
54
+ def build(schema, records)
55
+ Arrow::RecordBatch.new(schema, records)
56
+ end
57
+ end
58
+
59
+ class RawRecordsTableMultipleColumnsTest < Test::Unit::TestCase
60
+ include RawRecordsMultipleColumnsTests
61
+
62
+ def build(schema, records)
63
+ Arrow::Table.new(schema, records)
48
64
  end
49
65
  end
@@ -0,0 +1,480 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module RawRecordsSparseUnionArrayTests
19
+ def build_schema(type, type_codes)
20
+ field_description = {}
21
+ if type.is_a?(Hash)
22
+ field_description = field_description.merge(type)
23
+ else
24
+ field_description[:type] = type
25
+ end
26
+ {
27
+ column: {
28
+ type: :sparse_union,
29
+ fields: [
30
+ field_description.merge(name: "0"),
31
+ field_description.merge(name: "1"),
32
+ ],
33
+ type_codes: type_codes,
34
+ },
35
+ }
36
+ end
37
+
38
+ # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
39
+ def build_record_batch(type, records)
40
+ type_codes = [0, 1]
41
+ schema = Arrow::Schema.new(build_schema(type, type_codes))
42
+ type_ids = []
43
+ arrays = schema.fields[0].data_type.fields.collect do |field|
44
+ sub_schema = Arrow::Schema.new([field])
45
+ sub_records = records.collect do |record|
46
+ [record[0].nil? ? nil : record[0][field.name]]
47
+ end
48
+ sub_record_batch = Arrow::RecordBatch.new(sub_schema,
49
+ sub_records)
50
+ sub_record_batch.columns[0]
51
+ end
52
+ records.each do |record|
53
+ column = record[0]
54
+ if column.nil?
55
+ type_ids << nil
56
+ elsif column.key?("0")
57
+ type_ids << type_codes[0]
58
+ elsif column.key?("1")
59
+ type_ids << type_codes[1]
60
+ end
61
+ end
62
+ union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
63
+ Arrow::Int8Array.new(type_ids),
64
+ arrays)
65
+ schema = Arrow::Schema.new(column: union_array.value_data_type)
66
+ Arrow::RecordBatch.new(schema,
67
+ records.size,
68
+ [union_array])
69
+ end
70
+
71
+ def test_null
72
+ records = [
73
+ [{"0" => nil}],
74
+ [nil],
75
+ ]
76
+ target = build(:null, records)
77
+ assert_equal(records, target.raw_records)
78
+ end
79
+
80
+ def test_boolean
81
+ records = [
82
+ [{"0" => true}],
83
+ [nil],
84
+ [{"1" => nil}],
85
+ ]
86
+ target = build(:boolean, records)
87
+ assert_equal(records, target.raw_records)
88
+ end
89
+
90
+ def test_int8
91
+ records = [
92
+ [{"0" => -(2 ** 7)}],
93
+ [nil],
94
+ [{"1" => nil}],
95
+ ]
96
+ target = build(:int8, records)
97
+ assert_equal(records, target.raw_records)
98
+ end
99
+
100
+ def test_uint8
101
+ records = [
102
+ [{"0" => (2 ** 8) - 1}],
103
+ [nil],
104
+ [{"1" => nil}],
105
+ ]
106
+ target = build(:uint8, records)
107
+ assert_equal(records, target.raw_records)
108
+ end
109
+
110
+ def test_int16
111
+ records = [
112
+ [{"0" => -(2 ** 15)}],
113
+ [nil],
114
+ [{"1" => nil}],
115
+ ]
116
+ target = build(:int16, records)
117
+ assert_equal(records, target.raw_records)
118
+ end
119
+
120
+ def test_uint16
121
+ records = [
122
+ [{"0" => (2 ** 16) - 1}],
123
+ [nil],
124
+ [{"1" => nil}],
125
+ ]
126
+ target = build(:uint16, records)
127
+ assert_equal(records, target.raw_records)
128
+ end
129
+
130
+ def test_int32
131
+ records = [
132
+ [{"0" => -(2 ** 31)}],
133
+ [nil],
134
+ [{"1" => nil}],
135
+ ]
136
+ target = build(:int32, records)
137
+ assert_equal(records, target.raw_records)
138
+ end
139
+
140
+ def test_uint32
141
+ records = [
142
+ [{"0" => (2 ** 32) - 1}],
143
+ [nil],
144
+ [{"1" => nil}],
145
+ ]
146
+ target = build(:uint32, records)
147
+ assert_equal(records, target.raw_records)
148
+ end
149
+
150
+ def test_int64
151
+ records = [
152
+ [{"0" => -(2 ** 63)}],
153
+ [nil],
154
+ [{"1" => nil}],
155
+ ]
156
+ target = build(:int64, records)
157
+ assert_equal(records, target.raw_records)
158
+ end
159
+
160
+ def test_uint64
161
+ records = [
162
+ [{"0" => (2 ** 64) - 1}],
163
+ [nil],
164
+ [{"1" => nil}],
165
+ ]
166
+ target = build(:uint64, records)
167
+ assert_equal(records, target.raw_records)
168
+ end
169
+
170
+ def test_float
171
+ records = [
172
+ [{"0" => -1.0}],
173
+ [nil],
174
+ [{"1" => nil}],
175
+ ]
176
+ target = build(:float, records)
177
+ assert_equal(records, target.raw_records)
178
+ end
179
+
180
+ def test_double
181
+ records = [
182
+ [{"0" => -1.0}],
183
+ [nil],
184
+ [{"1" => nil}],
185
+ ]
186
+ target = build(:double, records)
187
+ assert_equal(records, target.raw_records)
188
+ end
189
+
190
+ def test_binary
191
+ records = [
192
+ [{"0" => "\xff".b}],
193
+ [nil],
194
+ [{"1" => nil}],
195
+ ]
196
+ target = build(:binary, records)
197
+ assert_equal(records, target.raw_records)
198
+ end
199
+
200
+ def test_string
201
+ records = [
202
+ [{"0" => "Ruby"}],
203
+ [nil],
204
+ [{"1" => nil}],
205
+ ]
206
+ target = build(:string, records)
207
+ assert_equal(records, target.raw_records)
208
+ end
209
+
210
+ def test_date32
211
+ records = [
212
+ [{"0" => Date.new(1960, 1, 1)}],
213
+ [nil],
214
+ [{"1" => nil}],
215
+ ]
216
+ target = build(:date32, records)
217
+ assert_equal(records, target.raw_records)
218
+ end
219
+
220
+ def test_date64
221
+ records = [
222
+ [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
223
+ [nil],
224
+ [{"1" => nil}],
225
+ ]
226
+ target = build(:date64, records)
227
+ assert_equal(records, target.raw_records)
228
+ end
229
+
230
+ def test_timestamp_second
231
+ records = [
232
+ [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
233
+ [nil],
234
+ [{"1" => nil}],
235
+ ]
236
+ target = build({
237
+ type: :timestamp,
238
+ unit: :second,
239
+ },
240
+ records)
241
+ assert_equal(records, target.raw_records)
242
+ end
243
+
244
+ def test_timestamp_milli
245
+ records = [
246
+ [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
247
+ [nil],
248
+ [{"1" => nil}],
249
+ ]
250
+ target = build({
251
+ type: :timestamp,
252
+ unit: :milli,
253
+ },
254
+ records)
255
+ assert_equal(records, target.raw_records)
256
+ end
257
+
258
+ def test_timestamp_micro
259
+ records = [
260
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
261
+ [nil],
262
+ [{"1" => nil}],
263
+ ]
264
+ target = build({
265
+ type: :timestamp,
266
+ unit: :micro,
267
+ },
268
+ records)
269
+ assert_equal(records, target.raw_records)
270
+ end
271
+
272
+ def test_timestamp_nano
273
+ records = [
274
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
275
+ [nil],
276
+ [{"1" => nil}],
277
+ ]
278
+ target = build({
279
+ type: :timestamp,
280
+ unit: :nano,
281
+ },
282
+ records)
283
+ assert_equal(records, target.raw_records)
284
+ end
285
+
286
+ def test_time32_second
287
+ records = [
288
+ [{"0" => 60 * 10}], # 00:10:00
289
+ [nil],
290
+ [{"1" => nil}],
291
+ ]
292
+ target = build({
293
+ type: :time32,
294
+ unit: :second,
295
+ },
296
+ records)
297
+ assert_equal(records, target.raw_records)
298
+ end
299
+
300
+ def test_time32_milli
301
+ records = [
302
+ [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
303
+ [nil],
304
+ [{"1" => nil}],
305
+ ]
306
+ target = build({
307
+ type: :time32,
308
+ unit: :milli,
309
+ },
310
+ records)
311
+ assert_equal(records, target.raw_records)
312
+ end
313
+
314
+ def test_time64_micro
315
+ records = [
316
+ [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
317
+ [nil],
318
+ [{"1" => nil}],
319
+ ]
320
+ target = build({
321
+ type: :time64,
322
+ unit: :micro,
323
+ },
324
+ records)
325
+ assert_equal(records, target.raw_records)
326
+ end
327
+
328
+ def test_time64_nano
329
+ records = [
330
+ [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}], # 00:10:00.123456789
331
+ [nil],
332
+ [{"1" => nil}],
333
+ ]
334
+ target = build({
335
+ type: :time64,
336
+ unit: :nano,
337
+ },
338
+ records)
339
+ assert_equal(records, target.raw_records)
340
+ end
341
+
342
+ def test_decimal128
343
+ records = [
344
+ [{"0" => BigDecimal("92.92")}],
345
+ [nil],
346
+ [{"1" => nil}],
347
+ ]
348
+ target = build({
349
+ type: :decimal128,
350
+ precision: 8,
351
+ scale: 2,
352
+ },
353
+ records)
354
+ assert_equal(records, target.raw_records)
355
+ end
356
+
357
+ def test_list
358
+ records = [
359
+ [{"0" => [true, nil, false]}],
360
+ [nil],
361
+ [{"1" => nil}],
362
+ ]
363
+ target = build({
364
+ type: :list,
365
+ field: {
366
+ name: :sub_element,
367
+ type: :boolean,
368
+ },
369
+ },
370
+ records)
371
+ assert_equal(records, target.raw_records)
372
+ end
373
+
374
+ def test_struct
375
+ records = [
376
+ [{"0" => {"sub_field" => true}}],
377
+ [nil],
378
+ [{"1" => nil}],
379
+ [{"0" => {"sub_field" => nil}}],
380
+ ]
381
+ target = build({
382
+ type: :struct,
383
+ fields: [
384
+ {
385
+ name: :sub_field,
386
+ type: :boolean,
387
+ },
388
+ ],
389
+ },
390
+ records)
391
+ assert_equal(records, target.raw_records)
392
+ end
393
+
394
+ def test_sparse_union
395
+ omit("Need to add support for SparseUnionArrayBuilder")
396
+ records = [
397
+ [{"0" => {"field1" => true}}],
398
+ [nil],
399
+ [{"1" => nil}],
400
+ [{"0" => {"field2" => nil}}],
401
+ ]
402
+ target = build({
403
+ type: :sparse_union,
404
+ fields: [
405
+ {
406
+ name: :field1,
407
+ type: :boolean,
408
+ },
409
+ {
410
+ name: :field2,
411
+ type: :uint8,
412
+ },
413
+ ],
414
+ type_codes: [0, 1],
415
+ },
416
+ records)
417
+ assert_equal(records, target.raw_records)
418
+ end
419
+
420
+ def test_dense_union
421
+ omit("Need to add support for DenseUnionArrayBuilder")
422
+ records = [
423
+ [{"0" => {"field1" => true}}],
424
+ [nil],
425
+ [{"1" => nil}],
426
+ [{"0" => {"field2" => nil}}],
427
+ ]
428
+ target = build({
429
+ type: :dense_union,
430
+ fields: [
431
+ {
432
+ name: :field1,
433
+ type: :boolean,
434
+ },
435
+ {
436
+ name: :field2,
437
+ type: :uint8,
438
+ },
439
+ ],
440
+ type_codes: [0, 1],
441
+ },
442
+ records)
443
+ assert_equal(records, target.raw_records)
444
+ end
445
+
446
+ def test_dictionary
447
+ omit("Need to add support for DictionaryArrayBuilder")
448
+ records = [
449
+ [{"0" => "Ruby"}],
450
+ [nil],
451
+ [{"1" => nil}],
452
+ [{"0" => "GLib"}],
453
+ ]
454
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
455
+ target = build({
456
+ type: :dictionary,
457
+ index_data_type: :int8,
458
+ dictionary: dictionary,
459
+ ordered: true,
460
+ },
461
+ records)
462
+ assert_equal(records, target.raw_records)
463
+ end
464
+ end
465
+
466
+ class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
467
+ include RawRecordsSparseUnionArrayTests
468
+
469
+ def build(type, records)
470
+ build_record_batch(type, records)
471
+ end
472
+ end
473
+
474
+ class RawRecordsTableSparseUnionArrayTest < Test::Unit::TestCase
475
+ include RawRecordsSparseUnionArrayTests
476
+
477
+ def build(type, records)
478
+ build_record_batch(type, records).to_table
479
+ end
480
+ end