red-arrow 0.13.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

@@ -15,35 +15,51 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
19
- test("3 elements") do
18
# Shared raw_records round-trip tests for targets with several columns.
#
# Including classes must provide build(schema, records), which returns an
# object responding to raw_records.
module RawRecordsMultipleColumnsTests
  # Three columns (boolean, uint8, string), each containing one nil.
  def test_3_elements
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
    ]
    rows = [
      [true, nil, "Ruby"],
      [nil, 0, "GLib"],
      [false, 2 ** 8 - 1, nil],
    ]
    assert_equal(rows, build(schema, rows).raw_records)
  end

  # Same as above plus an int64 column holding -(2 ** 63) and (2 ** 63) - 1.
  def test_4_elements
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
      {name: :column3, type: :int64},
    ]
    rows = [
      [true, nil, "Ruby", -(2 ** 63)],
      [nil, 0, "GLib", nil],
      [false, 2 ** 8 - 1, nil, (2 ** 63) - 1],
    ]
    assert_equal(rows, build(schema, rows).raw_records)
  end
end
50
+
51
# Runs the shared multiple-columns tests against Arrow::RecordBatch.
class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
  include RawRecordsMultipleColumnsTests

  # Builds the object under test: a record batch made from the given schema
  # description and raw records.
  def build(schema, records)
    Arrow::RecordBatch.new(schema, records)
  end
end
58
+
59
# Runs the shared multiple-columns tests against Arrow::Table.
class RawRecordsTableMultipleColumnsTest < Test::Unit::TestCase
  include RawRecordsMultipleColumnsTests

  # Builds the object under test: a table made from the given schema
  # description and raw records.
  def build(schema, records)
    Arrow::Table.new(schema, records)
  end
end
@@ -0,0 +1,480 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared raw_records round-trip tests for a single sparse union column.
#
# Every record is a one-element row. The element is either nil (a null
# union slot) or a one-entry Hash whose key, "0" or "1", names the union
# child that carries the value.
#
# Including classes must provide build(type, records), which returns an
# object responding to raw_records.
module RawRecordsSparseUnionArrayTests
  # Describes a schema with one sparse union column named :column whose two
  # children, named "0" and "1", share the same value type.
  #
  # type:       a type name (e.g. :int8) or a Hash field description
  #             (e.g. {type: :timestamp, unit: :second}).
  # type_codes: the union type codes, stored as given.
  #
  # Returns the schema description as a Hash. The type argument is not
  # mutated.
  def build_schema(type, type_codes)
    field_description = {}
    if type.is_a?(Hash)
      field_description = field_description.merge(type)
    else
      field_description[:type] = type
    end
    {
      column: {
        type: :sparse_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Maps one record's union cell to the type code of the child it selects.
  #
  # column:     nil, or a Hash keyed by "0" or "1".
  # type_codes: two-element list of type codes for children "0" and "1".
  #
  # Returns nil for a nil cell, otherwise the matching type code.
  # Raises ArgumentError when the cell names neither child. Skipping such a
  # cell would leave the type id list shorter than the child arrays.
  def detect_type_id(column, type_codes)
    return nil if column.nil?

    if column.key?("0")
      type_codes[0]
    elsif column.key?("1")
      type_codes[1]
    else
      message = "union record must have \"0\" or \"1\" key: #{column.inspect}"
      raise ArgumentError, message
    end
  end

  # Builds a record batch with one sparse union column from raw records.
  #
  # type:    value type shared by both union children (see build_schema).
  # records: rows as described in the module comment.
  #
  # Raises ArgumentError when a non-nil record names neither child.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    schema = Arrow::Schema.new(build_schema(type, type_codes))
    union_data_type = schema.fields[0].data_type
    # One child array per union field. Each has one slot per record and is
    # nil wherever the record is nil or does not carry that field's key.
    arrays = union_data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = records.collect do |record|
        column = record[0]
        [column.nil? ? nil : column[field.name]]
      end
      sub_record_batch = Arrow::RecordBatch.new(sub_schema,
                                                sub_records)
      sub_record_batch.columns[0]
    end
    # Exactly one type id per record so ids stay aligned with the children.
    type_ids = records.collect do |record|
      detect_type_id(record[0], type_codes)
    end
    union_array = Arrow::SparseUnionArray.new(union_data_type,
                                              Arrow::Int8Array.new(type_ids),
                                              arrays)
    schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(schema,
                           records.size,
                           [union_array])
  end

  def test_null
    records = [
      [{"0" => nil}],
      [nil],
    ]
    target = build(:null, records)
    assert_equal(records, target.raw_records)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:boolean, records)
    assert_equal(records, target.raw_records)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:int8, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:uint8, records)
    assert_equal(records, target.raw_records)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:int16, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:uint16, records)
    assert_equal(records, target.raw_records)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:int32, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:uint32, records)
    assert_equal(records, target.raw_records)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:int64, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:uint64, records)
    assert_equal(records, target.raw_records)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:float, records)
    assert_equal(records, target.raw_records)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:double, records)
    assert_equal(records, target.raw_records)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:binary, records)
    assert_equal(records, target.raw_records)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:string, records)
    assert_equal(records, target.raw_records)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:date32, records)
    assert_equal(records, target.raw_records)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [nil],
      [{"1" => nil}],
    ]
    target = build(:date64, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :second,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :milli,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :micro,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :nano,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_second
    records = [
      [{"0" => 60 * 10}], # 00:10:00
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :second,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_milli
    records = [
      [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :milli,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_micro
    records = [
      [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :micro,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_nano
    records = [
      [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}], # 00:10:00.123456789
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :nano,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal128,
                     precision: 8,
                     scale: 2,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [nil],
      [{"1" => nil}],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :sub_element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :sub_field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  # Nested sparse union as the child value type; omitted until the builder
  # named in the omit message is available.
  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  # Nested dense union as the child value type; omitted until the builder
  # named in the omit message is available.
  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  # Dictionary-encoded child values; omitted until the builder named in the
  # omit message is available.
  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"0" => "Ruby"}],
      [nil],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     dictionary: dictionary,
                     ordered: true,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end
end
465
+
466
# Runs the shared sparse union tests against a record batch.
class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsSparseUnionArrayTests

  # The object under test is the record batch produced by the shared helper.
  def build(type, records)
    build_record_batch(type, records)
  end
end
473
+
474
# Runs the shared sparse union tests against a table.
class RawRecordsTableSparseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsSparseUnionArrayTests

  # The object under test is the shared helper's record batch converted
  # with to_table.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch.to_table
  end
end