red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -0,0 +1,486 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
19
+ def fields(type, type_codes)
20
+ field_description = {}
21
+ if type.is_a?(Hash)
22
+ field_description = field_description.merge(type)
23
+ else
24
+ field_description[:type] = type
25
+ end
26
+ {
27
+ column: {
28
+ type: :dense_union,
29
+ fields: [
30
+ field_description.merge(name: "0"),
31
+ field_description.merge(name: "1"),
32
+ ],
33
+ type_codes: type_codes,
34
+ },
35
+ }
36
+ end
37
+
38
+ # TODO: Use Arrow::RecordBatch.new(fields(type, type_codes), records)
39
+ def build_record_batch(type, records)
40
+ type_codes = [0, 1]
41
+ schema = Arrow::Schema.new(fields(type, type_codes))
42
+ type_ids = []
43
+ offsets = []
44
+ arrays = schema.fields[0].data_type.fields.collect do |field|
45
+ sub_schema = Arrow::Schema.new([field])
46
+ sub_records = []
47
+ records.each do |record|
48
+ column = record[0]
49
+ next if column.nil?
50
+ next unless column.key?(field.name)
51
+ sub_records << [column[field.name]]
52
+ end
53
+ sub_record_batch = Arrow::RecordBatch.new(sub_schema,
54
+ sub_records)
55
+ sub_record_batch.columns[0]
56
+ end
57
+ records.each do |record|
58
+ column = record[0]
59
+ if column.nil?
60
+ type_ids << nil
61
+ offsets << 0
62
+ elsif column.key?("0")
63
+ type_id = type_codes[0]
64
+ type_ids << type_id
65
+ offsets << (type_ids.count(type_id) - 1)
66
+ elsif column.key?("1")
67
+ type_id = type_codes[1]
68
+ type_ids << type_id
69
+ offsets << (type_ids.count(type_id) - 1)
70
+ end
71
+ end
72
+ # TODO: Switch to the call below, which also passes the union's data type:
73
+ # union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
74
+ # Arrow::Int8Array.new(type_ids),
75
+ # Arrow::Int32Array.new(offsets),
76
+ # arrays)
77
+ union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids),
78
+ Arrow::Int32Array.new(offsets),
79
+ arrays)
80
+ schema = Arrow::Schema.new(column: union_array.value_data_type)
81
+ Arrow::RecordBatch.new(schema,
82
+ records.size,
83
+ [union_array])
84
+ end
85
+
86
+ test("NullArray") do
87
+ records = [
88
+ [{"0" => nil}],
89
+ [nil],
90
+ ]
91
+ record_batch = build_record_batch(:null, records)
92
+ assert_equal(records, record_batch.raw_records)
93
+ end
94
+
95
+ test("BooleanArray") do
96
+ records = [
97
+ [{"0" => true}],
98
+ [nil],
99
+ [{"1" => nil}],
100
+ ]
101
+ record_batch = build_record_batch(:boolean, records)
102
+ assert_equal(records, record_batch.raw_records)
103
+ end
104
+
105
+ test("Int8Array") do
106
+ records = [
107
+ [{"0" => -(2 ** 7)}],
108
+ [nil],
109
+ [{"1" => nil}],
110
+ ]
111
+ record_batch = build_record_batch(:int8, records)
112
+ assert_equal(records, record_batch.raw_records)
113
+ end
114
+
115
+ test("UInt8Array") do
116
+ records = [
117
+ [{"0" => (2 ** 8) - 1}],
118
+ [nil],
119
+ [{"1" => nil}],
120
+ ]
121
+ record_batch = build_record_batch(:uint8, records)
122
+ assert_equal(records, record_batch.raw_records)
123
+ end
124
+
125
+ test("Int16Array") do
126
+ records = [
127
+ [{"0" => -(2 ** 15)}],
128
+ [nil],
129
+ [{"1" => nil}],
130
+ ]
131
+ record_batch = build_record_batch(:int16, records)
132
+ assert_equal(records, record_batch.raw_records)
133
+ end
134
+
135
+ test("UInt16Array") do
136
+ records = [
137
+ [{"0" => (2 ** 16) - 1}],
138
+ [nil],
139
+ [{"1" => nil}],
140
+ ]
141
+ record_batch = build_record_batch(:uint16, records)
142
+ assert_equal(records, record_batch.raw_records)
143
+ end
144
+
145
+ test("Int32Array") do
146
+ records = [
147
+ [{"0" => -(2 ** 31)}],
148
+ [nil],
149
+ [{"1" => nil}],
150
+ ]
151
+ record_batch = build_record_batch(:int32, records)
152
+ assert_equal(records, record_batch.raw_records)
153
+ end
154
+
155
+ test("UInt32Array") do
156
+ records = [
157
+ [{"0" => (2 ** 32) - 1}],
158
+ [nil],
159
+ [{"1" => nil}],
160
+ ]
161
+ record_batch = build_record_batch(:uint32, records)
162
+ assert_equal(records, record_batch.raw_records)
163
+ end
164
+
165
+ test("Int64Array") do
166
+ records = [
167
+ [{"0" => -(2 ** 63)}],
168
+ [nil],
169
+ [{"1" => nil}],
170
+ ]
171
+ record_batch = build_record_batch(:int64, records)
172
+ assert_equal(records, record_batch.raw_records)
173
+ end
174
+
175
+ test("UInt64Array") do
176
+ records = [
177
+ [{"0" => (2 ** 64) - 1}],
178
+ [nil],
179
+ [{"1" => nil}],
180
+ ]
181
+ record_batch = build_record_batch(:uint64, records)
182
+ assert_equal(records, record_batch.raw_records)
183
+ end
184
+
185
+ test("FloatArray") do
186
+ records = [
187
+ [{"0" => -1.0}],
188
+ [nil],
189
+ [{"1" => nil}],
190
+ ]
191
+ record_batch = build_record_batch(:float, records)
192
+ assert_equal(records, record_batch.raw_records)
193
+ end
194
+
195
+ test("DoubleArray") do
196
+ records = [
197
+ [{"0" => -1.0}],
198
+ [nil],
199
+ [{"1" => nil}],
200
+ ]
201
+ record_batch = build_record_batch(:double, records)
202
+ assert_equal(records, record_batch.raw_records)
203
+ end
204
+
205
+ test("BinaryArray") do
206
+ records = [
207
+ [{"0" => "\xff".b}],
208
+ [nil],
209
+ [{"1" => nil}],
210
+ ]
211
+ record_batch = build_record_batch(:binary, records)
212
+ assert_equal(records, record_batch.raw_records)
213
+ end
214
+
215
+ test("StringArray") do
216
+ records = [
217
+ [{"0" => "Ruby"}],
218
+ [nil],
219
+ [{"1" => nil}],
220
+ ]
221
+ record_batch = build_record_batch(:string, records)
222
+ assert_equal(records, record_batch.raw_records)
223
+ end
224
+
225
+ test("Date32Array") do
226
+ records = [
227
+ [{"0" => Date.new(1960, 1, 1)}],
228
+ [nil],
229
+ [{"1" => nil}],
230
+ ]
231
+ record_batch = build_record_batch(:date32, records)
232
+ assert_equal(records, record_batch.raw_records)
233
+ end
234
+
235
+ test("Date64Array") do
236
+ records = [
237
+ [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
238
+ [nil],
239
+ [{"1" => nil}],
240
+ ]
241
+ record_batch = build_record_batch(:date64, records)
242
+ assert_equal(records, record_batch.raw_records)
243
+ end
244
+
245
+ sub_test_case("TimestampArray") do
246
+ test("second") do
247
+ records = [
248
+ [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
249
+ [nil],
250
+ [{"1" => nil}],
251
+ ]
252
+ record_batch = build_record_batch({
253
+ type: :timestamp,
254
+ unit: :second,
255
+ },
256
+ records)
257
+ assert_equal(records, record_batch.raw_records)
258
+ end
259
+
260
+ test("milli") do
261
+ records = [
262
+ [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
263
+ [nil],
264
+ [{"1" => nil}],
265
+ ]
266
+ record_batch = build_record_batch({
267
+ type: :timestamp,
268
+ unit: :milli,
269
+ },
270
+ records)
271
+ assert_equal(records, record_batch.raw_records)
272
+ end
273
+
274
+ test("micro") do
275
+ records = [
276
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
277
+ [nil],
278
+ [{"1" => nil}],
279
+ ]
280
+ record_batch = build_record_batch({
281
+ type: :timestamp,
282
+ unit: :micro,
283
+ },
284
+ records)
285
+ assert_equal(records, record_batch.raw_records)
286
+ end
287
+
288
+ test("nano") do
289
+ records = [
290
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
291
+ [nil],
292
+ [{"1" => nil}],
293
+ ]
294
+ record_batch = build_record_batch({
295
+ type: :timestamp,
296
+ unit: :nano,
297
+ },
298
+ records)
299
+ assert_equal(records, record_batch.raw_records)
300
+ end
301
+ end
302
+
303
+ sub_test_case("Time32Array") do
304
+ test("second") do
305
+ records = [
306
+ [{"0" => 60 * 10}], # 00:10:00
307
+ [nil],
308
+ [{"1" => nil}],
309
+ ]
310
+ record_batch = build_record_batch({
311
+ type: :time32,
312
+ unit: :second,
313
+ },
314
+ records)
315
+ assert_equal(records, record_batch.raw_records)
316
+ end
317
+
318
+ test("milli") do
319
+ records = [
320
+ [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
321
+ [nil],
322
+ [{"1" => nil}],
323
+ ]
324
+ record_batch = build_record_batch({
325
+ type: :time32,
326
+ unit: :milli,
327
+ },
328
+ records)
329
+ assert_equal(records, record_batch.raw_records)
330
+ end
331
+ end
332
+
333
+ sub_test_case("Time64Array") do
334
+ test("micro") do
335
+ records = [
336
+ [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
337
+ [nil],
338
+ [{"1" => nil}],
339
+ ]
340
+ record_batch = build_record_batch({
341
+ type: :time64,
342
+ unit: :micro,
343
+ },
344
+ records)
345
+ assert_equal(records, record_batch.raw_records)
346
+ end
347
+
348
+ test("nano") do
349
+ records = [
350
+ # 00:10:00.123456789
351
+ [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
352
+ [nil],
353
+ [{"1" => nil}],
354
+ ]
355
+ record_batch = build_record_batch({
356
+ type: :time64,
357
+ unit: :nano,
358
+ },
359
+ records)
360
+ assert_equal(records, record_batch.raw_records)
361
+ end
362
+ end
363
+
364
+ test("Decimal128Array") do
365
+ records = [
366
+ [{"0" => BigDecimal("92.92")}],
367
+ [nil],
368
+ [{"1" => nil}],
369
+ ]
370
+ record_batch = build_record_batch({
371
+ type: :decimal128,
372
+ precision: 8,
373
+ scale: 2,
374
+ },
375
+ records)
376
+ assert_equal(records, record_batch.raw_records)
377
+ end
378
+
379
+ test("ListArray") do
380
+ records = [
381
+ [{"0" => [true, nil, false]}],
382
+ [nil],
383
+ [{"1" => nil}],
384
+ ]
385
+ record_batch = build_record_batch({
386
+ type: :list,
387
+ field: {
388
+ name: :sub_element,
389
+ type: :boolean,
390
+ },
391
+ },
392
+ records)
393
+ assert_equal(records, record_batch.raw_records)
394
+ end
395
+
396
+ test("StructArray") do
397
+ records = [
398
+ [{"0" => {"sub_field" => true}}],
399
+ [nil],
400
+ [{"1" => nil}],
401
+ [{"0" => {"sub_field" => nil}}],
402
+ ]
403
+ record_batch = build_record_batch({
404
+ type: :struct,
405
+ fields: [
406
+ {
407
+ name: :sub_field,
408
+ type: :boolean,
409
+ },
410
+ ],
411
+ },
412
+ records)
413
+ assert_equal(records, record_batch.raw_records)
414
+ end
415
+
416
+ test("SparseUnionArray") do
417
+ omit("Need to add support for SparseUnionArrayBuilder")
418
+ records = [
419
+ [{"0" => {"field1" => true}}],
420
+ [nil],
421
+ [{"1" => nil}],
422
+ [{"0" => {"field2" => nil}}],
423
+ ]
424
+ record_batch = build_record_batch({
425
+ type: :sparse_union,
426
+ fields: [
427
+ {
428
+ name: :field1,
429
+ type: :boolean,
430
+ },
431
+ {
432
+ name: :field2,
433
+ type: :uint8,
434
+ },
435
+ ],
436
+ type_codes: [0, 1],
437
+ },
438
+ records)
439
+ assert_equal(records, record_batch.raw_records)
440
+ end
441
+
442
+ test("DenseUnionArray") do
443
+ omit("Need to add support for DenseUnionArrayBuilder")
444
+ records = [
445
+ [{"0" => {"field1" => true}}],
446
+ [nil],
447
+ [{"1" => nil}],
448
+ [{"0" => {"field2" => nil}}],
449
+ ]
450
+ record_batch = build_record_batch({
451
+ type: :dense_union,
452
+ fields: [
453
+ {
454
+ name: :field1,
455
+ type: :boolean,
456
+ },
457
+ {
458
+ name: :field2,
459
+ type: :uint8,
460
+ },
461
+ ],
462
+ type_codes: [0, 1],
463
+ },
464
+ records)
465
+ assert_equal(records, record_batch.raw_records)
466
+ end
467
+
468
+ test("DictionaryArray") do
469
+ omit("Need to add support for DictionaryArrayBuilder")
470
+ records = [
471
+ [{"0" => "Ruby"}],
472
+ [nil],
473
+ [{"1" => nil}],
474
+ [{"0" => "GLib"}],
475
+ ]
476
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
477
+ record_batch = build_record_batch({
478
+ type: :dictionary,
479
+ index_data_type: :int8,
480
+ dictionary: dictionary,
481
+ ordered: true,
482
+ },
483
+ records)
484
+ assert_equal(records, record_batch.raw_records)
485
+ end
486
+ end