red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Verifies that Arrow::RecordBatch#raw_records hands back the same rows a
# record batch was built from when the batch has several columns of
# different types.
class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
  test("3 elements") do
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
    ]
    uint8_max = 2 ** 8 - 1
    rows = [
      [true, nil, "Ruby"],
      [nil, 0, "GLib"],
      [false, uint8_max, nil],
    ]
    batch = Arrow::RecordBatch.new(schema, rows)
    assert_equal(rows, batch.raw_records)
  end

  test("4 elements") do
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
      {name: :column3, type: :int64},
    ]
    uint8_max = 2 ** 8 - 1
    int64_min = -(2 ** 63)
    int64_max = (2 ** 63) - 1
    rows = [
      [true, nil, "Ruby", int64_min],
      [nil, 0, "GLib", nil],
      [false, uint8_max, nil, int64_max],
    ]
    batch = Arrow::RecordBatch.new(schema, rows)
    assert_equal(rows, batch.raw_records)
  end
end
@@ -0,0 +1,474 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Checks Arrow::RecordBatch#raw_records for a record batch whose only column
# is a sparse union array, once per child value type.
class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  # Describes a schema with a single :sparse_union field named `column`
  # whose two children, named "0" and "1", share the same description.
  #
  # `type` is either a type name (stored under :type) or a Hash that is
  # copied and used as the child description. `type_codes` is passed
  # through unchanged under :type_codes.
  def fields(type, type_codes)
    child = type.is_a?(Hash) ? {}.merge(type) : {type: type}
    {
      column: {
        type: :sparse_union,
        fields: ["0", "1"].collect { |name| child.merge(name: name) },
        type_codes: type_codes,
      },
    }
  end

  # Builds a one-column record batch holding a sparse union array.
  #
  # Each record is a one-element array: `[nil]`, or a Hash keyed by the
  # child name ("0" or "1") that carries the value for that child.
  #
  # TODO: Use Arrow::RecordBatch.new(fields(type), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    union_field = Arrow::Schema.new(fields(type, type_codes)).fields[0]

    # One child array per union field; a record contributes nil to a child
    # unless its Hash has an entry under that child's name.
    children = union_field.data_type.fields.collect do |child_field|
      child_schema = Arrow::Schema.new([child_field])
      child_rows = records.collect do |row|
        value = row[0]
        [value.nil? ? nil : value[child_field.name]]
      end
      Arrow::RecordBatch.new(child_schema, child_rows).columns[0]
    end

    # A nil record yields a nil type id; otherwise the type code of the
    # first child name present in the Hash is used. A Hash with neither
    # key appends nothing.
    type_ids = records.each_with_object([]) do |row, ids|
      value = row[0]
      if value.nil?
        ids << nil
      elsif value.key?("0")
        ids << type_codes[0]
      elsif value.key?("1")
        ids << type_codes[1]
      end
    end

    # TODO
    # union_array = Arrow::SparseUnionArray.new(union_field.data_type,
    #                                           Arrow::Int8Array.new(type_ids),
    #                                           children)
    union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids),
                                              children)
    batch_schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(batch_schema, records.size, [union_array])
  end

  # Builds a record batch from `records` with the given child `type` and
  # asserts that #raw_records returns `records` unchanged.
  def assert_raw_records(type, records)
    batch = build_record_batch(type, records)
    assert_equal(records, batch.raw_records)
  end

  test("NullArray") do
    rows = [
      [{"0" => nil}],
      [nil],
    ]
    assert_raw_records(:null, rows)
  end

  test("BooleanArray") do
    rows = [
      [{"0" => true}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:boolean, rows)
  end

  test("Int8Array") do
    rows = [
      [{"0" => -(2 ** 7)}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:int8, rows)
  end

  test("UInt8Array") do
    rows = [
      [{"0" => (2 ** 8) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint8, rows)
  end

  test("Int16Array") do
    rows = [
      [{"0" => -(2 ** 15)}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:int16, rows)
  end

  test("UInt16Array") do
    rows = [
      [{"0" => (2 ** 16) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint16, rows)
  end

  test("Int32Array") do
    rows = [
      [{"0" => -(2 ** 31)}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:int32, rows)
  end

  test("UInt32Array") do
    rows = [
      [{"0" => (2 ** 32) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint32, rows)
  end

  test("Int64Array") do
    rows = [
      [{"0" => -(2 ** 63)}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:int64, rows)
  end

  test("UInt64Array") do
    rows = [
      [{"0" => (2 ** 64) - 1}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint64, rows)
  end

  test("FloatArray") do
    rows = [
      [{"0" => -1.0}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:float, rows)
  end

  test("DoubleArray") do
    rows = [
      [{"0" => -1.0}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:double, rows)
  end

  test("BinaryArray") do
    rows = [
      [{"0" => "\xff".b}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:binary, rows)
  end

  test("StringArray") do
    rows = [
      [{"0" => "Ruby"}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:string, rows)
  end

  test("Date32Array") do
    rows = [
      [{"0" => Date.new(1960, 1, 1)}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:date32, rows)
  end

  test("Date64Array") do
    rows = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [nil],
      [{"1" => nil}],
    ]
    assert_raw_records(:date64, rows)
  end

  sub_test_case("TimestampArray") do
    test("second") do
      rows = [
        [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :timestamp, unit: :second}, rows)
    end

    test("milli") do
      rows = [
        [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :timestamp, unit: :milli}, rows)
    end

    test("micro") do
      rows = [
        [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :timestamp, unit: :micro}, rows)
    end

    test("nano") do
      rows = [
        [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :timestamp, unit: :nano}, rows)
    end
  end

  sub_test_case("Time32Array") do
    test("second") do
      rows = [
        [{"0" => 60 * 10}], # 00:10:00
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :time32, unit: :second}, rows)
    end

    test("milli") do
      rows = [
        [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :time32, unit: :milli}, rows)
    end
  end

  sub_test_case("Time64Array") do
    test("micro") do
      rows = [
        [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :time64, unit: :micro}, rows)
    end

    test("nano") do
      rows = [
        # 00:10:00.123456789
        [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
        [nil],
        [{"1" => nil}],
      ]
      assert_raw_records({type: :time64, unit: :nano}, rows)
    end
  end

  test("Decimal128Array") do
    rows = [
      [{"0" => BigDecimal("92.92")}],
      [nil],
      [{"1" => nil}],
    ]
    decimal_type = {
      type: :decimal128,
      precision: 8,
      scale: 2,
    }
    assert_raw_records(decimal_type, rows)
  end

  test("ListArray") do
    rows = [
      [{"0" => [true, nil, false]}],
      [nil],
      [{"1" => nil}],
    ]
    list_type = {
      type: :list,
      field: {
        name: :sub_element,
        type: :boolean,
      },
    }
    assert_raw_records(list_type, rows)
  end

  test("StructArray") do
    rows = [
      [{"0" => {"sub_field" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    struct_type = {
      type: :struct,
      fields: [
        {
          name: :sub_field,
          type: :boolean,
        },
      ],
    }
    assert_raw_records(struct_type, rows)
  end

  test("SparseUnionArray") do
    omit("Need to add support for SparseUnionArrayBuilder")
    rows = [
      [{"0" => {"field1" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    nested_union_type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(nested_union_type, rows)
  end

  test("DenseUnionArray") do
    omit("Need to add support for DenseUnionArrayBuilder")
    rows = [
      [{"0" => {"field1" => true}}],
      [nil],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    nested_union_type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(nested_union_type, rows)
  end

  test("DictionaryArray") do
    omit("Need to add support for DictionaryArrayBuilder")
    rows = [
      [{"0" => "Ruby"}],
      [nil],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    dictionary_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_raw_records(dictionary_type, rows)
  end
end