red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -0,0 +1,498 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RawRecordsRecordBatchListArrayTest < Test::Unit::TestCase
19
+ def fields(type)
20
+ field_description = {
21
+ name: :element,
22
+ }
23
+ if type.is_a?(Hash)
24
+ field_description = field_description.merge(type)
25
+ else
26
+ field_description[:type] = type
27
+ end
28
+ {
29
+ column: {
30
+ type: :list,
31
+ field: field_description,
32
+ },
33
+ }
34
+ end
35
+
36
+ test("NullArray") do
37
+ records = [
38
+ [[nil, nil, nil]],
39
+ [nil],
40
+ ]
41
+ record_batch = Arrow::RecordBatch.new(fields(:null),
42
+ records)
43
+ assert_equal(records, record_batch.raw_records)
44
+ end
45
+
46
+ test("BooleanArray") do
47
+ records = [
48
+ [[true, nil, false]],
49
+ [nil],
50
+ ]
51
+ record_batch = Arrow::RecordBatch.new(fields(:boolean),
52
+ records)
53
+ assert_equal(records, record_batch.raw_records)
54
+ end
55
+
56
+ test("Int8Array") do
57
+ records = [
58
+ [[-(2 ** 7), nil, (2 ** 7) - 1]],
59
+ [nil],
60
+ ]
61
+ record_batch = Arrow::RecordBatch.new(fields(:int8),
62
+ records)
63
+ assert_equal(records, record_batch.raw_records)
64
+ end
65
+
66
+ test("UInt8Array") do
67
+ records = [
68
+ [[0, nil, (2 ** 8) - 1]],
69
+ [nil],
70
+ ]
71
+ record_batch = Arrow::RecordBatch.new(fields(:uint8),
72
+ records)
73
+ assert_equal(records, record_batch.raw_records)
74
+ end
75
+
76
+ test("Int16Array") do
77
+ records = [
78
+ [[-(2 ** 15), nil, (2 ** 15) - 1]],
79
+ [nil],
80
+ ]
81
+ record_batch = Arrow::RecordBatch.new(fields(:int16),
82
+ records)
83
+ assert_equal(records, record_batch.raw_records)
84
+ end
85
+
86
+ test("UInt16Array") do
87
+ records = [
88
+ [[0, nil, (2 ** 16) - 1]],
89
+ [nil],
90
+ ]
91
+ record_batch = Arrow::RecordBatch.new(fields(:uint16),
92
+ records)
93
+ assert_equal(records, record_batch.raw_records)
94
+ end
95
+
96
+ test("Int32Array") do
97
+ records = [
98
+ [[-(2 ** 31), nil, (2 ** 31) - 1]],
99
+ [nil],
100
+ ]
101
+ record_batch = Arrow::RecordBatch.new(fields(:int32),
102
+ records)
103
+ assert_equal(records, record_batch.raw_records)
104
+ end
105
+
106
+ test("UInt32Array") do
107
+ records = [
108
+ [[0, nil, (2 ** 32) - 1]],
109
+ [nil],
110
+ ]
111
+ record_batch = Arrow::RecordBatch.new(fields(:uint32),
112
+ records)
113
+ assert_equal(records, record_batch.raw_records)
114
+ end
115
+
116
+ test("Int64Array") do
117
+ records = [
118
+ [[-(2 ** 63), nil, (2 ** 63) - 1]],
119
+ [nil],
120
+ ]
121
+ record_batch = Arrow::RecordBatch.new(fields(:int64),
122
+ records)
123
+ assert_equal(records, record_batch.raw_records)
124
+ end
125
+
126
+ test("UInt64Array") do
127
+ records = [
128
+ [[0, nil, (2 ** 64) - 1]],
129
+ [nil],
130
+ ]
131
+ record_batch = Arrow::RecordBatch.new(fields(:uint64),
132
+ records)
133
+ assert_equal(records, record_batch.raw_records)
134
+ end
135
+
136
+ test("FloatArray") do
137
+ records = [
138
+ [[-1.0, nil, 1.0]],
139
+ [nil],
140
+ ]
141
+ record_batch = Arrow::RecordBatch.new(fields(:float),
142
+ records)
143
+ assert_equal(records, record_batch.raw_records)
144
+ end
145
+
146
+ test("DoubleArray") do
147
+ records = [
148
+ [[-1.0, nil, 1.0]],
149
+ [nil],
150
+ ]
151
+ record_batch = Arrow::RecordBatch.new(fields(:double),
152
+ records)
153
+ assert_equal(records, record_batch.raw_records)
154
+ end
155
+
156
+ test("BinaryArray") do
157
+ records = [
158
+ [["\x00".b, nil, "\xff".b]],
159
+ [nil],
160
+ ]
161
+ record_batch = Arrow::RecordBatch.new(fields(:binary),
162
+ records)
163
+ assert_equal(records, record_batch.raw_records)
164
+ end
165
+
166
+ test("StringArray") do
167
+ records = [
168
+ [
169
+ [
170
+ "Ruby",
171
+ nil,
172
+ "\u3042", # U+3042 HIRAGANA LETTER A
173
+ ],
174
+ ],
175
+ [nil],
176
+ ]
177
+ record_batch = Arrow::RecordBatch.new(fields(:string),
178
+ records)
179
+ assert_equal(records, record_batch.raw_records)
180
+ end
181
+
182
+ test("Date32Array") do
183
+ records = [
184
+ [
185
+ [
186
+ Date.new(1960, 1, 1),
187
+ nil,
188
+ Date.new(2017, 8, 23),
189
+ ],
190
+ ],
191
+ [nil],
192
+ ]
193
+ record_batch = Arrow::RecordBatch.new(fields(:date32),
194
+ records)
195
+ assert_equal(records, record_batch.raw_records)
196
+ end
197
+
198
+ test("Date64Array") do
199
+ records = [
200
+ [
201
+ [
202
+ DateTime.new(1960, 1, 1, 2, 9, 30),
203
+ nil,
204
+ DateTime.new(2017, 8, 23, 14, 57, 2),
205
+ ],
206
+ ],
207
+ [nil],
208
+ ]
209
+ record_batch = Arrow::RecordBatch.new(fields(:date64),
210
+ records)
211
+ assert_equal(records, record_batch.raw_records)
212
+ end
213
+
214
+ sub_test_case("TimestampArray") do
215
+ test("second") do
216
+ records = [
217
+ [
218
+ [
219
+ Time.parse("1960-01-01T02:09:30Z"),
220
+ nil,
221
+ Time.parse("2017-08-23T14:57:02Z"),
222
+ ],
223
+ ],
224
+ [nil],
225
+ ]
226
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
227
+ unit: :second),
228
+ records)
229
+ assert_equal(records, record_batch.raw_records)
230
+ end
231
+
232
+ test("milli") do
233
+ records = [
234
+ [
235
+ [
236
+ Time.parse("1960-01-01T02:09:30.123Z"),
237
+ nil,
238
+ Time.parse("2017-08-23T14:57:02.987Z"),
239
+ ],
240
+ ],
241
+ [nil],
242
+ ]
243
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
244
+ unit: :milli),
245
+ records)
246
+ assert_equal(records, record_batch.raw_records)
247
+ end
248
+
249
+ test("micro") do
250
+ records = [
251
+ [
252
+ [
253
+ Time.parse("1960-01-01T02:09:30.123456Z"),
254
+ nil,
255
+ Time.parse("2017-08-23T14:57:02.987654Z"),
256
+ ],
257
+ ],
258
+ [nil],
259
+ ]
260
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
261
+ unit: :micro),
262
+ records)
263
+ assert_equal(records, record_batch.raw_records)
264
+ end
265
+
266
+ test("nano") do
267
+ records = [
268
+ [
269
+ [
270
+ Time.parse("1960-01-01T02:09:30.123456789Z"),
271
+ nil,
272
+ Time.parse("2017-08-23T14:57:02.987654321Z"),
273
+ ],
274
+ ],
275
+ [nil],
276
+ ]
277
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
278
+ unit: :nano),
279
+ records)
280
+ assert_equal(records, record_batch.raw_records)
281
+ end
282
+ end
283
+
284
+ sub_test_case("Time32Array") do
285
+ test("second") do
286
+ records = [
287
+ [
288
+ [
289
+ 60 * 10, # 00:10:00
290
+ nil,
291
+ 60 * 60 * 2 + 9, # 02:00:09
292
+ ],
293
+ ],
294
+ [nil],
295
+ ]
296
+ record_batch = Arrow::RecordBatch.new(fields(type: :time32,
297
+ unit: :second),
298
+ records)
299
+ assert_equal(records, record_batch.raw_records)
300
+ end
301
+
302
+ test("milli") do
303
+ records = [
304
+ [
305
+ [
306
+ (60 * 10) * 1000 + 123, # 00:10:00.123
307
+ nil,
308
+ (60 * 60 * 2 + 9) * 1000 + 987, # 02:00:09.987
309
+ ],
310
+ ],
311
+ [nil],
312
+ ]
313
+ record_batch = Arrow::RecordBatch.new(fields(type: :time32,
314
+ unit: :milli),
315
+ records)
316
+ assert_equal(records, record_batch.raw_records)
317
+ end
318
+ end
319
+
320
+ sub_test_case("Time64Array") do
321
+ test("micro") do
322
+ records = [
323
+ [
324
+ [
325
+ (60 * 10) * 1_000_000 + 123_456, # 00:10:00.123456
326
+ nil,
327
+ (60 * 60 * 2 + 9) * 1_000_000 + 987_654, # 02:00:09.987654
328
+ ],
329
+ ],
330
+ [nil],
331
+ ]
332
+ record_batch = Arrow::RecordBatch.new(fields(type: :time64,
333
+ unit: :micro),
334
+ records)
335
+ assert_equal(records, record_batch.raw_records)
336
+ end
337
+
338
+ test("nano") do
339
+ records = [
340
+ [
341
+ [
342
+ (60 * 10) * 1_000_000_000 + 123_456_789, # 00:10:00.123456789
343
+ nil,
344
+ (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321, # 02:00:09.987654321
345
+ ],
346
+ ],
347
+ [nil],
348
+ ]
349
+ record_batch = Arrow::RecordBatch.new(fields(type: :time64,
350
+ unit: :nano),
351
+ records)
352
+ assert_equal(records, record_batch.raw_records)
353
+ end
354
+ end
355
+
356
+ test("Decimal128Array") do
357
+ records = [
358
+ [
359
+ [
360
+ BigDecimal("92.92"),
361
+ nil,
362
+ BigDecimal("29.29"),
363
+ ],
364
+ ],
365
+ [nil],
366
+ ]
367
+ record_batch = Arrow::RecordBatch.new(fields(type: :decimal128,
368
+ precision: 8,
369
+ scale: 2),
370
+ records)
371
+ assert_equal(records, record_batch.raw_records)
372
+ end
373
+
374
+ test("ListArray") do
375
+ records = [
376
+ [
377
+ [
378
+ [
379
+ true,
380
+ nil,
381
+ ],
382
+ nil,
383
+ [
384
+ nil,
385
+ false,
386
+ ],
387
+ ],
388
+ ],
389
+ [nil],
390
+ ]
391
+ record_batch = Arrow::RecordBatch.new(fields(type: :list,
392
+ field: {
393
+ name: :sub_element,
394
+ type: :boolean,
395
+ }),
396
+ records)
397
+ assert_equal(records, record_batch.raw_records)
398
+ end
399
+
400
+ test("StructArray") do
401
+ records = [
402
+ [
403
+ [
404
+ {"field" => true},
405
+ nil,
406
+ {"field" => nil},
407
+ ],
408
+ ],
409
+ [nil],
410
+ ]
411
+ record_batch = Arrow::RecordBatch.new(fields(type: :struct,
412
+ fields: [
413
+ {
414
+ name: :field,
415
+ type: :boolean,
416
+ },
417
+ ]),
418
+ records)
419
+ assert_equal(records, record_batch.raw_records)
420
+ end
421
+
422
+ test("SparseUnionArray") do
423
+ omit("Need to add support for SparseUnionArrayBuilder")
424
+ records = [
425
+ [
426
+ [
427
+ {"field1" => true},
428
+ nil,
429
+ {"field2" => nil},
430
+ ],
431
+ ],
432
+ [nil],
433
+ ]
434
+ record_batch = Arrow::RecordBatch.new(fields(type: :sparse_union,
435
+ fields: [
436
+ {
437
+ name: :field1,
438
+ type: :boolean,
439
+ },
440
+ {
441
+ name: :field2,
442
+ type: :uint8,
443
+ },
444
+ ],
445
+ type_codes: [0, 1]),
446
+ records)
447
+ assert_equal(records, record_batch.raw_records)
448
+ end
449
+
450
+ test("DenseUnionArray") do
451
+ omit("Need to add support for DenseUnionArrayBuilder")
452
+ records = [
453
+ [
454
+ [
455
+ {"field1" => true},
456
+ nil,
457
+ {"field2" => nil},
458
+ ],
459
+ ],
460
+ [nil],
461
+ ]
462
+ record_batch = Arrow::RecordBatch.new(fields(type: :dense_union,
463
+ fields: [
464
+ {
465
+ name: :field1,
466
+ type: :boolean,
467
+ },
468
+ {
469
+ name: :field2,
470
+ type: :uint8,
471
+ },
472
+ ],
473
+ type_codes: [0, 1]),
474
+ records)
475
+ assert_equal(records, record_batch.raw_records)
476
+ end
477
+
478
+ test("DictionaryArray") do
479
+ omit("Need to add support for DictionaryArrayBuilder")
480
+ records = [
481
+ [
482
+ [
483
+ "Ruby",
484
+ nil,
485
+ "GLib",
486
+ ],
487
+ ],
488
+ [nil],
489
+ ]
490
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
491
+ record_batch = Arrow::RecordBatch.new(fields(type: :dictionary,
492
+ index_data_type: :int8,
493
+ dictionary: dictionary,
494
+ ordered: true),
495
+ records)
496
+ assert_equal(records, record_batch.raw_records)
497
+ end
498
+ end