red-arrow 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -0,0 +1,497 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases for reading list arrays back through +#values+.
#
# Each test builds a list of +element+ items with the host class's
# +build(type, values)+ method and asserts that +#values+ on the result
# equals the Ruby objects it was built from. Classes that include this
# module must define +build+.
module ValuesListArrayTests
  # Returns an Arrow::ListDataType whose field is named +:element+.
  #
  # +type+ is either a bare type (stored under +:type+) or a Hash of
  # field options that is merged over the default field description.
  def build_data_type(type)
    overrides = type.is_a?(Hash) ? type : {type: type}
    Arrow::ListDataType.new(field: {name: :element}.merge(overrides))
  end

  # Builds an Arrow::ListArray of +values+ using the data type that
  # build_data_type derives from +type+.
  def build_array(type, values)
    data_type = build_data_type(type)
    Arrow::ListArray.new(data_type, values)
  end

  def test_null
    expected = [[nil, nil, nil], nil]
    assert_equal(expected, build(:null, expected).values)
  end

  def test_boolean
    expected = [[true, nil, false], nil]
    assert_equal(expected, build(:boolean, expected).values)
  end

  # The integer tests use the minimum and maximum of each width.
  def test_int8
    expected = [[-(2 ** 7), nil, (2 ** 7) - 1], nil]
    assert_equal(expected, build(:int8, expected).values)
  end

  def test_uint8
    expected = [[0, nil, (2 ** 8) - 1], nil]
    assert_equal(expected, build(:uint8, expected).values)
  end

  def test_int16
    expected = [[-(2 ** 15), nil, (2 ** 15) - 1], nil]
    assert_equal(expected, build(:int16, expected).values)
  end

  def test_uint16
    expected = [[0, nil, (2 ** 16) - 1], nil]
    assert_equal(expected, build(:uint16, expected).values)
  end

  def test_int32
    expected = [[-(2 ** 31), nil, (2 ** 31) - 1], nil]
    assert_equal(expected, build(:int32, expected).values)
  end

  def test_uint32
    expected = [[0, nil, (2 ** 32) - 1], nil]
    assert_equal(expected, build(:uint32, expected).values)
  end

  def test_int64
    expected = [[-(2 ** 63), nil, (2 ** 63) - 1], nil]
    assert_equal(expected, build(:int64, expected).values)
  end

  def test_uint64
    expected = [[0, nil, (2 ** 64) - 1], nil]
    assert_equal(expected, build(:uint64, expected).values)
  end

  def test_float
    expected = [[-1.0, nil, 1.0], nil]
    assert_equal(expected, build(:float, expected).values)
  end

  def test_double
    expected = [[-1.0, nil, 1.0], nil]
    assert_equal(expected, build(:double, expected).values)
  end

  def test_binary
    expected = [["\x00".b, nil, "\xff".b], nil]
    assert_equal(expected, build(:binary, expected).values)
  end

  def test_string
    hiragana_a = "\u3042" # U+3042 HIRAGANA LETTER A
    expected = [["Ruby", nil, hiragana_a], nil]
    assert_equal(expected, build(:string, expected).values)
  end

  def test_date32
    expected = [
      [Date.new(1960, 1, 1), nil, Date.new(2017, 8, 23)],
      nil,
    ]
    assert_equal(expected, build(:date32, expected).values)
  end

  def test_date64
    expected = [
      [
        DateTime.new(1960, 1, 1, 2, 9, 30),
        nil,
        DateTime.new(2017, 8, 23, 14, 57, 2),
      ],
      nil,
    ]
    assert_equal(expected, build(:date64, expected).values)
  end

  def test_timestamp_second
    expected = [
      [
        Time.parse("1960-01-01T02:09:30Z"),
        nil,
        Time.parse("2017-08-23T14:57:02Z"),
      ],
      nil,
    ]
    data_type = {type: :timestamp, unit: :second}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_timestamp_milli
    expected = [
      [
        Time.parse("1960-01-01T02:09:30.123Z"),
        nil,
        Time.parse("2017-08-23T14:57:02.987Z"),
      ],
      nil,
    ]
    data_type = {type: :timestamp, unit: :milli}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_timestamp_micro
    expected = [
      [
        Time.parse("1960-01-01T02:09:30.123456Z"),
        nil,
        Time.parse("2017-08-23T14:57:02.987654Z"),
      ],
      nil,
    ]
    data_type = {type: :timestamp, unit: :micro}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_timestamp_nano
    expected = [
      [
        Time.parse("1960-01-01T02:09:30.123456789Z"),
        nil,
        Time.parse("2017-08-23T14:57:02.987654321Z"),
      ],
      nil,
    ]
    data_type = {type: :timestamp, unit: :nano}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    ten_minutes = Arrow::Time.new(unit, 60 * 10)              # 00:10:00
    two_hours_nine = Arrow::Time.new(unit, 60 * 60 * 2 + 9)   # 02:00:09
    expected = [[ten_minutes, nil, two_hours_nine], nil]
    data_type = {type: :time32, unit: :second}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    # 00:10:00.123
    early = Arrow::Time.new(unit, (60 * 10) * 1000 + 123)
    # 02:00:09.987
    late = Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)
    expected = [[early, nil, late], nil]
    data_type = {type: :time32, unit: :milli}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    # 00:10:00.123456
    early = Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)
    # 02:00:09.987654
    late = Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)
    expected = [[early, nil, late], nil]
    data_type = {type: :time64, unit: :micro}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    # 00:10:00.123456789
    early = Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)
    # 02:00:09.987654321
    late = Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)
    expected = [[early, nil, late], nil]
    data_type = {type: :time64, unit: :nano}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_decimal128
    expected = [
      [BigDecimal("92.92"), nil, BigDecimal("29.29")],
      nil,
    ]
    data_type = {type: :decimal128, precision: 8, scale: 2}
    assert_equal(expected, build(data_type, expected).values)
  end

  # List of lists: each element is itself a list of booleans (or nil).
  def test_list
    expected = [
      [
        [true, nil],
        nil,
        [nil, false],
      ],
      nil,
    ]
    data_type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  # List of structs: each element is a Hash keyed by field name (or nil).
  def test_struct
    expected = [
      [
        {"field" => true},
        nil,
        {"field" => nil},
      ],
      nil,
    ]
    data_type = {
      type: :struct,
      fields: [
        {name: :field, type: :boolean},
      ],
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  # Omitted up front; the body documents the intended expectation.
  def test_sparse
    omit("Need to add support for SparseUnionArrayBuilder")
    expected = [
      [
        {"field1" => true},
        nil,
        {"field2" => nil},
      ],
      nil,
    ]
    data_type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  # Omitted up front; the body documents the intended expectation.
  def test_dense
    omit("Need to add support for DenseUnionArrayBuilder")
    expected = [
      [
        {"field1" => true},
        nil,
        {"field2" => nil},
      ],
      nil,
    ]
    data_type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  # Omitted up front; the body documents the intended expectation.
  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    expected = [["Ruby", nil, "GLib"], nil]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    data_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_equal(expected, build(data_type, expected).values)
  end
end
482
+
483
# Runs the shared ValuesListArrayTests cases against a plain
# Arrow::ListArray.
class ValuesArrayListArrayTest < Test::Unit::TestCase
  include ValuesListArrayTests

  # Builds the array under test: a list array of +values+ whose data
  # type comes from build_data_type(type).
  def build(type, values)
    Arrow::ListArray.new(build_data_type(type), values)
  end
end
490
+
491
# Runs the shared ValuesListArrayTests cases against an
# Arrow::ChunkedArray that wraps a single list array.
class ValuesChunkedArrayListArrayTest < Test::Unit::TestCase
  include ValuesListArrayTests

  # Builds the array under test: one list-array chunk made by
  # build_array, wrapped in a chunked array.
  def build(type, values)
    only_chunk = build_array(type, values)
    Arrow::ChunkedArray.new([only_chunk])
  end
end