red-arrow 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -0,0 +1,477 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases that check what #values returns for sparse union data.
#
# Each test passes a data type description and a list of Ruby values to
# #build, which the including test case has to define, and asserts that the
# built object's #values equals the list it was built from.
module ValuesSparseUnionArrayTests
  # Creates an Arrow::SparseUnionDataType with two fields named "0" and "1"
  # that share a single field description.
  #
  # type is either a field description Hash, used as is, or any other object,
  # which becomes the :type entry of the description. type_codes is passed to
  # the data type unchanged.
  def build_data_type(type, type_codes)
    base = type.is_a?(Hash) ? type : {type: type}
    fields = %w[0 1].map { |name| base.merge(name: name) }
    Arrow::SparseUnionDataType.new(fields: fields, type_codes: type_codes)
  end

  # Assembles an Arrow::SparseUnionArray for the given type from values.
  #
  # Each element of values is nil or a Hash keyed by "0" or "1". For every
  # union field a one-column record batch is built with one row per element
  # (nil for a nil element, otherwise the element's entry for that field
  # name) and the data of its first column is used as the child array.
  def build_array(type, values)
    type_codes = [0, 1]
    data_type = build_data_type(type, type_codes)
    children = data_type.fields.map do |field|
      schema = Arrow::Schema.new([field])
      rows = values.map do |value|
        [value.nil? ? nil : value[field.name]]
      end
      Arrow::RecordBatch.new(schema, rows).columns[0].data
    end
    type_ids = values.each_with_object([]) do |value, ids|
      if value.nil?
        ids << nil
      else
        # "0" is checked before "1"; a Hash with neither key adds no type id.
        slot = %w[0 1].index { |name| value.key?(name) }
        ids << type_codes[slot] if slot
      end
    end
    Arrow::SparseUnionArray.new(data_type,
                                Arrow::Int8Array.new(type_ids),
                                children)
  end

  def test_null
    expected = [
      {"0" => nil},
      nil,
    ]
    assert_equal(expected, build(:null, expected).values)
  end

  def test_boolean
    expected = [
      {"0" => true},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:boolean, expected).values)
  end

  def test_int8
    expected = [
      {"0" => -(2 ** 7)},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:int8, expected).values)
  end

  def test_uint8
    expected = [
      {"0" => (2 ** 8) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:uint8, expected).values)
  end

  def test_int16
    expected = [
      {"0" => -(2 ** 15)},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:int16, expected).values)
  end

  def test_uint16
    expected = [
      {"0" => (2 ** 16) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:uint16, expected).values)
  end

  def test_int32
    expected = [
      {"0" => -(2 ** 31)},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:int32, expected).values)
  end

  def test_uint32
    expected = [
      {"0" => (2 ** 32) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:uint32, expected).values)
  end

  def test_int64
    expected = [
      {"0" => -(2 ** 63)},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:int64, expected).values)
  end

  def test_uint64
    expected = [
      {"0" => (2 ** 64) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:uint64, expected).values)
  end

  def test_float
    expected = [
      {"0" => -1.0},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:float, expected).values)
  end

  def test_double
    expected = [
      {"0" => -1.0},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:double, expected).values)
  end

  def test_binary
    expected = [
      {"0" => "\xff".b},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:binary, expected).values)
  end

  def test_string
    expected = [
      {"0" => "Ruby"},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:string, expected).values)
  end

  def test_date32
    expected = [
      {"0" => Date.new(1960, 1, 1)},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:date32, expected).values)
  end

  def test_date64
    expected = [
      {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
      nil,
      {"1" => nil},
    ]
    assert_equal(expected, build(:date64, expected).values)
  end

  def test_timestamp_second
    expected = [
      {"0" => Time.parse("1960-01-01T02:09:30Z")},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :timestamp, unit: :second}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_timestamp_milli
    expected = [
      {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :timestamp, unit: :milli}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_timestamp_micro
    expected = [
      {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :timestamp, unit: :micro}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_timestamp_nano
    expected = [
      {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :timestamp, unit: :nano}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time32_second
    time_unit = Arrow::TimeUnit::SECOND
    expected = [
      # 00:10:00
      {"0" => Arrow::Time.new(time_unit, 60 * 10)},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :time32, unit: :second}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time32_milli
    time_unit = Arrow::TimeUnit::MILLI
    expected = [
      # 00:10:00.123
      {"0" => Arrow::Time.new(time_unit, (60 * 10) * 1000 + 123)},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :time32, unit: :milli}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time64_micro
    time_unit = Arrow::TimeUnit::MICRO
    expected = [
      # 00:10:00.123456
      {"0" => Arrow::Time.new(time_unit, (60 * 10) * 1_000_000 + 123_456)},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :time64, unit: :micro}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_time64_nano
    time_unit = Arrow::TimeUnit::NANO
    expected = [
      # 00:10:00.123456789
      {"0" => Arrow::Time.new(time_unit,
                              (60 * 10) * 1_000_000_000 + 123_456_789)},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :time64, unit: :nano}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_decimal128
    expected = [
      {"0" => BigDecimal("92.92")},
      nil,
      {"1" => nil},
    ]
    data_type = {type: :decimal128, precision: 8, scale: 2}
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_list
    expected = [
      {"0" => [true, nil, false]},
      nil,
      {"1" => nil},
    ]
    data_type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_struct
    expected = [
      {"0" => {"sub_field" => true}},
      nil,
      {"1" => nil},
      {"0" => {"sub_field" => nil}},
    ]
    data_type = {
      type: :struct,
      fields: [
        {name: :sub_field, type: :boolean},
      ],
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    expected = [
      {"0" => {"field1" => true}},
      nil,
      {"1" => nil},
      {"0" => {"field2" => nil}},
    ]
    data_type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    expected = [
      {"0" => {"field1" => true}},
      nil,
      {"1" => nil},
      {"0" => {"field2" => nil}},
    ]
    data_type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(expected, build(data_type, expected).values)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    expected = [
      {"0" => "Ruby"},
      nil,
      {"1" => nil},
      {"0" => "GLib"},
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    data_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_equal(expected, build(data_type, expected).values)
  end
end
462
+
463
# Runs the shared ValuesSparseUnionArrayTests cases against the array that
# build_array returns.
class ValuesArraySparseUnionArrayTest < Test::Unit::TestCase
  include ValuesSparseUnionArrayTests

  # Supplies the object under test: the result of build_array, unchanged.
  def build(type, values)
    build_array(type, values)
  end
end
470
+
471
# Runs the shared ValuesSparseUnionArrayTests cases against an
# Arrow::ChunkedArray that holds one chunk.
class ValuesChunkedArraySparseUnionArrayTest < Test::Unit::TestCase
  include ValuesSparseUnionArrayTests

  # Supplies the object under test: the result of build_array wrapped as the
  # only chunk of a chunked array.
  def build(type, values)
    chunk = build_array(type, values)
    Arrow::ChunkedArray.new([chunk])
  end
end