red-arrow 0.14.1 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -0,0 +1,477 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases that check `#values` on sparse union arrays.
#
# A class that includes this module must define `build(type, values)`,
# which receives a child type description plus an Array of rows and
# returns the object under test. Each row used by these tests is either
# `nil` or a one-entry Hash keyed by "0" or "1" (the child field name).
module ValuesSparseUnionArrayTests
  # Creates a sparse union data type with two children, named "0" and
  # "1", that share the same description.
  #
  # type:: a type name (e.g. :int8) or a Hash holding a field description.
  # type_codes:: the type codes handed to the union data type.
  def build_data_type(type, type_codes)
    base_description = type.is_a?(Hash) ? {}.merge(type) : {type: type}
    child_fields = ["0", "1"].collect do |child_name|
      base_description.merge(name: child_name)
    end
    Arrow::SparseUnionDataType.new(fields: child_fields,
                                   type_codes: type_codes)
  end

  # Creates an Arrow::SparseUnionArray from rows.
  #
  # type:: forwarded to build_data_type.
  # values:: Array of rows; each row is nil or a Hash keyed by child name.
  def build_array(type, values)
    type_codes = [0, 1]
    data_type = build_data_type(type, type_codes)

    # One child array per union field. A child holds nil at every row
    # that is nil or that doesn't have an entry for the field.
    children = data_type.fields.collect do |field|
      schema = Arrow::Schema.new([field])
      rows = values.collect do |row|
        row.nil? ? [nil] : [row[field.name]]
      end
      Arrow::RecordBatch.new(schema, rows).columns[0].data
    end

    # A nil row gets a nil type ID. Any other row gets the type code of
    # the first child name it has. A row with neither name adds nothing.
    type_ids = []
    values.each do |row|
      if row.nil?
        type_ids << nil
        next
      end
      position = ["0", "1"].index {|child_name| row.key?(child_name)}
      type_ids << type_codes[position] if position
    end

    Arrow::SparseUnionArray.new(data_type,
                                Arrow::Int8Array.new(type_ids),
                                children)
  end

  # The null type: the only possible child value is nil.
  def test_null
    rows = [
      {"0" => nil},
      nil,
    ]
    assert_equal(rows, build(:null, rows).values)
  end

  def test_boolean
    rows = [
      {"0" => true},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:boolean, rows).values)
  end

  # Uses -128.
  def test_int8
    rows = [
      {"0" => -(2 ** 7)},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:int8, rows).values)
  end

  # Uses 255.
  def test_uint8
    rows = [
      {"0" => (2 ** 8) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:uint8, rows).values)
  end

  # Uses -32768.
  def test_int16
    rows = [
      {"0" => -(2 ** 15)},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:int16, rows).values)
  end

  # Uses 65535.
  def test_uint16
    rows = [
      {"0" => (2 ** 16) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:uint16, rows).values)
  end

  # Uses -(2 ** 31).
  def test_int32
    rows = [
      {"0" => -(2 ** 31)},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:int32, rows).values)
  end

  # Uses (2 ** 32) - 1.
  def test_uint32
    rows = [
      {"0" => (2 ** 32) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:uint32, rows).values)
  end

  # Uses -(2 ** 63).
  def test_int64
    rows = [
      {"0" => -(2 ** 63)},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:int64, rows).values)
  end

  # Uses (2 ** 64) - 1.
  def test_uint64
    rows = [
      {"0" => (2 ** 64) - 1},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:uint64, rows).values)
  end

  def test_float
    rows = [
      {"0" => -1.0},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:float, rows).values)
  end

  def test_double
    rows = [
      {"0" => -1.0},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:double, rows).values)
  end

  # Uses a single 0xff byte with binary encoding.
  def test_binary
    rows = [
      {"0" => "\xff".b},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:binary, rows).values)
  end

  def test_string
    rows = [
      {"0" => "Ruby"},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:string, rows).values)
  end

  def test_date32
    rows = [
      {"0" => Date.new(1960, 1, 1)},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:date32, rows).values)
  end

  def test_date64
    rows = [
      {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
      nil,
      {"1" => nil},
    ]
    assert_equal(rows, build(:date64, rows).values)
  end

  def test_timestamp_second
    rows = [
      {"0" => Time.parse("1960-01-01T02:09:30Z")},
      nil,
      {"1" => nil},
    ]
    type = {type: :timestamp, unit: :second}
    assert_equal(rows, build(type, rows).values)
  end

  def test_timestamp_milli
    rows = [
      {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
      nil,
      {"1" => nil},
    ]
    type = {type: :timestamp, unit: :milli}
    assert_equal(rows, build(type, rows).values)
  end

  def test_timestamp_micro
    rows = [
      {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
      nil,
      {"1" => nil},
    ]
    type = {type: :timestamp, unit: :micro}
    assert_equal(rows, build(type, rows).values)
  end

  def test_timestamp_nano
    rows = [
      {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
      nil,
      {"1" => nil},
    ]
    type = {type: :timestamp, unit: :nano}
    assert_equal(rows, build(type, rows).values)
  end

  def test_time32_second
    # 00:10:00
    ten_minutes = Arrow::Time.new(Arrow::TimeUnit::SECOND, 60 * 10)
    rows = [
      {"0" => ten_minutes},
      nil,
      {"1" => nil},
    ]
    type = {type: :time32, unit: :second}
    assert_equal(rows, build(type, rows).values)
  end

  def test_time32_milli
    # 00:10:00.123
    time = Arrow::Time.new(Arrow::TimeUnit::MILLI,
                           (60 * 10) * 1000 + 123)
    rows = [
      {"0" => time},
      nil,
      {"1" => nil},
    ]
    type = {type: :time32, unit: :milli}
    assert_equal(rows, build(type, rows).values)
  end

  def test_time64_micro
    # 00:10:00.123456
    time = Arrow::Time.new(Arrow::TimeUnit::MICRO,
                           (60 * 10) * 1_000_000 + 123_456)
    rows = [
      {"0" => time},
      nil,
      {"1" => nil},
    ]
    type = {type: :time64, unit: :micro}
    assert_equal(rows, build(type, rows).values)
  end

  def test_time64_nano
    # 00:10:00.123456789
    time = Arrow::Time.new(Arrow::TimeUnit::NANO,
                           (60 * 10) * 1_000_000_000 + 123_456_789)
    rows = [
      {"0" => time},
      nil,
      {"1" => nil},
    ]
    type = {type: :time64, unit: :nano}
    assert_equal(rows, build(type, rows).values)
  end

  def test_decimal128
    rows = [
      {"0" => BigDecimal("92.92")},
      nil,
      {"1" => nil},
    ]
    type = {type: :decimal128, precision: 8, scale: 2}
    assert_equal(rows, build(type, rows).values)
  end

  # The child is a list of booleans that also has a nil element.
  def test_list
    rows = [
      {"0" => [true, nil, false]},
      nil,
      {"1" => nil},
    ]
    type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_equal(rows, build(type, rows).values)
  end

  # The child is a struct that has one boolean field.
  def test_struct
    rows = [
      {"0" => {"sub_field" => true}},
      nil,
      {"1" => nil},
      {"0" => {"sub_field" => nil}},
    ]
    type = {
      type: :struct,
      fields: [
        {name: :sub_field, type: :boolean},
      ],
    }
    assert_equal(rows, build(type, rows).values)
  end

  # Omitted for now; see the omit message for what is missing.
  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    rows = [
      {"0" => {"field1" => true}},
      nil,
      {"1" => nil},
      {"0" => {"field2" => nil}},
    ]
    type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(rows, build(type, rows).values)
  end

  # Omitted for now; see the omit message for what is missing.
  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    rows = [
      {"0" => {"field1" => true}},
      nil,
      {"1" => nil},
      {"0" => {"field2" => nil}},
    ]
    type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(rows, build(type, rows).values)
  end

  # Omitted for now; see the omit message for what is missing.
  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    rows = [
      {"0" => "Ruby"},
      nil,
      {"1" => nil},
      {"0" => "GLib"},
    ]
    type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: Arrow::StringArray.new(["GLib", "Ruby"]),
      ordered: true,
    }
    assert_equal(rows, build(type, rows).values)
  end
end
462
+
463
# Runs the shared sparse union cases with the array itself as the target.
class ValuesArraySparseUnionArrayTest < Test::Unit::TestCase
  include ValuesSparseUnionArrayTests

  # Returns what build_array creates for the given type and rows.
  def build(type, values)
    sparse_union_array = build_array(type, values)
    sparse_union_array
  end
end
470
+
471
# Runs the shared sparse union cases with a chunked array as the target.
class ValuesChunkedArraySparseUnionArrayTest < Test::Unit::TestCase
  include ValuesSparseUnionArrayTests

  # Wraps what build_array creates in a chunked array that has one chunk.
  def build(type, values)
    chunks = [build_array(type, values)]
    Arrow::ChunkedArray.new(chunks)
  end
end