red-arrow 5.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
@@ -0,0 +1,433 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module ValuesMapArrayTests
19
+ def build_data_type(item_type)
20
+ Arrow::MapDataType.new(
21
+ key: :string,
22
+ item: item_type
23
+ )
24
+ end
25
+
26
+ def build_array(item_type, values)
27
+ Arrow::MapArray.new(build_data_type(item_type), values)
28
+ end
29
+
30
+ def test_null
31
+ values = [
32
+ {"key1" => nil},
33
+ nil,
34
+ ]
35
+ target = build(:null, values)
36
+ assert_equal(values, target.values)
37
+ end
38
+
39
+ def test_boolean
40
+ values = [
41
+ {"key1" => false, "key2" => nil},
42
+ nil,
43
+ ]
44
+ target = build(:boolean, values)
45
+ assert_equal(values, target.values)
46
+ end
47
+
48
+ def test_int8
49
+ values = [
50
+ {"key1" => (2 ** 7) - 1, "key2" => nil},
51
+ nil,
52
+ ]
53
+ target = build(:int8, values)
54
+ assert_equal(values, target.values)
55
+ end
56
+
57
+ def test_uint8
58
+ values = [
59
+ {"key1" => (2 ** 8) - 1, "key2" => nil},
60
+ nil,
61
+ ]
62
+ target = build(:uint8, values)
63
+ assert_equal(values, target.values)
64
+ end
65
+
66
+ def test_uint16
67
+ values = [
68
+ {"key1" => (2 ** 16) - 1, "key2" => nil},
69
+ nil,
70
+ ]
71
+ target = build(:uint16, values)
72
+ assert_equal(values, target.values)
73
+ end
74
+
75
+ def test_int32
76
+ values = [
77
+ {"key1" => -(2 ** 31), "key2" => nil},
78
+ nil,
79
+ ]
80
+ target = build(:int32, values)
81
+ assert_equal(values, target.values)
82
+ end
83
+
84
+ def test_uint32
85
+ values = [
86
+ {"key1" => (2 ** 32) - 1, "key2" => nil},
87
+ nil,
88
+ ]
89
+ target = build(:uint32, values)
90
+ assert_equal(values, target.values)
91
+ end
92
+
93
+ def test_int64
94
+ values = [
95
+ {"key1" => -(2 ** 63), "key2" => nil},
96
+ nil,
97
+ ]
98
+ target = build(:int64, values)
99
+ assert_equal(values, target.values)
100
+ end
101
+
102
+ def test_uint64
103
+ values = [
104
+ {"key1" => (2 ** 64) - 1, "key2" => nil},
105
+ nil,
106
+ ]
107
+ target = build(:uint64, values)
108
+ assert_equal(values, target.values)
109
+ end
110
+
111
+ def test_float
112
+ values = [
113
+ {"key1" => -1.0, "key2" => nil},
114
+ nil,
115
+ ]
116
+ target = build(:float, values)
117
+ assert_equal(values, target.values)
118
+ end
119
+
120
+ def test_double
121
+ values = [
122
+ {"key1" => -1.0, "key2" => nil},
123
+ nil,
124
+ ]
125
+ target = build(:double, values)
126
+ assert_equal(values, target.values)
127
+ end
128
+
129
+ def test_binary
130
+ values = [
131
+ {"key1" => "\xff".b, "key2" => nil},
132
+ nil,
133
+ ]
134
+ target = build(:binary, values)
135
+ assert_equal(values, target.values)
136
+ end
137
+
138
+ def test_string
139
+ values = [
140
+ {"key1" => "Ruby", "key2" => nil},
141
+ nil,
142
+ ]
143
+ target = build(:string, values)
144
+ assert_equal(values, target.values)
145
+ end
146
+
147
+ def test_date32
148
+ values = [
149
+ {"key1" => Date.new(1960, 1, 1), "key2" => nil},
150
+ nil,
151
+ ]
152
+ target = build(:date32, values)
153
+ assert_equal(values, target.values)
154
+ end
155
+
156
+ def test_date64
157
+ values = [
158
+ {"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil},
159
+ nil,
160
+ ]
161
+ target = build(:date64, values)
162
+ assert_equal(values, target.values)
163
+ end
164
+
165
+ def test_timestamp_second
166
+ values = [
167
+ {"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil},
168
+ nil,
169
+ ]
170
+ target = build({
171
+ type: :timestamp,
172
+ unit: :second,
173
+ },
174
+ values)
175
+ assert_equal(values, target.values)
176
+ end
177
+
178
+ def test_timestamp_milli
179
+ values = [
180
+ {"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil},
181
+ nil,
182
+ ]
183
+ target = build({
184
+ type: :timestamp,
185
+ unit: :milli,
186
+ },
187
+ values)
188
+ assert_equal(values, target.values)
189
+ end
190
+
191
+ def test_timestamp_micro
192
+ values = [
193
+ {"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil},
194
+ nil,
195
+ ]
196
+ target = build({
197
+ type: :timestamp,
198
+ unit: :micro,
199
+ },
200
+ values)
201
+ assert_equal(values, target.values)
202
+ end
203
+
204
+ def test_timestamp_nano
205
+ values = [
206
+ {"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil},
207
+ nil,
208
+ ]
209
+ target = build({
210
+ type: :timestamp,
211
+ unit: :nano,
212
+ },
213
+ values)
214
+ assert_equal(values, target.values)
215
+ end
216
+
217
+ def test_time32_second
218
+ unit = Arrow::TimeUnit::SECOND
219
+ values = [
220
+ # 00:10:00
221
+ {"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil},
222
+ nil,
223
+ ]
224
+ target = build({
225
+ type: :time32,
226
+ unit: :second,
227
+ },
228
+ values)
229
+ assert_equal(values, target.values)
230
+ end
231
+
232
+ def test_time32_milli
233
+ unit = Arrow::TimeUnit::MILLI
234
+ values = [
235
+ # 00:10:00.123
236
+ {"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil},
237
+ nil,
238
+ ]
239
+ target = build({
240
+ type: :time32,
241
+ unit: :milli,
242
+ },
243
+ values)
244
+ assert_equal(values, target.values)
245
+ end
246
+
247
+ def test_time64_micro
248
+ unit = Arrow::TimeUnit::MICRO
249
+ values = [
250
+ # 00:10:00.123456
251
+ {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil},
252
+ nil,
253
+ ]
254
+ target = build({
255
+ type: :time64,
256
+ unit: :micro,
257
+ },
258
+ values)
259
+ assert_equal(values, target.values)
260
+ end
261
+
262
+ def test_time64_nano
263
+ unit = Arrow::TimeUnit::NANO
264
+ values = [
265
+ # 00:10:00.123456789
266
+ {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil},
267
+ nil,
268
+ ]
269
+ target = build({
270
+ type: :time64,
271
+ unit: :nano,
272
+ },
273
+ values)
274
+ assert_equal(values, target.values)
275
+ end
276
+
277
+ def test_decimal128
278
+ values = [
279
+ {"key1" => BigDecimal("92.92"), "key2" => nil},
280
+ nil,
281
+ ]
282
+ target = build({
283
+ type: :decimal128,
284
+ precision: 8,
285
+ scale: 2,
286
+ },
287
+ values)
288
+ assert_equal(values, target.values)
289
+ end
290
+
291
+ def test_decimal256
292
+ values = [
293
+ {"key1" => BigDecimal("92.92"), "key2" => nil},
294
+ nil,
295
+ ]
296
+ target = build({
297
+ type: :decimal256,
298
+ precision: 38,
299
+ scale: 2,
300
+ },
301
+ values)
302
+ assert_equal(values, target.values)
303
+ end
304
+
305
+ def test_list
306
+ values = [
307
+ {"key1" => [true, nil, false], "key2" => nil},
308
+ nil,
309
+ ]
310
+ target = build({
311
+ type: :list,
312
+ field: {
313
+ name: :sub_element,
314
+ type: :boolean,
315
+ },
316
+ },
317
+ values)
318
+ assert_equal(values, target.values)
319
+ end
320
+
321
+ def test_struct
322
+ values = [
323
+ {"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}},
324
+ nil,
325
+ ]
326
+ target = build({
327
+ type: :struct,
328
+ fields: [
329
+ {
330
+ name: :field,
331
+ type: :boolean,
332
+ },
333
+ ],
334
+ },
335
+ values)
336
+ assert_equal(values, target.values)
337
+ end
338
+
339
+ def test_map
340
+ values = [
341
+ {"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil},
342
+ nil,
343
+ ]
344
+ target = build({
345
+ type: :map,
346
+ key: :string,
347
+ item: :boolean,
348
+ },
349
+ values)
350
+ assert_equal(values, target.values)
351
+ end
352
+
353
+ def test_sparse_union
354
+ omit("Need to add support for SparseUnionArrayBuilder")
355
+ values = [
356
+ {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
357
+ nil,
358
+ ]
359
+ target = build({
360
+ type: :sparse_union,
361
+ fields: [
362
+ {
363
+ name: :field1,
364
+ type: :boolean,
365
+ },
366
+ {
367
+ name: :field2,
368
+ type: :uint8,
369
+ },
370
+ ],
371
+ type_codes: [0, 1],
372
+ },
373
+ values)
374
+ assert_equal(values, target.values)
375
+ end
376
+
377
+ def test_dense_union
378
+ omit("Need to add support for DenseUnionArrayBuilder")
379
+ values = [
380
+ {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
381
+ nil,
382
+ ]
383
+ target = build({
384
+ type: :dense_union,
385
+ fields: [
386
+ {
387
+ name: :field1,
388
+ type: :boolean,
389
+ },
390
+ {
391
+ name: :field2,
392
+ type: :uint8,
393
+ },
394
+ ],
395
+ type_codes: [0, 1],
396
+ },
397
+ values)
398
+ assert_equal(values, target.values)
399
+ end
400
+
401
+ def test_dictionary
402
+ omit("Need to add support for DictionaryArrayBuilder")
403
+ values = [
404
+ {"key1" => "Ruby", "key2" => nil, "key3" => "GLib"},
405
+ nil,
406
+ ]
407
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
408
+ target = build({
409
+ type: :dictionary,
410
+ index_data_type: :int8,
411
+ dictionary: dictionary,
412
+ ordered: true,
413
+ },
414
+ values)
415
+ assert_equal(values, target.values)
416
+ end
417
+ end
418
+
419
+ class ValuesArrayMapArrayTest < Test::Unit::TestCase
420
+ include ValuesMapArrayTests
421
+
422
+ def build(item_type, values)
423
+ build_array(item_type, values)
424
+ end
425
+ end
426
+
427
+ class ValuesChunkedArrayMapArrayTest < Test::Unit::TestCase
428
+ include ValuesMapArrayTests
429
+
430
+ def build(item_type, values)
431
+ Arrow::ChunkedArray.new([build_array(item_type, values)])
432
+ end
433
+ end
@@ -373,6 +373,20 @@ module ValuesSparseUnionArrayTests
373
373
  assert_equal(values, target.values)
374
374
  end
375
375
 
376
+ def test_map
377
+ values = [
378
+ {"0" => {"key1" => true, "key2" => nil}},
379
+ {"1" => nil},
380
+ ]
381
+ target = build({
382
+ type: :map,
383
+ key: :string,
384
+ item: :boolean,
385
+ },
386
+ values)
387
+ assert_equal(values, target.values)
388
+ end
389
+
376
390
  def test_sparse_union
377
391
  omit("Need to add support for SparseUnionArrayBuilder")
378
392
  values = [
@@ -378,6 +378,21 @@ module ValuesStructArrayTests
378
378
  assert_equal(values, target.values)
379
379
  end
380
380
 
381
+ def test_map
382
+ values = [
383
+ {"field" => {"key1" => true, "key2" => nil}},
384
+ nil,
385
+ {"field" => nil},
386
+ ]
387
+ target = build({
388
+ type: :map,
389
+ key: :string,
390
+ item: :boolean,
391
+ },
392
+ values)
393
+ assert_equal(values, target.values)
394
+ end
395
+
381
396
  def test_sparse_union
382
397
  omit("Need to add support for SparseUnionArrayBuilder")
383
398
  values = [