red-arrow 5.0.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
@@ -0,0 +1,433 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases for reading map values back through +#values+.
#
# Every case builds a map whose keys are strings and whose items have the
# type under test, then asserts that +target.values+ equals the Ruby
# objects the map was built from. Each data set contains a +nil+ item and
# a +nil+ map entry.
#
# Including classes must define +build(item_type, values)+ returning the
# object under test; +build_array+ is provided here as a building block.
module ValuesMapArrayTests
  # Describes a map data type with string keys.
  #
  # @param item_type the item type description (a Symbol such as +:int8+,
  #   or a Hash such as <tt>{type: :timestamp, unit: :second}</tt>)
  # @return [Arrow::MapDataType]
  def build_data_type(item_type)
    Arrow::MapDataType.new(
      key: :string,
      item: item_type
    )
  end

  # Builds an Arrow::MapArray of the given item type from Ruby values.
  #
  # @param item_type see #build_data_type
  # @param values [Array<Hash, nil>] one entry per map
  # @return [Arrow::MapArray]
  def build_array(item_type, values)
    Arrow::MapArray.new(build_data_type(item_type), values)
  end

  def test_null
    values = [
      {"key1" => nil},
      nil,
    ]
    target = build(:null, values)
    assert_equal(values, target.values)
  end

  def test_boolean
    values = [
      {"key1" => false, "key2" => nil},
      nil,
    ]
    target = build(:boolean, values)
    assert_equal(values, target.values)
  end

  # Uses the largest signed 8-bit value.
  def test_int8
    values = [
      {"key1" => (2 ** 7) - 1, "key2" => nil},
      nil,
    ]
    target = build(:int8, values)
    assert_equal(values, target.values)
  end

  # Uses the largest unsigned 8-bit value.
  def test_uint8
    values = [
      {"key1" => (2 ** 8) - 1, "key2" => nil},
      nil,
    ]
    target = build(:uint8, values)
    assert_equal(values, target.values)
  end

  # Uses the smallest signed 16-bit value. With this case every
  # fixed-width integer type (8/16/32/64 bit, signed and unsigned) is
  # exercised by this module.
  def test_int16
    values = [
      {"key1" => -(2 ** 15), "key2" => nil},
      nil,
    ]
    target = build(:int16, values)
    assert_equal(values, target.values)
  end

  # Uses the largest unsigned 16-bit value.
  def test_uint16
    values = [
      {"key1" => (2 ** 16) - 1, "key2" => nil},
      nil,
    ]
    target = build(:uint16, values)
    assert_equal(values, target.values)
  end

  # Uses the smallest signed 32-bit value.
  def test_int32
    values = [
      {"key1" => -(2 ** 31), "key2" => nil},
      nil,
    ]
    target = build(:int32, values)
    assert_equal(values, target.values)
  end

  # Uses the largest unsigned 32-bit value.
  def test_uint32
    values = [
      {"key1" => (2 ** 32) - 1, "key2" => nil},
      nil,
    ]
    target = build(:uint32, values)
    assert_equal(values, target.values)
  end

  # Uses the smallest signed 64-bit value.
  def test_int64
    values = [
      {"key1" => -(2 ** 63), "key2" => nil},
      nil,
    ]
    target = build(:int64, values)
    assert_equal(values, target.values)
  end

  # Uses the largest unsigned 64-bit value.
  def test_uint64
    values = [
      {"key1" => (2 ** 64) - 1, "key2" => nil},
      nil,
    ]
    target = build(:uint64, values)
    assert_equal(values, target.values)
  end

  def test_float
    values = [
      {"key1" => -1.0, "key2" => nil},
      nil,
    ]
    target = build(:float, values)
    assert_equal(values, target.values)
  end

  def test_double
    values = [
      {"key1" => -1.0, "key2" => nil},
      nil,
    ]
    target = build(:double, values)
    assert_equal(values, target.values)
  end

  # "\xff".b is a binary (ASCII-8BIT) string, not valid UTF-8 text.
  def test_binary
    values = [
      {"key1" => "\xff".b, "key2" => nil},
      nil,
    ]
    target = build(:binary, values)
    assert_equal(values, target.values)
  end

  def test_string
    values = [
      {"key1" => "Ruby", "key2" => nil},
      nil,
    ]
    target = build(:string, values)
    assert_equal(values, target.values)
  end

  def test_date32
    values = [
      {"key1" => Date.new(1960, 1, 1), "key2" => nil},
      nil,
    ]
    target = build(:date32, values)
    assert_equal(values, target.values)
  end

  def test_date64
    values = [
      {"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil},
      nil,
    ]
    target = build(:date64, values)
    assert_equal(values, target.values)
  end

  def test_timestamp_second
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :timestamp,
                     unit: :second,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_timestamp_milli
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :timestamp,
                     unit: :milli,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_timestamp_micro
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :timestamp,
                     unit: :micro,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_timestamp_nano
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :timestamp,
                     unit: :nano,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    values = [
      # 00:10:00
      {"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :time32,
                     unit: :second,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    values = [
      # 00:10:00.123
      {"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :time32,
                     unit: :milli,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    values = [
      # 00:10:00.123456
      {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :time64,
                     unit: :micro,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    values = [
      # 00:10:00.123456789
      {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :time64,
                     unit: :nano,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_decimal128
    values = [
      {"key1" => BigDecimal("92.92"), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :decimal128,
                     precision: 8,
                     scale: 2,
                   },
                   values)
    assert_equal(values, target.values)
  end

  def test_decimal256
    values = [
      {"key1" => BigDecimal("92.92"), "key2" => nil},
      nil,
    ]
    target = build({
                     type: :decimal256,
                     precision: 38,
                     scale: 2,
                   },
                   values)
    assert_equal(values, target.values)
  end

  # Map items that are lists of booleans, including a nil list element.
  def test_list
    values = [
      {"key1" => [true, nil, false], "key2" => nil},
      nil,
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :sub_element,
                       type: :boolean,
                     },
                   },
                   values)
    assert_equal(values, target.values)
  end

  # Map items that are structs; covers a nil struct ("key2") as well as a
  # struct whose only field is nil ("key3").
  def test_struct
    values = [
      {"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}},
      nil,
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :field,
                         type: :boolean,
                       },
                     ],
                   },
                   values)
    assert_equal(values, target.values)
  end

  # Map items that are themselves string-keyed maps of booleans.
  def test_map
    values = [
      {"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil},
      nil,
    ]
    target = build({
                     type: :map,
                     key: :string,
                     item: :boolean,
                   },
                   values)
    assert_equal(values, target.values)
  end

  # Currently omitted; the omit message records what is missing.
  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    values = [
      {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
      nil,
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   values)
    assert_equal(values, target.values)
  end

  # Currently omitted; the omit message records what is missing.
  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    values = [
      {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
      nil,
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   values)
    assert_equal(values, target.values)
  end

  # Currently omitted; the omit message records what is missing.
  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    values = [
      {"key1" => "Ruby", "key2" => nil, "key3" => "GLib"},
      nil,
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     dictionary: dictionary,
                     ordered: true,
                   },
                   values)
    assert_equal(values, target.values)
  end
end
418
+
419
# Runs the shared ValuesMapArrayTests cases against a plain Arrow::MapArray.
class ValuesArrayMapArrayTest < Test::Unit::TestCase
  include ValuesMapArrayTests

  # Supplies the object under test for the shared cases: a map array with
  # string keys and +item_type+ items, populated from +values+.
  def build(item_type, values)
    map_type = build_data_type(item_type)
    Arrow::MapArray.new(map_type, values)
  end
end
426
+
427
# Runs the shared ValuesMapArrayTests cases against an Arrow::ChunkedArray
# that holds the map array as its only chunk.
class ValuesChunkedArrayMapArrayTest < Test::Unit::TestCase
  include ValuesMapArrayTests

  # Supplies the object under test for the shared cases: the map array
  # built from +values+, wrapped in a one-chunk chunked array.
  def build(item_type, values)
    chunk = build_array(item_type, values)
    Arrow::ChunkedArray.new([chunk])
  end
end
@@ -373,6 +373,20 @@ module ValuesSparseUnionArrayTests
373
373
  assert_equal(values, target.values)
374
374
  end
375
375
 
376
# Map-typed child (string keys, boolean items) read back through #values.
# The first entry carries a map with a nil item, the second a nil value.
# NOTE(review): the outer keys "0" and "1" presumably select the union
# child; that mapping is decided by `build`, which is outside this view.
def test_map
  map_type = {
    type: :map,
    key: :string,
    item: :boolean,
  }
  values = [
    {"0" => {"key1" => true, "key2" => nil}},
    {"1" => nil},
  ]
  target = build(map_type, values)
  assert_equal(values, target.values)
end
389
+
376
390
  def test_sparse_union
377
391
  omit("Need to add support for SparseUnionArrayBuilder")
378
392
  values = [
@@ -378,6 +378,21 @@ module ValuesStructArrayTests
378
378
  assert_equal(values, target.values)
379
379
  end
380
380
 
381
# Map-typed field (string keys, boolean items) read back through #values.
# Covers a populated map with a nil item, a nil entry, and an entry whose
# "field" value is nil.
# NOTE(review): how "field" becomes the struct field is decided by
# `build`, which is outside this view.
def test_map
  map_type = {
    type: :map,
    key: :string,
    item: :boolean,
  }
  values = [
    {"field" => {"key1" => true, "key2" => nil}},
    nil,
    {"field" => nil},
  ]
  target = build(map_type, values)
  assert_equal(values, target.values)
end
395
+
381
396
  def test_sparse_union
382
397
  omit("Need to add support for SparseUnionArrayBuilder")
383
398
  values = [