red-arrow 5.0.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
@@ -0,0 +1,441 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared raw_records round-trip tests for a single map column whose keys are
# strings and whose item type is the type under test.
#
# This module calls build(type, records) but does not define it; the including
# test case must provide it and return an object that responds to raw_records.
# Every test expects raw_records to give back exactly the records it was built
# from, including nil items and a nil map.
module RawRecordsMapArrayTests
  # Returns the one-column schema description used by every test: a map
  # column named "column" with string keys and +type+ as the item type.
  def build_schema(type)
    {
      column: {
        type: :map,
        key: :string,
        item: type
      },
    }
  end

  def test_null
    records = [
      [{"key1" => nil}],
      [nil],
    ]
    target = build(:null, records)
    assert_equal(records, target.raw_records)
  end

  def test_boolean
    records = [
      [{"key1" => true, "key2" => nil}],
      [nil],
    ]
    target = build(:boolean, records)
    assert_equal(records, target.raw_records)
  end

  # The integer tests use the extreme value of each width: the minimum for
  # signed types and the maximum for unsigned types.
  def test_int8
    records = [
      [{"key1" => -(2 ** 7), "key2" => nil}],
      [nil],
    ]
    target = build(:int8, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint8
    records = [
      [{"key1" => (2 ** 8) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint8, records)
    assert_equal(records, target.raw_records)
  end

  def test_int16
    records = [
      [{"key1" => -(2 ** 15), "key2" => nil}],
      [nil],
    ]
    target = build(:int16, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint16
    records = [
      [{"key1" => (2 ** 16) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint16, records)
    assert_equal(records, target.raw_records)
  end

  def test_int32
    records = [
      [{"key1" => -(2 ** 31), "key2" => nil}],
      [nil],
    ]
    target = build(:int32, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint32
    records = [
      [{"key1" => (2 ** 32) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint32, records)
    assert_equal(records, target.raw_records)
  end

  def test_int64
    records = [
      [{"key1" => -(2 ** 63), "key2" => nil}],
      [nil],
    ]
    target = build(:int64, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint64
    records = [
      [{"key1" => (2 ** 64) - 1, "key2" => nil}],
      [nil],
    ]
    target = build(:uint64, records)
    assert_equal(records, target.raw_records)
  end

  def test_float
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    target = build(:float, records)
    assert_equal(records, target.raw_records)
  end

  def test_double
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    target = build(:double, records)
    assert_equal(records, target.raw_records)
  end

  def test_binary
    records = [
      # "\xff" is not valid UTF-8, so the value is forced to binary encoding.
      [{"key1" => "\xff".b, "key2" => nil}],
      [nil],
    ]
    target = build(:binary, records)
    assert_equal(records, target.raw_records)
  end

  def test_string
    records = [
      [{"key1" => "Ruby", "key2" => nil}],
      [nil],
    ]
    target = build(:string, records)
    assert_equal(records, target.raw_records)
  end

  def test_date32
    records = [
      [{"key1" => Date.new(1960, 1, 1), "key2" => nil}],
      [nil],
    ]
    target = build(:date32, records)
    assert_equal(records, target.raw_records)
  end

  def test_date64
    records = [
      [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}],
      [nil],
    ]
    target = build(:date64, records)
    assert_equal(records, target.raw_records)
  end

  # The timestamp tests carry exactly as many fractional digits as the unit
  # under test can represent.
  def test_timestamp_second
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :timestamp,
                     unit: :second,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_milli
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :timestamp,
                     unit: :milli,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_micro
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :timestamp,
                     unit: :micro,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_nano
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :timestamp,
                     unit: :nano,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :time32,
                     unit: :second,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :time32,
                     unit: :milli,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :time64,
                     unit: :micro,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :time64,
                     unit: :nano,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal128
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :decimal128,
                     precision: 8,
                     scale: 2,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal256
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :decimal256,
                     precision: 38,
                     scale: 2,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_list
    records = [
      [{"key1" => [true, nil, false], "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_struct
    records = [
      [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}],
      [nil],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_map
    records = [
      [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :map,
                     key: :string,
                     item: :boolean,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      # One map entry per declared union field (field1, field2) plus a nil
      # item, the same shape as test_dense_union. The previous literal nested
      # "key2"/"key3" inside the value of "key1" and used a field name
      # ("field") that the union does not declare.
      [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
      [nil],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
      [nil],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
      [nil],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     dictionary: dictionary,
                     ordered: true,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end
end
426
+
427
# Runs the shared map-array raw_records tests against Arrow::RecordBatch.
class RawRecordsRecordBatchMapArrayTest < Test::Unit::TestCase
  include RawRecordsMapArrayTests

  # Builds the record batch under test from the one-column map schema
  # produced by build_schema.
  def build(type, records)
    schema = build_schema(type)
    Arrow::RecordBatch.new(schema, records)
  end
end
434
+
435
# Runs the shared map-array raw_records tests against Arrow::Table.
class RawRecordsTableMapArrayTest < Test::Unit::TestCase
  include RawRecordsMapArrayTests

  # Builds the table under test from the one-column map schema produced
  # by build_schema.
  def build(type, records)
    schema = build_schema(type)
    Arrow::Table.new(schema, records)
  end
end
@@ -384,6 +384,20 @@ module RawRecordsSparseUnionArrayTests
384
384
  assert_equal(records, target.raw_records)
385
385
  end
386
386
 
387
+ def test_map # Builds a target for a map (string keys, boolean items) and expects raw_records to return the input records.
388
+ records = [
389
+ [{"0" => {"key1" => true, "key2" => nil}}], # NOTE(review): "0"/"1" presumably name the union member; build is defined outside this hunk - confirm
390
+ [{"1" => nil}],
391
+ ]
392
+ target = build({
393
+ type: :map,
394
+ key: :string,
395
+ item: :boolean,
396
+ },
397
+ records)
398
+ assert_equal(records, target.raw_records)
399
+ end
400
+
387
401
  def test_sparse_union
388
402
  omit("Need to add support for SparseUnionArrayBuilder")
389
403
  records = [
@@ -381,6 +381,21 @@ module RawRecordsStructArrayTests
381
381
  assert_equal(records, target.raw_records)
382
382
  end
383
383
 
384
+ def test_map # Builds a target for a map (string keys, boolean items) and expects raw_records to return the input records.
385
+ records = [
386
+ [{"field" => {"key1" => true, "key2" => nil}}], # NOTE(review): "field" is presumably the struct field wrapping the map; build is defined outside this hunk - confirm
387
+ [nil], # the whole record value is nil
388
+ [{"field" => nil}], # the record is present but its "field" value is nil
389
+ ]
390
+ target = build({
391
+ type: :map,
392
+ key: :string,
393
+ item: :boolean,
394
+ },
395
+ records)
396
+ assert_equal(records, target.raw_records)
397
+ end
398
+
384
399
  def test_sparse_union
385
400
  omit("Need to add support for SparseUnionArrayBuilder")
386
401
  records = [
@@ -28,6 +28,13 @@ class ArrayBuilderTest < Test::Unit::TestCase
28
28
  ["Hello", nil, "World"])
29
29
  end
30
30
 
31
+ test("symbols") do # Symbols given to ArrayBuilder.build should equal the array a StringDictionaryArrayBuilder builds from the same names as strings.
32
+ array = Arrow::ArrayBuilder.build([:hello, nil, :world])
33
+ expected_builder = Arrow::StringDictionaryArrayBuilder.new
34
+ assert_equal(expected_builder.build(["hello", nil, "world"]),
35
+ array)
36
+ end
37
+
31
38
  test("boolean") do
32
39
  assert_build(Arrow::ArrayBuilder,
33
40
  [true, nil, false])
@@ -0,0 +1,103 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Tests for Arrow::BinaryDictionaryArrayBuilder#append_values. Each test
# appends values to a fresh builder, finishes it, and compares the resulting
# dictionary entries and index values.
class BinaryDictionaryArrayBuilderTest < Test::Unit::TestCase
  def setup
    @builder = Arrow::BinaryDictionaryArrayBuilder.new
  end

  sub_test_case("#append_values") do
    test("[nil]") do
      @builder.append_values([nil])
      expected = [
        [],
        [nil],
      ]
      assert_equal(expected, finish_dictionary_and_indices)
    end

    test("[String]") do
      @builder.append_values(["he\xffllo"])
      expected = [
        ["he\xffllo".b],
        [0],
      ]
      assert_equal(expected, finish_dictionary_and_indices)
    end

    test("[Symbol]") do
      @builder.append_values([:hello])
      expected = [
        ["hello"],
        [0],
      ]
      assert_equal(expected, finish_dictionary_and_indices)
    end

    test("[nil, String, Symbol]") do
      values = [
        nil,
        "He\xffllo",
        :world,
        "world",
      ]
      @builder.append_values(values)
      # :world and "world" are expected to share dictionary entry 1.
      expected = [
        ["He\xffllo".b, "world"],
        [nil, 0, 1, 1],
      ]
      assert_equal(expected, finish_dictionary_and_indices)
    end

    test("is_valids") do
      values = [
        "He\xffllo",
        :world,
        :goodbye,
      ]
      is_valids = [
        true,
        false,
        true,
      ]
      @builder.append_values(values, is_valids)
      # The value flagged false is expected to become a nil index and to
      # stay out of the dictionary.
      expected = [
        ["He\xffllo".b, "goodbye"],
        [0, nil, 1],
      ]
      assert_equal(expected, finish_dictionary_and_indices)
    end
  end

  private

  # Finishes the builder and returns [dictionary values, index values] as
  # plain Ruby arrays.
  def finish_dictionary_and_indices
    array = @builder.finish
    [
      array.dictionary.to_a,
      array.indices.to_a,
    ]
  end
end
@@ -86,20 +86,20 @@ class CSVLoaderTest < Test::Unit::TestCase
86
86
  test("null: with double quote") do
87
87
  path = fixture_path("null-with-double-quote.csv").to_s
88
88
  assert_equal(<<-TABLE, load_csv(path).to_s)
89
- name score
90
- 0 alice 10
91
- 1 bob
92
- 2 chris -1
89
+ name score
90
+ 0 alice 10
91
+ 1 bob (null)
92
+ 2 chris -1
93
93
  TABLE
94
94
  end
95
95
 
96
96
  test("null: without double quote") do
97
97
  path = fixture_path("null-without-double-quote.csv").to_s
98
98
  assert_equal(<<-TABLE, load_csv(path).to_s)
99
- name score
100
- 0 alice 10
101
- 1 bob
102
- 2 chris -1
99
+ name score
100
+ 0 alice 10
101
+ 1 bob (null)
102
+ 2 chris -1
103
103
  TABLE
104
104
  end
105
105
 
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Tests for Arrow::Expression.try_convert: a Symbol is expected to become a
# field expression, and an Array a call expression whose first element is the
# function name and whose remaining elements are the arguments.
class TestExpression < Test::Unit::TestCase
  sub_test_case(".try_convert") do
    test("Symbol") do
      expected = Arrow::FieldExpression.new("visible")
      actual = Arrow::Expression.try_convert(:visible)
      assert_equal(expected, actual)
    end

    test("[String]") do
      expected = Arrow::CallExpression.new("func", [])
      actual = Arrow::Expression.try_convert(["func"])
      assert_equal(expected, actual)
    end

    test("[Symbol]") do
      expected = Arrow::CallExpression.new("func", [])
      actual = Arrow::Expression.try_convert([:func])
      assert_equal(expected, actual)
    end

    test("[String, String]") do
      expected = Arrow::CallExpression.new("func", ["argument1"])
      actual = Arrow::Expression.try_convert(["func", "argument1"])
      assert_equal(expected, actual)
    end
  end
end