red-arrow 5.0.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/datum.rb +2 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +13 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +41 -3
- data/lib/arrow/table-saver.rb +29 -3
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +32 -38
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +190 -53
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +107 -76
data/test/test-table.rb
CHANGED
@@ -53,26 +53,26 @@ class TableTest < Test::Unit::TestCase
|
|
53
53
|
target_rows_raw = [nil, true, true, false, true, false, true, true]
|
54
54
|
target_rows = Arrow::BooleanArray.new(target_rows_raw)
|
55
55
|
assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
|
56
|
-
|
57
|
-
0
|
58
|
-
1
|
59
|
-
2
|
60
|
-
3
|
61
|
-
4
|
62
|
-
5
|
56
|
+
count visible
|
57
|
+
0 (null) (null)
|
58
|
+
1 2 false
|
59
|
+
2 4 (null)
|
60
|
+
3 16 true
|
61
|
+
4 64 (null)
|
62
|
+
5 128 (null)
|
63
63
|
TABLE
|
64
64
|
end
|
65
65
|
|
66
66
|
test("Array: boolean") do
|
67
67
|
target_rows_raw = [nil, true, true, false, true, false, true, true]
|
68
68
|
assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
|
69
|
-
|
70
|
-
0
|
71
|
-
1
|
72
|
-
2
|
73
|
-
3
|
74
|
-
4
|
75
|
-
5
|
69
|
+
count visible
|
70
|
+
0 (null) (null)
|
71
|
+
1 2 false
|
72
|
+
2 4 (null)
|
73
|
+
3 16 true
|
74
|
+
4 64 (null)
|
75
|
+
5 128 (null)
|
76
76
|
TABLE
|
77
77
|
end
|
78
78
|
|
@@ -100,7 +100,7 @@ class TableTest < Test::Unit::TestCase
|
|
100
100
|
test("Range: positive: include end") do
|
101
101
|
assert_equal(<<-TABLE, @table.slice(2..4).to_s)
|
102
102
|
count visible
|
103
|
-
0 4
|
103
|
+
0 4 (null)
|
104
104
|
1 8 true
|
105
105
|
2 16 true
|
106
106
|
TABLE
|
@@ -109,7 +109,7 @@ class TableTest < Test::Unit::TestCase
|
|
109
109
|
test("Range: positive: exclude end") do
|
110
110
|
assert_equal(<<-TABLE, @table.slice(2...4).to_s)
|
111
111
|
count visible
|
112
|
-
0 4
|
112
|
+
0 4 (null)
|
113
113
|
1 8 true
|
114
114
|
TABLE
|
115
115
|
end
|
@@ -119,7 +119,7 @@ class TableTest < Test::Unit::TestCase
|
|
119
119
|
count visible
|
120
120
|
0 16 true
|
121
121
|
1 32 false
|
122
|
-
2 64
|
122
|
+
2 64 (null)
|
123
123
|
TABLE
|
124
124
|
end
|
125
125
|
|
@@ -147,6 +147,104 @@ class TableTest < Test::Unit::TestCase
|
|
147
147
|
TABLE
|
148
148
|
end
|
149
149
|
|
150
|
+
test("{key: Number}") do
|
151
|
+
assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
|
152
|
+
count visible
|
153
|
+
0 16 true
|
154
|
+
TABLE
|
155
|
+
end
|
156
|
+
|
157
|
+
test("{key: String}") do
|
158
|
+
table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
|
159
|
+
assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
|
160
|
+
name
|
161
|
+
0 b
|
162
|
+
TABLE
|
163
|
+
end
|
164
|
+
|
165
|
+
test("{key: true}") do
|
166
|
+
assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
|
167
|
+
count visible
|
168
|
+
0 1 true
|
169
|
+
1 (null) (null)
|
170
|
+
2 8 true
|
171
|
+
3 16 true
|
172
|
+
4 (null) (null)
|
173
|
+
5 (null) (null)
|
174
|
+
TABLE
|
175
|
+
end
|
176
|
+
|
177
|
+
test("{key: false}") do
|
178
|
+
assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
|
179
|
+
count visible
|
180
|
+
0 2 false
|
181
|
+
1 (null) (null)
|
182
|
+
2 32 false
|
183
|
+
3 (null) (null)
|
184
|
+
4 (null) (null)
|
185
|
+
TABLE
|
186
|
+
end
|
187
|
+
|
188
|
+
test("{key: Range}: beginless include end") do
|
189
|
+
assert_equal(<<-TABLE, @table.slice(count: ..8).to_s)
|
190
|
+
count visible
|
191
|
+
0 1 true
|
192
|
+
1 2 false
|
193
|
+
2 4 (null)
|
194
|
+
3 8 true
|
195
|
+
TABLE
|
196
|
+
end
|
197
|
+
|
198
|
+
test("{key: Range}: beginless exclude end") do
|
199
|
+
assert_equal(<<-TABLE, @table.slice(count: ...8).to_s)
|
200
|
+
count visible
|
201
|
+
0 1 true
|
202
|
+
1 2 false
|
203
|
+
2 4 (null)
|
204
|
+
TABLE
|
205
|
+
end
|
206
|
+
|
207
|
+
test("{key: Range}: endless") do
|
208
|
+
assert_equal(<<-TABLE, @table.slice(count: 16..).to_s)
|
209
|
+
count visible
|
210
|
+
0 16 true
|
211
|
+
1 32 false
|
212
|
+
2 64 (null)
|
213
|
+
3 128 (null)
|
214
|
+
TABLE
|
215
|
+
end
|
216
|
+
|
217
|
+
test("{key: Range}: include end") do
|
218
|
+
assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
|
219
|
+
count visible
|
220
|
+
0 1 true
|
221
|
+
1 2 false
|
222
|
+
2 4 (null)
|
223
|
+
3 8 true
|
224
|
+
4 16 true
|
225
|
+
TABLE
|
226
|
+
end
|
227
|
+
|
228
|
+
test("{key: Range}: exclude end") do
|
229
|
+
assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
|
230
|
+
count visible
|
231
|
+
0 1 true
|
232
|
+
1 2 false
|
233
|
+
2 4 (null)
|
234
|
+
3 8 true
|
235
|
+
TABLE
|
236
|
+
end
|
237
|
+
|
238
|
+
test("{key1: Range, key2: true}") do
|
239
|
+
assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
|
240
|
+
count visible
|
241
|
+
0 2 false
|
242
|
+
1 (null) (null)
|
243
|
+
2 (null) (null)
|
244
|
+
3 (null) (null)
|
245
|
+
TABLE
|
246
|
+
end
|
247
|
+
|
150
248
|
sub_test_case("wrong argument") do
|
151
249
|
test("no arguments") do
|
152
250
|
message = "wrong number of arguments (given 0, expected 1..2)"
|
@@ -190,20 +288,45 @@ class TableTest < Test::Unit::TestCase
|
|
190
288
|
end
|
191
289
|
|
192
290
|
sub_test_case("#[]") do
|
291
|
+
def setup
|
292
|
+
@table = Arrow::Table.new(a: [true],
|
293
|
+
b: [true],
|
294
|
+
c: [true],
|
295
|
+
d: [true],
|
296
|
+
e: [true],
|
297
|
+
f: [true],
|
298
|
+
g: [true])
|
299
|
+
end
|
300
|
+
|
193
301
|
test("[String]") do
|
194
302
|
assert_equal(Arrow::Column.new(@table, 0),
|
195
|
-
@table["
|
303
|
+
@table["a"])
|
196
304
|
end
|
197
305
|
|
198
306
|
test("[Symbol]") do
|
199
307
|
assert_equal(Arrow::Column.new(@table, 1),
|
200
|
-
@table[:
|
308
|
+
@table[:b])
|
201
309
|
end
|
202
310
|
|
203
311
|
test("[Integer]") do
|
204
|
-
assert_equal(Arrow::Column.new(@table,
|
312
|
+
assert_equal(Arrow::Column.new(@table, 6),
|
205
313
|
@table[-1])
|
206
314
|
end
|
315
|
+
|
316
|
+
test("[Range]") do
|
317
|
+
assert_equal(Arrow::Table.new(d: [true],
|
318
|
+
e: [true]),
|
319
|
+
@table[3..4])
|
320
|
+
end
|
321
|
+
|
322
|
+
test("[[Symbol, String, Integer, Range]]") do
|
323
|
+
assert_equal(Arrow::Table.new(c: [true],
|
324
|
+
a: [true],
|
325
|
+
g: [true],
|
326
|
+
d: [true],
|
327
|
+
e: [true]),
|
328
|
+
@table[[:c, "a", -1, 3..4]])
|
329
|
+
end
|
207
330
|
end
|
208
331
|
|
209
332
|
sub_test_case("#merge") do
|
@@ -214,12 +337,12 @@ class TableTest < Test::Unit::TestCase
|
|
214
337
|
count visible name
|
215
338
|
0 1 true a
|
216
339
|
1 2 false b
|
217
|
-
2 4
|
340
|
+
2 4 (null) c
|
218
341
|
3 8 true d
|
219
342
|
4 16 true e
|
220
343
|
5 32 false f
|
221
|
-
6 64
|
222
|
-
7 128
|
344
|
+
6 64 (null) g
|
345
|
+
7 128 (null) h
|
223
346
|
TABLE
|
224
347
|
end
|
225
348
|
|
@@ -261,12 +384,12 @@ class TableTest < Test::Unit::TestCase
|
|
261
384
|
count visible name
|
262
385
|
0 1 true a
|
263
386
|
1 2 false b
|
264
|
-
2 4
|
387
|
+
2 4 (null) c
|
265
388
|
3 8 true d
|
266
389
|
4 16 true e
|
267
390
|
5 32 false f
|
268
|
-
6 64
|
269
|
-
7 128
|
391
|
+
6 64 (null) g
|
392
|
+
7 128 (null) h
|
270
393
|
TABLE
|
271
394
|
end
|
272
395
|
|
@@ -614,12 +737,12 @@ chris\t-1
|
|
614
737
|
count visible
|
615
738
|
0 1 true
|
616
739
|
1 2 false
|
617
|
-
2 4
|
740
|
+
2 4 (null)
|
618
741
|
3 8 true
|
619
742
|
4 16 true
|
620
743
|
5 32 false
|
621
|
-
6 64
|
622
|
-
7 128
|
744
|
+
6 64 (null)
|
745
|
+
7 128 (null)
|
623
746
|
TABLE
|
624
747
|
end
|
625
748
|
|
@@ -707,13 +830,13 @@ visible: false
|
|
707
830
|
test("Array: boolean") do
|
708
831
|
filter = [nil, true, true, false, true, false, true, true]
|
709
832
|
assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
|
710
|
-
|
711
|
-
0
|
712
|
-
1
|
713
|
-
2
|
714
|
-
3
|
715
|
-
4
|
716
|
-
5
|
833
|
+
count visible
|
834
|
+
0 (null) (null)
|
835
|
+
1 2 false
|
836
|
+
2 4 (null)
|
837
|
+
3 16 true
|
838
|
+
4 64 (null)
|
839
|
+
5 128 (null)
|
717
840
|
TABLE
|
718
841
|
end
|
719
842
|
|
@@ -721,13 +844,13 @@ visible: false
|
|
721
844
|
array = [nil, true, true, false, true, false, true, true]
|
722
845
|
filter = Arrow::BooleanArray.new(array)
|
723
846
|
assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
|
724
|
-
|
725
|
-
0
|
726
|
-
1
|
727
|
-
2
|
728
|
-
3
|
729
|
-
4
|
730
|
-
5
|
847
|
+
count visible
|
848
|
+
0 (null) (null)
|
849
|
+
1 2 false
|
850
|
+
2 4 (null)
|
851
|
+
3 16 true
|
852
|
+
4 64 (null)
|
853
|
+
5 128 (null)
|
731
854
|
TABLE
|
732
855
|
end
|
733
856
|
|
@@ -739,13 +862,13 @@ visible: false
|
|
739
862
|
]
|
740
863
|
filter = Arrow::ChunkedArray.new(filter_chunks)
|
741
864
|
assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
|
742
|
-
|
743
|
-
0
|
744
|
-
1
|
745
|
-
2
|
746
|
-
3
|
747
|
-
4
|
748
|
-
5
|
865
|
+
count visible
|
866
|
+
0 (null) (null)
|
867
|
+
1 2 false
|
868
|
+
2 4 (null)
|
869
|
+
3 16 true
|
870
|
+
4 64 (null)
|
871
|
+
5 128 (null)
|
749
872
|
TABLE
|
750
873
|
end
|
751
874
|
end
|
@@ -757,7 +880,7 @@ visible: false
|
|
757
880
|
count visible
|
758
881
|
0 2 false
|
759
882
|
1 1 true
|
760
|
-
2 4
|
883
|
+
2 4 (null)
|
761
884
|
TABLE
|
762
885
|
end
|
763
886
|
|
@@ -767,7 +890,7 @@ visible: false
|
|
767
890
|
count visible
|
768
891
|
0 2 false
|
769
892
|
1 1 true
|
770
|
-
2 4
|
893
|
+
2 4 (null)
|
771
894
|
TABLE
|
772
895
|
end
|
773
896
|
|
@@ -781,7 +904,21 @@ visible: false
|
|
781
904
|
count visible
|
782
905
|
0 2 false
|
783
906
|
1 1 true
|
784
|
-
2 4
|
907
|
+
2 4 (null)
|
908
|
+
TABLE
|
909
|
+
end
|
910
|
+
end
|
911
|
+
|
912
|
+
sub_test_case("#concatenate") do
|
913
|
+
test("options: :unify_schemas") do
|
914
|
+
table1 = Arrow::Table.new(a: [true],
|
915
|
+
b: [false])
|
916
|
+
table2 = Arrow::Table.new(b: [false])
|
917
|
+
concatenated = table1.concatenate([table2], unify_schemas: true)
|
918
|
+
assert_equal(<<-TABLE, concatenated.to_s)
|
919
|
+
a b
|
920
|
+
0 true false
|
921
|
+
1 (null) false
|
785
922
|
TABLE
|
786
923
|
end
|
787
924
|
end
|
@@ -382,6 +382,20 @@ module ValuesDenseUnionArrayTests
|
|
382
382
|
assert_equal(values, target.values)
|
383
383
|
end
|
384
384
|
|
385
|
+
def test_map
|
386
|
+
values = [
|
387
|
+
{"0" => {"key1" => true, "key2" => nil}},
|
388
|
+
{"1" => nil},
|
389
|
+
]
|
390
|
+
target = build({
|
391
|
+
type: :map,
|
392
|
+
key: :string,
|
393
|
+
item: :boolean,
|
394
|
+
},
|
395
|
+
values)
|
396
|
+
assert_equal(values, target.values)
|
397
|
+
end
|
398
|
+
|
385
399
|
def test_sparse_union
|
386
400
|
omit("Need to add support for SparseUnionArrayBuilder")
|
387
401
|
values = [
|
@@ -420,6 +420,23 @@ module ValuesListArrayTests
|
|
420
420
|
assert_equal(values, target.values)
|
421
421
|
end
|
422
422
|
|
423
|
+
def test_map
|
424
|
+
values = [
|
425
|
+
[
|
426
|
+
{"key1" => true, "key2" => nil},
|
427
|
+
nil,
|
428
|
+
],
|
429
|
+
nil,
|
430
|
+
]
|
431
|
+
target = build({
|
432
|
+
type: :map,
|
433
|
+
key: :string,
|
434
|
+
item: :boolean,
|
435
|
+
},
|
436
|
+
values)
|
437
|
+
assert_equal(values, target.values)
|
438
|
+
end
|
439
|
+
|
423
440
|
def test_sparse
|
424
441
|
omit("Need to add support for SparseUnionArrayBuilder")
|
425
442
|
values = [
|