red-arrow 5.0.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
data/test/test-table.rb CHANGED
@@ -53,26 +53,26 @@ class TableTest < Test::Unit::TestCase
53
53
  target_rows_raw = [nil, true, true, false, true, false, true, true]
54
54
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
55
55
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
56
- count visible
57
- 0
58
- 1 2 false
59
- 2 4
60
- 3 16 true
61
- 4 64
62
- 5 128
56
+ count visible
57
+ 0 (null) (null)
58
+ 1 2 false
59
+ 2 4 (null)
60
+ 3 16 true
61
+ 4 64 (null)
62
+ 5 128 (null)
63
63
  TABLE
64
64
  end
65
65
 
66
66
  test("Array: boolean") do
67
67
  target_rows_raw = [nil, true, true, false, true, false, true, true]
68
68
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
69
- count visible
70
- 0
71
- 1 2 false
72
- 2 4
73
- 3 16 true
74
- 4 64
75
- 5 128
69
+ count visible
70
+ 0 (null) (null)
71
+ 1 2 false
72
+ 2 4 (null)
73
+ 3 16 true
74
+ 4 64 (null)
75
+ 5 128 (null)
76
76
  TABLE
77
77
  end
78
78
 
@@ -100,7 +100,7 @@ class TableTest < Test::Unit::TestCase
100
100
  test("Range: positive: include end") do
101
101
  assert_equal(<<-TABLE, @table.slice(2..4).to_s)
102
102
  count visible
103
- 0 4
103
+ 0 4 (null)
104
104
  1 8 true
105
105
  2 16 true
106
106
  TABLE
@@ -109,7 +109,7 @@ class TableTest < Test::Unit::TestCase
109
109
  test("Range: positive: exclude end") do
110
110
  assert_equal(<<-TABLE, @table.slice(2...4).to_s)
111
111
  count visible
112
- 0 4
112
+ 0 4 (null)
113
113
  1 8 true
114
114
  TABLE
115
115
  end
@@ -119,7 +119,7 @@ class TableTest < Test::Unit::TestCase
119
119
  count visible
120
120
  0 16 true
121
121
  1 32 false
122
- 2 64
122
+ 2 64 (null)
123
123
  TABLE
124
124
  end
125
125
 
@@ -147,6 +147,104 @@ class TableTest < Test::Unit::TestCase
147
147
  TABLE
148
148
  end
149
149
 
150
+ test("{key: Number}") do
151
+ assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
152
+ count visible
153
+ 0 16 true
154
+ TABLE
155
+ end
156
+
157
+ test("{key: String}") do
158
+ table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
159
+ assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
160
+ name
161
+ 0 b
162
+ TABLE
163
+ end
164
+
165
+ test("{key: true}") do
166
+ assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
167
+ count visible
168
+ 0 1 true
169
+ 1 (null) (null)
170
+ 2 8 true
171
+ 3 16 true
172
+ 4 (null) (null)
173
+ 5 (null) (null)
174
+ TABLE
175
+ end
176
+
177
+ test("{key: false}") do
178
+ assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
179
+ count visible
180
+ 0 2 false
181
+ 1 (null) (null)
182
+ 2 32 false
183
+ 3 (null) (null)
184
+ 4 (null) (null)
185
+ TABLE
186
+ end
187
+
188
+ test("{key: Range}: beginless include end") do
189
+ assert_equal(<<-TABLE, @table.slice(count: ..8).to_s)
190
+ count visible
191
+ 0 1 true
192
+ 1 2 false
193
+ 2 4 (null)
194
+ 3 8 true
195
+ TABLE
196
+ end
197
+
198
+ test("{key: Range}: beginless exclude end") do
199
+ assert_equal(<<-TABLE, @table.slice(count: ...8).to_s)
200
+ count visible
201
+ 0 1 true
202
+ 1 2 false
203
+ 2 4 (null)
204
+ TABLE
205
+ end
206
+
207
+ test("{key: Range}: endless") do
208
+ assert_equal(<<-TABLE, @table.slice(count: 16..).to_s)
209
+ count visible
210
+ 0 16 true
211
+ 1 32 false
212
+ 2 64 (null)
213
+ 3 128 (null)
214
+ TABLE
215
+ end
216
+
217
+ test("{key: Range}: include end") do
218
+ assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
219
+ count visible
220
+ 0 1 true
221
+ 1 2 false
222
+ 2 4 (null)
223
+ 3 8 true
224
+ 4 16 true
225
+ TABLE
226
+ end
227
+
228
+ test("{key: Range}: exclude end") do
229
+ assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
230
+ count visible
231
+ 0 1 true
232
+ 1 2 false
233
+ 2 4 (null)
234
+ 3 8 true
235
+ TABLE
236
+ end
237
+
238
+ test("{key1: Range, key2: true}") do
239
+ assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
240
+ count visible
241
+ 0 2 false
242
+ 1 (null) (null)
243
+ 2 (null) (null)
244
+ 3 (null) (null)
245
+ TABLE
246
+ end
247
+
150
248
  sub_test_case("wrong argument") do
151
249
  test("no arguments") do
152
250
  message = "wrong number of arguments (given 0, expected 1..2)"
@@ -190,20 +288,45 @@ class TableTest < Test::Unit::TestCase
190
288
  end
191
289
 
192
290
  sub_test_case("#[]") do
291
+ def setup
292
+ @table = Arrow::Table.new(a: [true],
293
+ b: [true],
294
+ c: [true],
295
+ d: [true],
296
+ e: [true],
297
+ f: [true],
298
+ g: [true])
299
+ end
300
+
193
301
  test("[String]") do
194
302
  assert_equal(Arrow::Column.new(@table, 0),
195
- @table["count"])
303
+ @table["a"])
196
304
  end
197
305
 
198
306
  test("[Symbol]") do
199
307
  assert_equal(Arrow::Column.new(@table, 1),
200
- @table[:visible])
308
+ @table[:b])
201
309
  end
202
310
 
203
311
  test("[Integer]") do
204
- assert_equal(Arrow::Column.new(@table, 1),
312
+ assert_equal(Arrow::Column.new(@table, 6),
205
313
  @table[-1])
206
314
  end
315
+
316
+ test("[Range]") do
317
+ assert_equal(Arrow::Table.new(d: [true],
318
+ e: [true]),
319
+ @table[3..4])
320
+ end
321
+
322
+ test("[[Symbol, String, Integer, Range]]") do
323
+ assert_equal(Arrow::Table.new(c: [true],
324
+ a: [true],
325
+ g: [true],
326
+ d: [true],
327
+ e: [true]),
328
+ @table[[:c, "a", -1, 3..4]])
329
+ end
207
330
  end
208
331
 
209
332
  sub_test_case("#merge") do
@@ -214,12 +337,12 @@ class TableTest < Test::Unit::TestCase
214
337
  count visible name
215
338
  0 1 true a
216
339
  1 2 false b
217
- 2 4 c
340
+ 2 4 (null) c
218
341
  3 8 true d
219
342
  4 16 true e
220
343
  5 32 false f
221
- 6 64 g
222
- 7 128 h
344
+ 6 64 (null) g
345
+ 7 128 (null) h
223
346
  TABLE
224
347
  end
225
348
 
@@ -261,12 +384,12 @@ class TableTest < Test::Unit::TestCase
261
384
  count visible name
262
385
  0 1 true a
263
386
  1 2 false b
264
- 2 4 c
387
+ 2 4 (null) c
265
388
  3 8 true d
266
389
  4 16 true e
267
390
  5 32 false f
268
- 6 64 g
269
- 7 128 h
391
+ 6 64 (null) g
392
+ 7 128 (null) h
270
393
  TABLE
271
394
  end
272
395
 
@@ -614,12 +737,12 @@ chris\t-1
614
737
  count visible
615
738
  0 1 true
616
739
  1 2 false
617
- 2 4
740
+ 2 4 (null)
618
741
  3 8 true
619
742
  4 16 true
620
743
  5 32 false
621
- 6 64
622
- 7 128
744
+ 6 64 (null)
745
+ 7 128 (null)
623
746
  TABLE
624
747
  end
625
748
 
@@ -707,13 +830,13 @@ visible: false
707
830
  test("Array: boolean") do
708
831
  filter = [nil, true, true, false, true, false, true, true]
709
832
  assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
710
- count visible
711
- 0
712
- 1 2 false
713
- 2 4
714
- 3 16 true
715
- 4 64
716
- 5 128
833
+ count visible
834
+ 0 (null) (null)
835
+ 1 2 false
836
+ 2 4 (null)
837
+ 3 16 true
838
+ 4 64 (null)
839
+ 5 128 (null)
717
840
  TABLE
718
841
  end
719
842
 
@@ -721,13 +844,13 @@ visible: false
721
844
  array = [nil, true, true, false, true, false, true, true]
722
845
  filter = Arrow::BooleanArray.new(array)
723
846
  assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
724
- count visible
725
- 0
726
- 1 2 false
727
- 2 4
728
- 3 16 true
729
- 4 64
730
- 5 128
847
+ count visible
848
+ 0 (null) (null)
849
+ 1 2 false
850
+ 2 4 (null)
851
+ 3 16 true
852
+ 4 64 (null)
853
+ 5 128 (null)
731
854
  TABLE
732
855
  end
733
856
 
@@ -739,13 +862,13 @@ visible: false
739
862
  ]
740
863
  filter = Arrow::ChunkedArray.new(filter_chunks)
741
864
  assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
742
- count visible
743
- 0
744
- 1 2 false
745
- 2 4
746
- 3 16 true
747
- 4 64
748
- 5 128
865
+ count visible
866
+ 0 (null) (null)
867
+ 1 2 false
868
+ 2 4 (null)
869
+ 3 16 true
870
+ 4 64 (null)
871
+ 5 128 (null)
749
872
  TABLE
750
873
  end
751
874
  end
@@ -757,7 +880,7 @@ visible: false
757
880
  count visible
758
881
  0 2 false
759
882
  1 1 true
760
- 2 4
883
+ 2 4 (null)
761
884
  TABLE
762
885
  end
763
886
 
@@ -767,7 +890,7 @@ visible: false
767
890
  count visible
768
891
  0 2 false
769
892
  1 1 true
770
- 2 4
893
+ 2 4 (null)
771
894
  TABLE
772
895
  end
773
896
 
@@ -781,7 +904,21 @@ visible: false
781
904
  count visible
782
905
  0 2 false
783
906
  1 1 true
784
- 2 4
907
+ 2 4 (null)
908
+ TABLE
909
+ end
910
+ end
911
+
912
+ sub_test_case("#concatenate") do
913
+ test("options: :unify_schemas") do
914
+ table1 = Arrow::Table.new(a: [true],
915
+ b: [false])
916
+ table2 = Arrow::Table.new(b: [false])
917
+ concatenated = table1.concatenate([table2], unify_schemas: true)
918
+ assert_equal(<<-TABLE, concatenated.to_s)
919
+ a b
920
+ 0 true false
921
+ 1 (null) false
785
922
  TABLE
786
923
  end
787
924
  end
@@ -382,6 +382,20 @@ module ValuesDenseUnionArrayTests
382
382
  assert_equal(values, target.values)
383
383
  end
384
384
 
385
+ def test_map
386
+ values = [
387
+ {"0" => {"key1" => true, "key2" => nil}},
388
+ {"1" => nil},
389
+ ]
390
+ target = build({
391
+ type: :map,
392
+ key: :string,
393
+ item: :boolean,
394
+ },
395
+ values)
396
+ assert_equal(values, target.values)
397
+ end
398
+
385
399
  def test_sparse_union
386
400
  omit("Need to add support for SparseUnionArrayBuilder")
387
401
  values = [
@@ -420,6 +420,23 @@ module ValuesListArrayTests
420
420
  assert_equal(values, target.values)
421
421
  end
422
422
 
423
+ def test_map
424
+ values = [
425
+ [
426
+ {"key1" => true, "key2" => nil},
427
+ nil,
428
+ ],
429
+ nil,
430
+ ]
431
+ target = build({
432
+ type: :map,
433
+ key: :string,
434
+ item: :boolean,
435
+ },
436
+ values)
437
+ assert_equal(values, target.values)
438
+ end
439
+
423
440
  def test_sparse
424
441
  omit("Need to add support for SparseUnionArrayBuilder")
425
442
  values = [