red-arrow 5.0.0 → 6.0.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
data/test/test-table.rb CHANGED
@@ -53,26 +53,26 @@ class TableTest < Test::Unit::TestCase
53
53
  target_rows_raw = [nil, true, true, false, true, false, true, true]
54
54
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
55
55
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
56
- count visible
57
- 0
58
- 1 2 false
59
- 2 4
60
- 3 16 true
61
- 4 64
62
- 5 128
56
+ count visible
57
+ 0 (null) (null)
58
+ 1 2 false
59
+ 2 4 (null)
60
+ 3 16 true
61
+ 4 64 (null)
62
+ 5 128 (null)
63
63
  TABLE
64
64
  end
65
65
 
66
66
  test("Array: boolean") do
67
67
  target_rows_raw = [nil, true, true, false, true, false, true, true]
68
68
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
69
- count visible
70
- 0
71
- 1 2 false
72
- 2 4
73
- 3 16 true
74
- 4 64
75
- 5 128
69
+ count visible
70
+ 0 (null) (null)
71
+ 1 2 false
72
+ 2 4 (null)
73
+ 3 16 true
74
+ 4 64 (null)
75
+ 5 128 (null)
76
76
  TABLE
77
77
  end
78
78
 
@@ -100,7 +100,7 @@ class TableTest < Test::Unit::TestCase
100
100
  test("Range: positive: include end") do
101
101
  assert_equal(<<-TABLE, @table.slice(2..4).to_s)
102
102
  count visible
103
- 0 4
103
+ 0 4 (null)
104
104
  1 8 true
105
105
  2 16 true
106
106
  TABLE
@@ -109,7 +109,7 @@ class TableTest < Test::Unit::TestCase
109
109
  test("Range: positive: exclude end") do
110
110
  assert_equal(<<-TABLE, @table.slice(2...4).to_s)
111
111
  count visible
112
- 0 4
112
+ 0 4 (null)
113
113
  1 8 true
114
114
  TABLE
115
115
  end
@@ -119,7 +119,7 @@ class TableTest < Test::Unit::TestCase
119
119
  count visible
120
120
  0 16 true
121
121
  1 32 false
122
- 2 64
122
+ 2 64 (null)
123
123
  TABLE
124
124
  end
125
125
 
@@ -147,6 +147,104 @@ class TableTest < Test::Unit::TestCase
147
147
  TABLE
148
148
  end
149
149
 
150
+ test("{key: Number}") do
151
+ assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
152
+ count visible
153
+ 0 16 true
154
+ TABLE
155
+ end
156
+
157
+ test("{key: String}") do
158
+ table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
159
+ assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
160
+ name
161
+ 0 b
162
+ TABLE
163
+ end
164
+
165
+ test("{key: true}") do
166
+ assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
167
+ count visible
168
+ 0 1 true
169
+ 1 (null) (null)
170
+ 2 8 true
171
+ 3 16 true
172
+ 4 (null) (null)
173
+ 5 (null) (null)
174
+ TABLE
175
+ end
176
+
177
+ test("{key: false}") do
178
+ assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
179
+ count visible
180
+ 0 2 false
181
+ 1 (null) (null)
182
+ 2 32 false
183
+ 3 (null) (null)
184
+ 4 (null) (null)
185
+ TABLE
186
+ end
187
+
188
+ test("{key: Range}: beginless include end") do
189
+ assert_equal(<<-TABLE, @table.slice(count: ..8).to_s)
190
+ count visible
191
+ 0 1 true
192
+ 1 2 false
193
+ 2 4 (null)
194
+ 3 8 true
195
+ TABLE
196
+ end
197
+
198
+ test("{key: Range}: beginless exclude end") do
199
+ assert_equal(<<-TABLE, @table.slice(count: ...8).to_s)
200
+ count visible
201
+ 0 1 true
202
+ 1 2 false
203
+ 2 4 (null)
204
+ TABLE
205
+ end
206
+
207
+ test("{key: Range}: endless") do
208
+ assert_equal(<<-TABLE, @table.slice(count: 16..).to_s)
209
+ count visible
210
+ 0 16 true
211
+ 1 32 false
212
+ 2 64 (null)
213
+ 3 128 (null)
214
+ TABLE
215
+ end
216
+
217
+ test("{key: Range}: include end") do
218
+ assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
219
+ count visible
220
+ 0 1 true
221
+ 1 2 false
222
+ 2 4 (null)
223
+ 3 8 true
224
+ 4 16 true
225
+ TABLE
226
+ end
227
+
228
+ test("{key: Range}: exclude end") do
229
+ assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
230
+ count visible
231
+ 0 1 true
232
+ 1 2 false
233
+ 2 4 (null)
234
+ 3 8 true
235
+ TABLE
236
+ end
237
+
238
+ test("{key1: Range, key2: true}") do
239
+ assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
240
+ count visible
241
+ 0 2 false
242
+ 1 (null) (null)
243
+ 2 (null) (null)
244
+ 3 (null) (null)
245
+ TABLE
246
+ end
247
+
150
248
  sub_test_case("wrong argument") do
151
249
  test("no arguments") do
152
250
  message = "wrong number of arguments (given 0, expected 1..2)"
@@ -190,20 +288,45 @@ class TableTest < Test::Unit::TestCase
190
288
  end
191
289
 
192
290
  sub_test_case("#[]") do
291
+ def setup
292
+ @table = Arrow::Table.new(a: [true],
293
+ b: [true],
294
+ c: [true],
295
+ d: [true],
296
+ e: [true],
297
+ f: [true],
298
+ g: [true])
299
+ end
300
+
193
301
  test("[String]") do
194
302
  assert_equal(Arrow::Column.new(@table, 0),
195
- @table["count"])
303
+ @table["a"])
196
304
  end
197
305
 
198
306
  test("[Symbol]") do
199
307
  assert_equal(Arrow::Column.new(@table, 1),
200
- @table[:visible])
308
+ @table[:b])
201
309
  end
202
310
 
203
311
  test("[Integer]") do
204
- assert_equal(Arrow::Column.new(@table, 1),
312
+ assert_equal(Arrow::Column.new(@table, 6),
205
313
  @table[-1])
206
314
  end
315
+
316
+ test("[Range]") do
317
+ assert_equal(Arrow::Table.new(d: [true],
318
+ e: [true]),
319
+ @table[3..4])
320
+ end
321
+
322
+ test("[[Symbol, String, Integer, Range]]") do
323
+ assert_equal(Arrow::Table.new(c: [true],
324
+ a: [true],
325
+ g: [true],
326
+ d: [true],
327
+ e: [true]),
328
+ @table[[:c, "a", -1, 3..4]])
329
+ end
207
330
  end
208
331
 
209
332
  sub_test_case("#merge") do
@@ -214,12 +337,12 @@ class TableTest < Test::Unit::TestCase
214
337
  count visible name
215
338
  0 1 true a
216
339
  1 2 false b
217
- 2 4 c
340
+ 2 4 (null) c
218
341
  3 8 true d
219
342
  4 16 true e
220
343
  5 32 false f
221
- 6 64 g
222
- 7 128 h
344
+ 6 64 (null) g
345
+ 7 128 (null) h
223
346
  TABLE
224
347
  end
225
348
 
@@ -261,12 +384,12 @@ class TableTest < Test::Unit::TestCase
261
384
  count visible name
262
385
  0 1 true a
263
386
  1 2 false b
264
- 2 4 c
387
+ 2 4 (null) c
265
388
  3 8 true d
266
389
  4 16 true e
267
390
  5 32 false f
268
- 6 64 g
269
- 7 128 h
391
+ 6 64 (null) g
392
+ 7 128 (null) h
270
393
  TABLE
271
394
  end
272
395
 
@@ -614,12 +737,12 @@ chris\t-1
614
737
  count visible
615
738
  0 1 true
616
739
  1 2 false
617
- 2 4
740
+ 2 4 (null)
618
741
  3 8 true
619
742
  4 16 true
620
743
  5 32 false
621
- 6 64
622
- 7 128
744
+ 6 64 (null)
745
+ 7 128 (null)
623
746
  TABLE
624
747
  end
625
748
 
@@ -707,13 +830,13 @@ visible: false
707
830
  test("Array: boolean") do
708
831
  filter = [nil, true, true, false, true, false, true, true]
709
832
  assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
710
- count visible
711
- 0
712
- 1 2 false
713
- 2 4
714
- 3 16 true
715
- 4 64
716
- 5 128
833
+ count visible
834
+ 0 (null) (null)
835
+ 1 2 false
836
+ 2 4 (null)
837
+ 3 16 true
838
+ 4 64 (null)
839
+ 5 128 (null)
717
840
  TABLE
718
841
  end
719
842
 
@@ -721,13 +844,13 @@ visible: false
721
844
  array = [nil, true, true, false, true, false, true, true]
722
845
  filter = Arrow::BooleanArray.new(array)
723
846
  assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
724
- count visible
725
- 0
726
- 1 2 false
727
- 2 4
728
- 3 16 true
729
- 4 64
730
- 5 128
847
+ count visible
848
+ 0 (null) (null)
849
+ 1 2 false
850
+ 2 4 (null)
851
+ 3 16 true
852
+ 4 64 (null)
853
+ 5 128 (null)
731
854
  TABLE
732
855
  end
733
856
 
@@ -739,13 +862,13 @@ visible: false
739
862
  ]
740
863
  filter = Arrow::ChunkedArray.new(filter_chunks)
741
864
  assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
742
- count visible
743
- 0
744
- 1 2 false
745
- 2 4
746
- 3 16 true
747
- 4 64
748
- 5 128
865
+ count visible
866
+ 0 (null) (null)
867
+ 1 2 false
868
+ 2 4 (null)
869
+ 3 16 true
870
+ 4 64 (null)
871
+ 5 128 (null)
749
872
  TABLE
750
873
  end
751
874
  end
@@ -757,7 +880,7 @@ visible: false
757
880
  count visible
758
881
  0 2 false
759
882
  1 1 true
760
- 2 4
883
+ 2 4 (null)
761
884
  TABLE
762
885
  end
763
886
 
@@ -767,7 +890,7 @@ visible: false
767
890
  count visible
768
891
  0 2 false
769
892
  1 1 true
770
- 2 4
893
+ 2 4 (null)
771
894
  TABLE
772
895
  end
773
896
 
@@ -781,7 +904,21 @@ visible: false
781
904
  count visible
782
905
  0 2 false
783
906
  1 1 true
784
- 2 4
907
+ 2 4 (null)
908
+ TABLE
909
+ end
910
+ end
911
+
912
+ sub_test_case("#concatenate") do
913
+ test("options: :unify_schemas") do
914
+ table1 = Arrow::Table.new(a: [true],
915
+ b: [false])
916
+ table2 = Arrow::Table.new(b: [false])
917
+ concatenated = table1.concatenate([table2], unify_schemas: true)
918
+ assert_equal(<<-TABLE, concatenated.to_s)
919
+ a b
920
+ 0 true false
921
+ 1 (null) false
785
922
  TABLE
786
923
  end
787
924
  end
data/test/values/test-dense-union-array.rb CHANGED
@@ -382,6 +382,20 @@ module ValuesDenseUnionArrayTests
382
382
  assert_equal(values, target.values)
383
383
  end
384
384
 
385
+ def test_map
386
+ values = [
387
+ {"0" => {"key1" => true, "key2" => nil}},
388
+ {"1" => nil},
389
+ ]
390
+ target = build({
391
+ type: :map,
392
+ key: :string,
393
+ item: :boolean,
394
+ },
395
+ values)
396
+ assert_equal(values, target.values)
397
+ end
398
+
385
399
  def test_sparse_union
386
400
  omit("Need to add support for SparseUnionArrayBuilder")
387
401
  values = [
data/test/values/test-list-array.rb CHANGED
@@ -420,6 +420,23 @@ module ValuesListArrayTests
420
420
  assert_equal(values, target.values)
421
421
  end
422
422
 
423
+ def test_map
424
+ values = [
425
+ [
426
+ {"key1" => true, "key2" => nil},
427
+ nil,
428
+ ],
429
+ nil,
430
+ ]
431
+ target = build({
432
+ type: :map,
433
+ key: :string,
434
+ item: :boolean,
435
+ },
436
+ values)
437
+ assert_equal(values, target.values)
438
+ end
439
+
423
440
  def test_sparse
424
441
  omit("Need to add support for SparseUnionArrayBuilder")
425
442
  values = [