red-arrow 10.0.1 → 12.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +45 -41
  4. data/ext/arrow/extconf.rb +14 -2
  5. data/ext/arrow/raw-records.cpp +1 -2
  6. data/ext/arrow/values.cpp +1 -2
  7. data/lib/arrow/array-computable.rb +13 -0
  8. data/lib/arrow/array.rb +5 -0
  9. data/lib/arrow/chunked-array.rb +23 -1
  10. data/lib/arrow/column-containable.rb +9 -0
  11. data/lib/arrow/column.rb +1 -0
  12. data/lib/arrow/data-type.rb +9 -0
  13. data/lib/arrow/dense-union-array-builder.rb +49 -0
  14. data/lib/arrow/dense-union-array.rb +26 -0
  15. data/lib/arrow/half-float-array-builder.rb +32 -0
  16. data/lib/arrow/half-float-array.rb +24 -0
  17. data/lib/arrow/half-float.rb +118 -0
  18. data/lib/arrow/input-referable.rb +29 -0
  19. data/lib/arrow/loader.rb +10 -0
  20. data/lib/arrow/raw-table-converter.rb +7 -5
  21. data/lib/arrow/record-batch-file-reader.rb +2 -0
  22. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  23. data/lib/arrow/record-batch.rb +6 -2
  24. data/lib/arrow/scalar.rb +67 -0
  25. data/lib/arrow/slicer.rb +61 -0
  26. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  27. data/lib/arrow/sparse-union-array.rb +26 -0
  28. data/lib/arrow/struct-array-builder.rb +0 -5
  29. data/lib/arrow/table-loader.rb +4 -4
  30. data/lib/arrow/table-saver.rb +1 -0
  31. data/lib/arrow/table.rb +178 -31
  32. data/lib/arrow/tensor.rb +4 -0
  33. data/lib/arrow/union-array-builder.rb +59 -0
  34. data/lib/arrow/version.rb +1 -1
  35. data/red-arrow.gemspec +1 -1
  36. data/test/raw-records/test-basic-arrays.rb +10 -0
  37. data/test/raw-records/test-dense-union-array.rb +90 -45
  38. data/test/raw-records/test-list-array.rb +28 -10
  39. data/test/raw-records/test-map-array.rb +39 -10
  40. data/test/raw-records/test-sparse-union-array.rb +86 -41
  41. data/test/raw-records/test-struct-array.rb +22 -8
  42. data/test/test-array.rb +7 -0
  43. data/test/test-chunked-array.rb +9 -0
  44. data/test/test-data-type.rb +2 -1
  45. data/test/test-dense-union-array.rb +42 -0
  46. data/test/test-dense-union-data-type.rb +1 -1
  47. data/test/test-function.rb +7 -7
  48. data/test/test-group.rb +58 -58
  49. data/test/test-half-float-array.rb +43 -0
  50. data/test/test-half-float.rb +130 -0
  51. data/test/test-record-batch-file-reader.rb +21 -0
  52. data/test/test-record-batch-stream-reader.rb +129 -0
  53. data/test/test-scalar.rb +65 -0
  54. data/test/test-slicer.rb +194 -129
  55. data/test/test-sparse-union-array.rb +38 -0
  56. data/test/test-table.rb +324 -40
  57. data/test/values/test-basic-arrays.rb +10 -0
  58. data/test/values/test-dense-union-array.rb +88 -45
  59. data/test/values/test-list-array.rb +26 -10
  60. data/test/values/test-map-array.rb +33 -10
  61. data/test/values/test-sparse-union-array.rb +84 -41
  62. data/test/values/test-struct-array.rb +20 -8
  63. metadata +30 -9
data/test/test-table.rb CHANGED
@@ -41,8 +41,25 @@ class TableTest < Test::Unit::TestCase
41
41
  end
42
42
 
43
43
  sub_test_case(".new") do
44
+ test("{Symbol: Arrow::Array}") do
45
+ schema = Arrow::Schema.new(numbers: :int64)
46
+ assert_equal(Arrow::Table.new(schema,
47
+ [Arrow::Int64Array.new([1, 2, 3])]),
48
+ Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
49
+ end
50
+
51
+ test("{Symbol: Arrow::ChunkedArray}") do
52
+ chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
53
+ schema = Arrow::Schema.new(numbers: :int64)
54
+ assert_equal(Arrow::Table.new(schema,
55
+ [Arrow::Int64Array.new([1, 2, 3])]),
56
+ Arrow::Table.new(numbers: chunked_array))
57
+ end
58
+
44
59
  test("{Symbol: Arrow::Tensor}") do
45
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
60
+ schema = Arrow::Schema.new(numbers: :uint8)
61
+ assert_equal(Arrow::Table.new(schema,
62
+ [Arrow::UInt8Array.new([1, 2, 3])]),
46
63
  Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
47
64
  end
48
65
 
@@ -51,7 +68,8 @@ class TableTest < Test::Unit::TestCase
51
68
  def array_like.to_ary
52
69
  [1, 2, 3]
53
70
  end
54
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
71
+ schema = Arrow::Schema.new(numbers: :uint8)
72
+ assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
55
73
  Arrow::Table.new(numbers: array_like))
56
74
  end
57
75
  end
@@ -69,26 +87,24 @@ class TableTest < Test::Unit::TestCase
69
87
  target_rows_raw = [nil, true, true, false, true, false, true, true]
70
88
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
71
89
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
72
- count visible
73
- 0 (null) (null)
74
- 1 2 false
75
- 2 4 (null)
76
- 3 16 true
77
- 4 64 (null)
78
- 5 128 (null)
90
+ count visible
91
+ 0 2 false
92
+ 1 4 (null)
93
+ 2 16 true
94
+ 3 64 (null)
95
+ 4 128 (null)
79
96
  TABLE
80
97
  end
81
98
 
82
99
  test("Array: boolean") do
83
100
  target_rows_raw = [nil, true, true, false, true, false, true, true]
84
101
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
85
- count visible
86
- 0 (null) (null)
87
- 1 2 false
88
- 2 4 (null)
89
- 3 16 true
90
- 4 64 (null)
91
- 5 128 (null)
102
+ count visible
103
+ 0 2 false
104
+ 1 4 (null)
105
+ 2 16 true
106
+ 3 64 (null)
107
+ 4 128 (null)
92
108
  TABLE
93
109
  end
94
110
 
@@ -180,24 +196,18 @@ class TableTest < Test::Unit::TestCase
180
196
 
181
197
  test("{key: true}") do
182
198
  assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
183
- count visible
184
- 0 1 true
185
- 1 (null) (null)
186
- 2 8 true
187
- 3 16 true
188
- 4 (null) (null)
189
- 5 (null) (null)
199
+ count visible
200
+ 0 1 true
201
+ 1 8 true
202
+ 2 16 true
190
203
  TABLE
191
204
  end
192
205
 
193
206
  test("{key: false}") do
194
207
  assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
195
- count visible
196
- 0 2 false
197
- 1 (null) (null)
198
- 2 32 false
199
- 3 (null) (null)
200
- 4 (null) (null)
208
+ count visible
209
+ 0 2 false
210
+ 1 32 false
201
211
  TABLE
202
212
  end
203
213
 
@@ -268,11 +278,8 @@ class TableTest < Test::Unit::TestCase
268
278
 
269
279
  test("{key1: Range, key2: true}") do
270
280
  assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
271
- count visible
272
- 0 2 false
273
- 1 (null) (null)
274
- 2 (null) (null)
275
- 3 (null) (null)
281
+ count visible
282
+ 0 2 false
276
283
  TABLE
277
284
  end
278
285
 
@@ -584,6 +591,18 @@ class TableTest < Test::Unit::TestCase
584
591
  end
585
592
  end
586
593
 
594
+ sub_test_case("#column_names") do
595
+ test("unique") do
596
+ table = Arrow::Table.new(a: [1], b: [2], c: [3])
597
+ assert_equal(%w[a b c], table.column_names)
598
+ end
599
+
600
+ test("duplicated") do
601
+ table = Arrow::Table.new([["a", [1, 2, 3]], ["a", [4, 5, 6]]])
602
+ assert_equal(%w[a a], table.column_names)
603
+ end
604
+ end
605
+
587
606
  sub_test_case("#save and .load") do
588
607
  module SaveLoadFormatTests
589
608
  def test_default
@@ -690,6 +709,11 @@ class TableTest < Test::Unit::TestCase
690
709
  schema: @table.schema))
691
710
  end
692
711
 
712
+ test("csv, return value") do
713
+ output = create_output(".csv")
714
+ assert_equal(@table, @table.save(output))
715
+ end
716
+
693
717
  test("csv.gz") do
694
718
  output = create_output(".csv.gz")
695
719
  @table.save(output)
@@ -830,6 +854,76 @@ chris\t-1
830
854
  end
831
855
  end
832
856
  end
857
+
858
+ sub_test_case("GC") do
859
+ def setup
860
+ table = Arrow::Table.new(integer: [1, 2, 3],
861
+ string: ["a", "b", "c"])
862
+ @buffer = Arrow::ResizableBuffer.new(1024)
863
+ table.save(@buffer, format: :arrow)
864
+ @loaded_table = Arrow::Table.load(@buffer)
865
+ end
866
+
867
+ def test_chunked_array
868
+ chunked_array = @loaded_table[0].data
869
+ assert_equal(@buffer,
870
+ chunked_array.instance_variable_get(:@input).buffer)
871
+ end
872
+
873
+ def test_array
874
+ array = @loaded_table[0].data.chunks[0]
875
+ assert_equal(@buffer,
876
+ array.instance_variable_get(:@input).buffer)
877
+ end
878
+
879
+ def test_record_batch
880
+ record_batch = @loaded_table.each_record_batch.first
881
+ assert_equal(@buffer,
882
+ record_batch.instance_variable_get(:@input).buffer)
883
+ end
884
+
885
+ def test_record_batch_array
886
+ array = @loaded_table.each_record_batch.first[0].data
887
+ assert_equal(@buffer,
888
+ array.instance_variable_get(:@input).buffer)
889
+ end
890
+
891
+ def test_record_batch_table
892
+ table = @loaded_table.each_record_batch.first.to_table
893
+ assert_equal(@buffer,
894
+ table.instance_variable_get(:@input).buffer)
895
+ end
896
+
897
+ def test_slice
898
+ table = @loaded_table.slice(0..-1)
899
+ assert_equal(@buffer,
900
+ table.instance_variable_get(:@input).buffer)
901
+ end
902
+
903
+ def test_merge
904
+ table = @loaded_table.merge({})
905
+ assert_equal(@buffer,
906
+ table.instance_variable_get(:@input).buffer)
907
+ end
908
+
909
+ def test_remove_column
910
+ table = @loaded_table.remove_column(0)
911
+ assert_equal(@buffer,
912
+ table.instance_variable_get(:@input).buffer)
913
+ end
914
+
915
+ def test_pack
916
+ table = @loaded_table.pack
917
+ assert_equal(@buffer,
918
+ table.instance_variable_get(:@input).buffer)
919
+ end
920
+
921
+ def test_join
922
+ table = @loaded_table.join(@loaded_table, :integer)
923
+ assert_equal(@buffer,
924
+ table.instance_variable_get(:@input).buffer)
925
+ end
926
+ end
833
927
  end
834
928
 
835
929
  test("#pack") do
@@ -1026,7 +1120,7 @@ visible: false
1026
1120
  end
1027
1121
 
1028
1122
  sub_test_case("#join") do
1029
- test("keys: String") do
1123
+ test("keys: nil (natural join)") do
1030
1124
  table1 = Arrow::Table.new(key: [1, 2, 3],
1031
1125
  number: [10, 20, 30])
1032
1126
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1034,7 +1128,19 @@ visible: false
1034
1128
  assert_equal(Arrow::Table.new([
1035
1129
  ["key", [1, 3]],
1036
1130
  ["number", [10, 30]],
1131
+ ["string", ["one", "three"]],
1132
+ ]),
1133
+ table1.join(table2))
1134
+ end
1135
+
1136
+ test("keys: String") do
1137
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1138
+ number: [10, 20, 30])
1139
+ table2 = Arrow::Table.new(key: [3, 1],
1140
+ string: ["three", "one"])
1141
+ assert_equal(Arrow::Table.new([
1037
1142
  ["key", [1, 3]],
1143
+ ["number", [10, 30]],
1038
1144
  ["string", ["one", "three"]],
1039
1145
  ]),
1040
1146
  table1.join(table2, "key"))
@@ -1048,12 +1154,25 @@ visible: false
1048
1154
  assert_equal(Arrow::Table.new([
1049
1155
  ["key", [1, 3]],
1050
1156
  ["number", [10, 30]],
1051
- ["key", [1, 3]],
1052
1157
  ["string", ["one", "three"]],
1053
1158
  ]),
1054
1159
  table1.join(table2, :key))
1055
1160
  end
1056
1161
 
1162
+ test("keys: [String]") do
1163
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1164
+ number: [10, 20, 30])
1165
+ table2 = Arrow::Table.new(key: [3, 1],
1166
+ string: ["three", "one"])
1167
+ assert_equal(Arrow::Table.new([
1168
+ ["key", [1, 3]],
1169
+ ["number", [10, 30]],
1170
+ ["key", [1, 3]],
1171
+ ["string", ["one", "three"]],
1172
+ ]),
1173
+ table1.join(table2, ["key"]))
1174
+ end
1175
+
1057
1176
  test("keys: [String, Symbol]") do
1058
1177
  table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1059
1178
  key2: [10, 100, 20, 200],
@@ -1083,7 +1202,9 @@ visible: false
1083
1202
  ["right_key", [1, 3]],
1084
1203
  ["string", ["one", "three"]],
1085
1204
  ]),
1086
- table1.join(table2, {left: "left_key", right: :right_key}))
1205
+ table1.join(table2,
1206
+ {left: "left_key", right: :right_key},
1207
+ type: :inner))
1087
1208
  end
1088
1209
 
1089
1210
  test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
@@ -1105,10 +1226,11 @@ visible: false
1105
1226
  {
1106
1227
  left: ["left_key1", :left_key2],
1107
1228
  right: [:right_key1, "right_key2"],
1108
- }))
1229
+ },
1230
+ type: :inner))
1109
1231
  end
1110
1232
 
1111
- test("type:") do
1233
+ test("type: :left_outer") do
1112
1234
  table1 = Arrow::Table.new(key: [1, 2, 3],
1113
1235
  number: [10, 20, 30])
1114
1236
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1116,12 +1238,85 @@ visible: false
1116
1238
  assert_equal(Arrow::Table.new([
1117
1239
  ["key", [1, 3, 2]],
1118
1240
  ["number", [10, 30, 20]],
1119
- ["key", [1, 3, nil]],
1120
1241
  ["string", ["one", "three", nil]],
1121
1242
  ]),
1122
1243
  table1.join(table2, "key", type: :left_outer))
1123
1244
  end
1124
1245
 
1246
+ test("type: :right_outer") do
1247
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1248
+ number: [10, 20, 30])
1249
+ table2 = Arrow::Table.new(key: [3, 1],
1250
+ string: ["three", "one"])
1251
+ assert_equal(Arrow::Table.new([
1252
+ ["key", [1, 3]],
1253
+ ["number", [10, 30]],
1254
+ ["string", ["one", "three"]],
1255
+ ]),
1256
+ table1.join(table2, "key", type: :right_outer))
1257
+ end
1258
+
1259
+ test("type: :full_outer") do
1260
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1261
+ number: [10, 20, 30])
1262
+ table2 = Arrow::Table.new(key: [3, 1],
1263
+ string: ["three", "one"])
1264
+ assert_equal(Arrow::Table.new([
1265
+ ["key", [1, 3, 2]],
1266
+ ["number", [10, 30, 20]],
1267
+ ["string", ["one", "three", nil]],
1268
+ ]),
1269
+ table1.join(table2, "key", type: :full_outer))
1270
+ end
1271
+
1272
+ test("type: :left_semi") do
1273
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1274
+ number: [10, 20, 30])
1275
+ table2 = Arrow::Table.new(key: [3, 1],
1276
+ string: ["three", "one"])
1277
+ assert_equal(Arrow::Table.new([
1278
+ ["key", [1, 3]],
1279
+ ["number", [10, 30]],
1280
+ ]),
1281
+ table1.join(table2, "key", type: :left_semi))
1282
+ end
1283
+
1284
+ test("type: :right_semi") do
1285
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1286
+ number: [10, 20, 30])
1287
+ table2 = Arrow::Table.new(key: [3, 1],
1288
+ string: ["three", "one"])
1289
+ assert_equal(Arrow::Table.new([
1290
+ ["key", [3, 1]],
1291
+ ["string", ["three", "one"]],
1292
+ ]),
1293
+ table1.join(table2, "key", type: :right_semi))
1294
+ end
1295
+
1296
+ test("type: :left_anti") do
1297
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1298
+ number: [10, 20, 30])
1299
+ table2 = Arrow::Table.new(key: [3, 1],
1300
+ string: ["three", "one"])
1301
+ assert_equal(Arrow::Table.new([
1302
+ ["key", [2]],
1303
+ ["number", [20]],
1304
+ ]),
1305
+ table1.join(table2, "key", type: :left_anti))
1306
+ end
1307
+
1308
+ test("type: :right_anti") do
1309
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1310
+ number: [10, 20, 30])
1311
+ table2 = Arrow::Table.new(key: [3, 1],
1312
+ string: ["three", "one"])
1313
+ assert_equal(Arrow::Table.new([
1314
+ ["key", Arrow::ChunkedArray.new(:uint8)],
1315
+ ["string", Arrow::ChunkedArray.new(:string)],
1316
+ ]),
1317
+ table1.join(table2, "key", type: :right_anti))
1318
+ end
1319
+
1125
1320
  test("left_outputs: & right_outputs:") do
1126
1321
  table1 = Arrow::Table.new(key: [1, 2, 3],
1127
1322
  number: [10, 20, 30])
@@ -1135,5 +1330,94 @@ visible: false
1135
1330
  left_outputs: ["key", "number"],
1136
1331
  right_outputs: ["string"]))
1137
1332
  end
1333
+
1334
+ test("left_outputs: & type: :inner") do
1335
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1336
+ number: [10, 20, 30])
1337
+ table2 = Arrow::Table.new(key: [3, 1],
1338
+ string: ["three", "one"])
1339
+ assert_equal(Arrow::Table.new([
1340
+ ["key", [1, 3]],
1341
+ ["number", [10, 30]],
1342
+ ["key", [1, 3]],
1343
+ ["string", ["one", "three"]]
1344
+ ]),
1345
+ table1.join(table2,
1346
+ type: :inner,
1347
+ left_outputs: table1.column_names,
1348
+ right_outputs: table2.column_names))
1349
+ end
1350
+
1351
+ test("left_outputs: & type: :left_outer") do
1352
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1353
+ number: [10, 20, 30])
1354
+ table2 = Arrow::Table.new(key: [3, 1],
1355
+ string: ["three", "one"])
1356
+ assert_equal(Arrow::Table.new([
1357
+ ["key", [1, 3, 2]],
1358
+ ["number", [10, 30, 20]],
1359
+ ["key", [1, 3, nil]],
1360
+ ["string", ["one", "three", nil]],
1361
+ ]),
1362
+ table1.join(table2,
1363
+ type: :left_outer,
1364
+ left_outputs: table1.column_names,
1365
+ right_outputs: table2.column_names))
1366
+ end
1367
+
1368
+ test("left_outputs: & type: :right_outer") do
1369
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1370
+ number: [10, 20, 30])
1371
+ table2 = Arrow::Table.new(key: [3, 1],
1372
+ string: ["three", "one"])
1373
+ assert_equal(Arrow::Table.new([
1374
+ ["key", [1, 3]],
1375
+ ["number", [10, 30]],
1376
+ ["key", [1, 3]],
1377
+ ["string", ["one", "three"]],
1378
+ ]),
1379
+ table1.join(table2,
1380
+ type: :right_outer,
1381
+ left_outputs: table1.column_names,
1382
+ right_outputs: table2.column_names))
1383
+ end
1384
+
1385
+ test("left_outputs: & type: :full_outer") do
1386
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1387
+ number: [10, 20, 30])
1388
+ table2 = Arrow::Table.new(key: [3, 1],
1389
+ string: ["three", "one"])
1390
+ assert_equal(Arrow::Table.new([
1391
+ ["key", [1, 3, 2]],
1392
+ ["number", [10, 30, 20]],
1393
+ ["key", [1, 3, nil]],
1394
+ ["string", ["one", "three", nil]],
1395
+ ]),
1396
+ table1.join(table2,
1397
+ type: :full_outer,
1398
+ left_outputs: table1.column_names,
1399
+ right_outputs: table2.column_names))
1400
+ end
1401
+
1402
+ test("left_suffix: & keys: [String]") do
1403
+ table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1404
+ key2: [10, 100, 20, 200],
1405
+ number: [1010, 1100, 2020, 2200])
1406
+ table2 = Arrow::Table.new(key1: [1, 2, 2],
1407
+ key2: [100, 20, 50],
1408
+ string: ["1-100", "2-20", "2-50"])
1409
+ assert_equal(Arrow::Table.new([
1410
+ ["key1_left", [1, 2]],
1411
+ ["key2_left", [100, 20]],
1412
+ ["number", [1100, 2020]],
1413
+ ["key1_right", [1, 2]],
1414
+ ["key2_right", [100, 20]],
1415
+ ["string", ["1-100", "2-20"]],
1416
+ ]),
1417
+ table1.join(table2,
1418
+ ["key1", "key2"],
1419
+ left_suffix: "_left",
1420
+ right_suffix: "_right"))
1421
+ end
1138
1422
  end
1139
1423
  end
data/test/values/test-basic-arrays.rb CHANGED
@@ -107,6 +107,16 @@ module ValuesBasicArraysTests
107
107
  assert_equal(values, target.values)
108
108
  end
109
109
 
110
+ def test_half_float
111
+ values = [
112
+ -1.5,
113
+ nil,
114
+ 1.5,
115
+ ]
116
+ target = build(Arrow::HalfFloatArray.new(values))
117
+ assert_equal(values, target.values)
118
+ end
119
+
110
120
  def test_float
111
121
  values = [
112
122
  -1.0,