red-arrow 10.0.1 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +45 -41
  4. data/ext/arrow/extconf.rb +14 -2
  5. data/ext/arrow/raw-records.cpp +1 -2
  6. data/ext/arrow/values.cpp +1 -2
  7. data/lib/arrow/array-computable.rb +13 -0
  8. data/lib/arrow/array.rb +5 -0
  9. data/lib/arrow/chunked-array.rb +23 -1
  10. data/lib/arrow/column-containable.rb +9 -0
  11. data/lib/arrow/column.rb +1 -0
  12. data/lib/arrow/data-type.rb +9 -0
  13. data/lib/arrow/dense-union-array-builder.rb +49 -0
  14. data/lib/arrow/dense-union-array.rb +26 -0
  15. data/lib/arrow/half-float-array-builder.rb +32 -0
  16. data/lib/arrow/half-float-array.rb +24 -0
  17. data/lib/arrow/half-float.rb +118 -0
  18. data/lib/arrow/input-referable.rb +29 -0
  19. data/lib/arrow/loader.rb +10 -0
  20. data/lib/arrow/raw-table-converter.rb +7 -5
  21. data/lib/arrow/record-batch-file-reader.rb +2 -0
  22. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  23. data/lib/arrow/record-batch.rb +6 -2
  24. data/lib/arrow/scalar.rb +67 -0
  25. data/lib/arrow/slicer.rb +61 -0
  26. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  27. data/lib/arrow/sparse-union-array.rb +26 -0
  28. data/lib/arrow/struct-array-builder.rb +0 -5
  29. data/lib/arrow/table-loader.rb +4 -4
  30. data/lib/arrow/table-saver.rb +1 -0
  31. data/lib/arrow/table.rb +178 -31
  32. data/lib/arrow/tensor.rb +4 -0
  33. data/lib/arrow/union-array-builder.rb +59 -0
  34. data/lib/arrow/version.rb +1 -1
  35. data/red-arrow.gemspec +1 -1
  36. data/test/raw-records/test-basic-arrays.rb +10 -0
  37. data/test/raw-records/test-dense-union-array.rb +90 -45
  38. data/test/raw-records/test-list-array.rb +28 -10
  39. data/test/raw-records/test-map-array.rb +39 -10
  40. data/test/raw-records/test-sparse-union-array.rb +86 -41
  41. data/test/raw-records/test-struct-array.rb +22 -8
  42. data/test/test-array.rb +7 -0
  43. data/test/test-chunked-array.rb +9 -0
  44. data/test/test-data-type.rb +2 -1
  45. data/test/test-dense-union-array.rb +42 -0
  46. data/test/test-dense-union-data-type.rb +1 -1
  47. data/test/test-function.rb +7 -7
  48. data/test/test-group.rb +58 -58
  49. data/test/test-half-float-array.rb +43 -0
  50. data/test/test-half-float.rb +130 -0
  51. data/test/test-record-batch-file-reader.rb +21 -0
  52. data/test/test-record-batch-stream-reader.rb +129 -0
  53. data/test/test-scalar.rb +65 -0
  54. data/test/test-slicer.rb +194 -129
  55. data/test/test-sparse-union-array.rb +38 -0
  56. data/test/test-table.rb +324 -40
  57. data/test/values/test-basic-arrays.rb +10 -0
  58. data/test/values/test-dense-union-array.rb +88 -45
  59. data/test/values/test-list-array.rb +26 -10
  60. data/test/values/test-map-array.rb +33 -10
  61. data/test/values/test-sparse-union-array.rb +84 -41
  62. data/test/values/test-struct-array.rb +20 -8
  63. metadata +30 -9
data/test/test-table.rb CHANGED
@@ -41,8 +41,25 @@ class TableTest < Test::Unit::TestCase
41
41
  end
42
42
 
43
43
  sub_test_case(".new") do
44
+ test("{Symbol: Arrow::Array}") do
45
+ schema = Arrow::Schema.new(numbers: :int64)
46
+ assert_equal(Arrow::Table.new(schema,
47
+ [Arrow::Int64Array.new([1, 2, 3])]),
48
+ Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
49
+ end
50
+
51
+ test("{Symbol: Arrow::ChunkedArray}") do
52
+ chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
53
+ schema = Arrow::Schema.new(numbers: :int64)
54
+ assert_equal(Arrow::Table.new(schema,
55
+ [Arrow::Int64Array.new([1, 2, 3])]),
56
+ Arrow::Table.new(numbers: chunked_array))
57
+ end
58
+
44
59
  test("{Symbol: Arrow::Tensor}") do
45
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
60
+ schema = Arrow::Schema.new(numbers: :uint8)
61
+ assert_equal(Arrow::Table.new(schema,
62
+ [Arrow::UInt8Array.new([1, 2, 3])]),
46
63
  Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
47
64
  end
48
65
 
@@ -51,7 +68,8 @@ class TableTest < Test::Unit::TestCase
51
68
  def array_like.to_ary
52
69
  [1, 2, 3]
53
70
  end
54
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
71
+ schema = Arrow::Schema.new(numbers: :uint8)
72
+ assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
55
73
  Arrow::Table.new(numbers: array_like))
56
74
  end
57
75
  end
@@ -69,26 +87,24 @@ class TableTest < Test::Unit::TestCase
69
87
  target_rows_raw = [nil, true, true, false, true, false, true, true]
70
88
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
71
89
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
72
- count visible
73
- 0 (null) (null)
74
- 1 2 false
75
- 2 4 (null)
76
- 3 16 true
77
- 4 64 (null)
78
- 5 128 (null)
90
+ count visible
91
+ 0 2 false
92
+ 1 4 (null)
93
+ 2 16 true
94
+ 3 64 (null)
95
+ 4 128 (null)
79
96
  TABLE
80
97
  end
81
98
 
82
99
  test("Array: boolean") do
83
100
  target_rows_raw = [nil, true, true, false, true, false, true, true]
84
101
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
85
- count visible
86
- 0 (null) (null)
87
- 1 2 false
88
- 2 4 (null)
89
- 3 16 true
90
- 4 64 (null)
91
- 5 128 (null)
102
+ count visible
103
+ 0 2 false
104
+ 1 4 (null)
105
+ 2 16 true
106
+ 3 64 (null)
107
+ 4 128 (null)
92
108
  TABLE
93
109
  end
94
110
 
@@ -180,24 +196,18 @@ class TableTest < Test::Unit::TestCase
180
196
 
181
197
  test("{key: true}") do
182
198
  assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
183
- count visible
184
- 0 1 true
185
- 1 (null) (null)
186
- 2 8 true
187
- 3 16 true
188
- 4 (null) (null)
189
- 5 (null) (null)
199
+ count visible
200
+ 0 1 true
201
+ 1 8 true
202
+ 2 16 true
190
203
  TABLE
191
204
  end
192
205
 
193
206
  test("{key: false}") do
194
207
  assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
195
- count visible
196
- 0 2 false
197
- 1 (null) (null)
198
- 2 32 false
199
- 3 (null) (null)
200
- 4 (null) (null)
208
+ count visible
209
+ 0 2 false
210
+ 1 32 false
201
211
  TABLE
202
212
  end
203
213
 
@@ -268,11 +278,8 @@ class TableTest < Test::Unit::TestCase
268
278
 
269
279
  test("{key1: Range, key2: true}") do
270
280
  assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
271
- count visible
272
- 0 2 false
273
- 1 (null) (null)
274
- 2 (null) (null)
275
- 3 (null) (null)
281
+ count visible
282
+ 0 2 false
276
283
  TABLE
277
284
  end
278
285
 
@@ -584,6 +591,18 @@ class TableTest < Test::Unit::TestCase
584
591
  end
585
592
  end
586
593
 
594
+ sub_test_case("#column_names") do
595
+ test("unique") do
596
+ table = Arrow::Table.new(a: [1], b: [2], c: [3])
597
+ assert_equal(%w[a b c], table.column_names)
598
+ end
599
+
600
+ test("duplicated") do
601
+ table = Arrow::Table.new([["a", [1, 2, 3]], ["a", [4, 5, 6]]])
602
+ assert_equal(%w[a a], table.column_names)
603
+ end
604
+ end
605
+
587
606
  sub_test_case("#save and .load") do
588
607
  module SaveLoadFormatTests
589
608
  def test_default
@@ -690,6 +709,11 @@ class TableTest < Test::Unit::TestCase
690
709
  schema: @table.schema))
691
710
  end
692
711
 
712
+ test("csv, return value") do
713
+ output = create_output(".csv")
714
+ assert_equal(@table, @table.save(output))
715
+ end
716
+
693
717
  test("csv.gz") do
694
718
  output = create_output(".csv.gz")
695
719
  @table.save(output)
@@ -830,6 +854,76 @@ chris\t-1
830
854
  end
831
855
  end
832
856
  end
857
+
858
+ sub_test_case("GC") do
859
+ def setup
860
+ table = Arrow::Table.new(integer: [1, 2, 3],
861
+ string: ["a", "b", "c"])
862
+ @buffer = Arrow::ResizableBuffer.new(1024)
863
+ table.save(@buffer, format: :arrow)
864
+ @loaded_table = Arrow::Table.load(@buffer)
865
+ end
866
+
867
+ def test_chunked_array
868
+ chunked_array = @loaded_table[0].data
869
+ assert_equal(@buffer,
870
+ chunked_array.instance_variable_get(:@input).buffer)
871
+ end
872
+
873
+ def test_array
874
+ array = @loaded_table[0].data.chunks[0]
875
+ assert_equal(@buffer,
876
+ array.instance_variable_get(:@input).buffer)
877
+ end
878
+
879
+ def test_record_batch
880
+ record_batch = @loaded_table.each_record_batch.first
881
+ assert_equal(@buffer,
882
+ record_batch.instance_variable_get(:@input).buffer)
883
+ end
884
+
885
+ def test_record_batch_array
886
+ array = @loaded_table.each_record_batch.first[0].data
887
+ assert_equal(@buffer,
888
+ array.instance_variable_get(:@input).buffer)
889
+ end
890
+
891
+ def test_record_batch_table
892
+ table = @loaded_table.each_record_batch.first.to_table
893
+ assert_equal(@buffer,
894
+ table.instance_variable_get(:@input).buffer)
895
+ end
896
+
897
+ def test_slice
898
+ table = @loaded_table.slice(0..-1)
899
+ assert_equal(@buffer,
900
+ table.instance_variable_get(:@input).buffer)
901
+ end
902
+
903
+ def test_merge
904
+ table = @loaded_table.merge({})
905
+ assert_equal(@buffer,
906
+ table.instance_variable_get(:@input).buffer)
907
+ end
908
+
909
+ def test_remove_column
910
+ table = @loaded_table.remove_column(0)
911
+ assert_equal(@buffer,
912
+ table.instance_variable_get(:@input).buffer)
913
+ end
914
+
915
+ def test_pack
916
+ table = @loaded_table.pack
917
+ assert_equal(@buffer,
918
+ table.instance_variable_get(:@input).buffer)
919
+ end
920
+
921
+ def test_join
922
+ table = @loaded_table.join(@loaded_table, :integer)
923
+ assert_equal(@buffer,
924
+ table.instance_variable_get(:@input).buffer)
925
+ end
926
+ end
833
927
  end
834
928
 
835
929
  test("#pack") do
@@ -1026,7 +1120,7 @@ visible: false
1026
1120
  end
1027
1121
 
1028
1122
  sub_test_case("#join") do
1029
- test("keys: String") do
1123
+ test("keys: nil (natural join)") do
1030
1124
  table1 = Arrow::Table.new(key: [1, 2, 3],
1031
1125
  number: [10, 20, 30])
1032
1126
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1034,7 +1128,19 @@ visible: false
1034
1128
  assert_equal(Arrow::Table.new([
1035
1129
  ["key", [1, 3]],
1036
1130
  ["number", [10, 30]],
1131
+ ["string", ["one", "three"]],
1132
+ ]),
1133
+ table1.join(table2))
1134
+ end
1135
+
1136
+ test("keys: String") do
1137
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1138
+ number: [10, 20, 30])
1139
+ table2 = Arrow::Table.new(key: [3, 1],
1140
+ string: ["three", "one"])
1141
+ assert_equal(Arrow::Table.new([
1037
1142
  ["key", [1, 3]],
1143
+ ["number", [10, 30]],
1038
1144
  ["string", ["one", "three"]],
1039
1145
  ]),
1040
1146
  table1.join(table2, "key"))
@@ -1048,12 +1154,25 @@ visible: false
1048
1154
  assert_equal(Arrow::Table.new([
1049
1155
  ["key", [1, 3]],
1050
1156
  ["number", [10, 30]],
1051
- ["key", [1, 3]],
1052
1157
  ["string", ["one", "three"]],
1053
1158
  ]),
1054
1159
  table1.join(table2, :key))
1055
1160
  end
1056
1161
 
1162
+ test("keys: [String]") do
1163
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1164
+ number: [10, 20, 30])
1165
+ table2 = Arrow::Table.new(key: [3, 1],
1166
+ string: ["three", "one"])
1167
+ assert_equal(Arrow::Table.new([
1168
+ ["key", [1, 3]],
1169
+ ["number", [10, 30]],
1170
+ ["key", [1, 3]],
1171
+ ["string", ["one", "three"]],
1172
+ ]),
1173
+ table1.join(table2, ["key"]))
1174
+ end
1175
+
1057
1176
  test("keys: [String, Symbol]") do
1058
1177
  table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1059
1178
  key2: [10, 100, 20, 200],
@@ -1083,7 +1202,9 @@ visible: false
1083
1202
  ["right_key", [1, 3]],
1084
1203
  ["string", ["one", "three"]],
1085
1204
  ]),
1086
- table1.join(table2, {left: "left_key", right: :right_key}))
1205
+ table1.join(table2,
1206
+ {left: "left_key", right: :right_key},
1207
+ type: :inner))
1087
1208
  end
1088
1209
 
1089
1210
  test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
@@ -1105,10 +1226,11 @@ visible: false
1105
1226
  {
1106
1227
  left: ["left_key1", :left_key2],
1107
1228
  right: [:right_key1, "right_key2"],
1108
- }))
1229
+ },
1230
+ type: :inner))
1109
1231
  end
1110
1232
 
1111
- test("type:") do
1233
+ test("type: :left_outer") do
1112
1234
  table1 = Arrow::Table.new(key: [1, 2, 3],
1113
1235
  number: [10, 20, 30])
1114
1236
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1116,12 +1238,85 @@ visible: false
1116
1238
  assert_equal(Arrow::Table.new([
1117
1239
  ["key", [1, 3, 2]],
1118
1240
  ["number", [10, 30, 20]],
1119
- ["key", [1, 3, nil]],
1120
1241
  ["string", ["one", "three", nil]],
1121
1242
  ]),
1122
1243
  table1.join(table2, "key", type: :left_outer))
1123
1244
  end
1124
1245
 
1246
+ test("type: :right_outer") do
1247
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1248
+ number: [10, 20, 30])
1249
+ table2 = Arrow::Table.new(key: [3, 1],
1250
+ string: ["three", "one"])
1251
+ assert_equal(Arrow::Table.new([
1252
+ ["key", [1, 3]],
1253
+ ["number", [10, 30]],
1254
+ ["string", ["one", "three"]],
1255
+ ]),
1256
+ table1.join(table2, "key", type: :right_outer))
1257
+ end
1258
+
1259
+ test("type: :full_outer") do
1260
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1261
+ number: [10, 20, 30])
1262
+ table2 = Arrow::Table.new(key: [3, 1],
1263
+ string: ["three", "one"])
1264
+ assert_equal(Arrow::Table.new([
1265
+ ["key", [1, 3, 2]],
1266
+ ["number", [10, 30, 20]],
1267
+ ["string", ["one", "three", nil]],
1268
+ ]),
1269
+ table1.join(table2, "key", type: :full_outer))
1270
+ end
1271
+
1272
+ test("type: :left_semi") do
1273
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1274
+ number: [10, 20, 30])
1275
+ table2 = Arrow::Table.new(key: [3, 1],
1276
+ string: ["three", "one"])
1277
+ assert_equal(Arrow::Table.new([
1278
+ ["key", [1, 3]],
1279
+ ["number", [10, 30]],
1280
+ ]),
1281
+ table1.join(table2, "key", type: :left_semi))
1282
+ end
1283
+
1284
+ test("type: :right_semi") do
1285
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1286
+ number: [10, 20, 30])
1287
+ table2 = Arrow::Table.new(key: [3, 1],
1288
+ string: ["three", "one"])
1289
+ assert_equal(Arrow::Table.new([
1290
+ ["key", [3, 1]],
1291
+ ["string", ["three", "one"]],
1292
+ ]),
1293
+ table1.join(table2, "key", type: :right_semi))
1294
+ end
1295
+
1296
+ test("type: :left_anti") do
1297
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1298
+ number: [10, 20, 30])
1299
+ table2 = Arrow::Table.new(key: [3, 1],
1300
+ string: ["three", "one"])
1301
+ assert_equal(Arrow::Table.new([
1302
+ ["key", [2]],
1303
+ ["number", [20]],
1304
+ ]),
1305
+ table1.join(table2, "key", type: :left_anti))
1306
+ end
1307
+
1308
+ test("type: :right_anti") do
1309
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1310
+ number: [10, 20, 30])
1311
+ table2 = Arrow::Table.new(key: [3, 1],
1312
+ string: ["three", "one"])
1313
+ assert_equal(Arrow::Table.new([
1314
+ ["key", Arrow::ChunkedArray.new(:uint8)],
1315
+ ["string", Arrow::ChunkedArray.new(:string)],
1316
+ ]),
1317
+ table1.join(table2, "key", type: :right_anti))
1318
+ end
1319
+
1125
1320
  test("left_outputs: & right_outputs:") do
1126
1321
  table1 = Arrow::Table.new(key: [1, 2, 3],
1127
1322
  number: [10, 20, 30])
@@ -1135,5 +1330,94 @@ visible: false
1135
1330
  left_outputs: ["key", "number"],
1136
1331
  right_outputs: ["string"]))
1137
1332
  end
1333
+
1334
+ test("left_outputs: & type: :inner") do
1335
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1336
+ number: [10, 20, 30])
1337
+ table2 = Arrow::Table.new(key: [3, 1],
1338
+ string: ["three", "one"])
1339
+ assert_equal(Arrow::Table.new([
1340
+ ["key", [1, 3]],
1341
+ ["number", [10, 30]],
1342
+ ["key", [1, 3]],
1343
+ ["string", ["one", "three"]]
1344
+ ]),
1345
+ table1.join(table2,
1346
+ type: :inner,
1347
+ left_outputs: table1.column_names,
1348
+ right_outputs: table2.column_names))
1349
+ end
1350
+
1351
+ test("left_outputs: & type: :left_outer") do
1352
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1353
+ number: [10, 20, 30])
1354
+ table2 = Arrow::Table.new(key: [3, 1],
1355
+ string: ["three", "one"])
1356
+ assert_equal(Arrow::Table.new([
1357
+ ["key", [1, 3, 2]],
1358
+ ["number", [10, 30, 20]],
1359
+ ["key", [1, 3, nil]],
1360
+ ["string", ["one", "three", nil]],
1361
+ ]),
1362
+ table1.join(table2,
1363
+ type: :left_outer,
1364
+ left_outputs: table1.column_names,
1365
+ right_outputs: table2.column_names))
1366
+ end
1367
+
1368
+ test("left_outputs: & type: :right_outer") do
1369
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1370
+ number: [10, 20, 30])
1371
+ table2 = Arrow::Table.new(key: [3, 1],
1372
+ string: ["three", "one"])
1373
+ assert_equal(Arrow::Table.new([
1374
+ ["key", [1, 3]],
1375
+ ["number", [10, 30]],
1376
+ ["key", [1, 3]],
1377
+ ["string", ["one", "three"]],
1378
+ ]),
1379
+ table1.join(table2,
1380
+ type: :right_outer,
1381
+ left_outputs: table1.column_names,
1382
+ right_outputs: table2.column_names))
1383
+ end
1384
+
1385
+ test("left_outputs: & type: :full_outer") do
1386
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1387
+ number: [10, 20, 30])
1388
+ table2 = Arrow::Table.new(key: [3, 1],
1389
+ string: ["three", "one"])
1390
+ assert_equal(Arrow::Table.new([
1391
+ ["key", [1, 3, 2]],
1392
+ ["number", [10, 30, 20]],
1393
+ ["key", [1, 3, nil]],
1394
+ ["string", ["one", "three", nil]],
1395
+ ]),
1396
+ table1.join(table2,
1397
+ type: :full_outer,
1398
+ left_outputs: table1.column_names,
1399
+ right_outputs: table2.column_names))
1400
+ end
1401
+
1402
+ test("left_suffix: & keys: [String]") do
1403
+ table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1404
+ key2: [10, 100, 20, 200],
1405
+ number: [1010, 1100, 2020, 2200])
1406
+ table2 = Arrow::Table.new(key1: [1, 2, 2],
1407
+ key2: [100, 20, 50],
1408
+ string: ["1-100", "2-20", "2-50"])
1409
+ assert_equal(Arrow::Table.new([
1410
+ ["key1_left", [1, 2]],
1411
+ ["key2_left", [100, 20]],
1412
+ ["number", [1100, 2020]],
1413
+ ["key1_right", [1, 2]],
1414
+ ["key2_right", [100, 20]],
1415
+ ["string", ["1-100", "2-20"]],
1416
+ ]),
1417
+ table1.join(table2,
1418
+ ["key1", "key2"],
1419
+ left_suffix: "_left",
1420
+ right_suffix: "_right"))
1421
+ end
1138
1422
  end
1139
1423
  end
data/test/values/test-basic-arrays.rb CHANGED
@@ -107,6 +107,16 @@ module ValuesBasicArraysTests
107
107
  assert_equal(values, target.values)
108
108
  end
109
109
 
110
+ def test_half_float
111
+ values = [
112
+ -1.5,
113
+ nil,
114
+ 1.5,
115
+ ]
116
+ target = build(Arrow::HalfFloatArray.new(values))
117
+ assert_equal(values, target.values)
118
+ end
119
+
110
120
  def test_float
111
121
  values = [
112
122
  -1.0,