red-arrow 11.0.0 → 13.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +25 -0
  4. data/ext/arrow/converters.hpp +12 -27
  5. data/ext/arrow/extconf.rb +2 -2
  6. data/lib/arrow/array-computable.rb +13 -0
  7. data/lib/arrow/chunked-array.rb +5 -1
  8. data/lib/arrow/data-type.rb +9 -0
  9. data/lib/arrow/dense-union-array-builder.rb +49 -0
  10. data/lib/arrow/dense-union-array.rb +26 -0
  11. data/lib/arrow/expression.rb +6 -2
  12. data/lib/arrow/function.rb +0 -1
  13. data/lib/arrow/loader.rb +5 -0
  14. data/lib/arrow/record-batch-file-reader.rb +2 -0
  15. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  16. data/lib/arrow/scalar.rb +67 -0
  17. data/lib/arrow/slicer.rb +61 -0
  18. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  19. data/lib/arrow/sparse-union-array.rb +26 -0
  20. data/lib/arrow/struct-array-builder.rb +0 -5
  21. data/lib/arrow/table.rb +130 -10
  22. data/lib/arrow/union-array-builder.rb +59 -0
  23. data/lib/arrow/version.rb +1 -1
  24. data/test/raw-records/test-dense-union-array.rb +90 -45
  25. data/test/raw-records/test-list-array.rb +28 -10
  26. data/test/raw-records/test-map-array.rb +39 -10
  27. data/test/raw-records/test-sparse-union-array.rb +86 -41
  28. data/test/raw-records/test-struct-array.rb +22 -8
  29. data/test/test-array.rb +7 -0
  30. data/test/test-chunked-array.rb +9 -0
  31. data/test/test-dense-union-array.rb +42 -0
  32. data/test/test-dense-union-data-type.rb +1 -1
  33. data/test/test-expression.rb +11 -0
  34. data/test/test-function.rb +7 -7
  35. data/test/test-group.rb +58 -58
  36. data/test/test-record-batch-file-reader.rb +21 -0
  37. data/test/test-record-batch-stream-reader.rb +129 -0
  38. data/test/test-scalar.rb +65 -0
  39. data/test/test-slicer.rb +194 -129
  40. data/test/test-sparse-union-array.rb +38 -0
  41. data/test/test-table.rb +207 -38
  42. data/test/values/test-dense-union-array.rb +88 -45
  43. data/test/values/test-list-array.rb +26 -10
  44. data/test/values/test-map-array.rb +33 -10
  45. data/test/values/test-sparse-union-array.rb +84 -41
  46. data/test/values/test-struct-array.rb +20 -8
  47. metadata +20 -7
data/test/test-table.rb CHANGED
@@ -87,26 +87,24 @@ class TableTest < Test::Unit::TestCase
87
87
  target_rows_raw = [nil, true, true, false, true, false, true, true]
88
88
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
89
89
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
90
- count visible
91
- 0 (null) (null)
92
- 1 2 false
93
- 2 4 (null)
94
- 3 16 true
95
- 4 64 (null)
96
- 5 128 (null)
90
+ count visible
91
+ 0 2 false
92
+ 1 4 (null)
93
+ 2 16 true
94
+ 3 64 (null)
95
+ 4 128 (null)
97
96
  TABLE
98
97
  end
99
98
 
100
99
  test("Array: boolean") do
101
100
  target_rows_raw = [nil, true, true, false, true, false, true, true]
102
101
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
103
- count visible
104
- 0 (null) (null)
105
- 1 2 false
106
- 2 4 (null)
107
- 3 16 true
108
- 4 64 (null)
109
- 5 128 (null)
102
+ count visible
103
+ 0 2 false
104
+ 1 4 (null)
105
+ 2 16 true
106
+ 3 64 (null)
107
+ 4 128 (null)
110
108
  TABLE
111
109
  end
112
110
 
@@ -198,24 +196,18 @@ class TableTest < Test::Unit::TestCase
198
196
 
199
197
  test("{key: true}") do
200
198
  assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
201
- count visible
202
- 0 1 true
203
- 1 (null) (null)
204
- 2 8 true
205
- 3 16 true
206
- 4 (null) (null)
207
- 5 (null) (null)
199
+ count visible
200
+ 0 1 true
201
+ 1 8 true
202
+ 2 16 true
208
203
  TABLE
209
204
  end
210
205
 
211
206
  test("{key: false}") do
212
207
  assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
213
- count visible
214
- 0 2 false
215
- 1 (null) (null)
216
- 2 32 false
217
- 3 (null) (null)
218
- 4 (null) (null)
208
+ count visible
209
+ 0 2 false
210
+ 1 32 false
219
211
  TABLE
220
212
  end
221
213
 
@@ -286,11 +278,8 @@ class TableTest < Test::Unit::TestCase
286
278
 
287
279
  test("{key1: Range, key2: true}") do
288
280
  assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
289
- count visible
290
- 0 2 false
291
- 1 (null) (null)
292
- 2 (null) (null)
293
- 3 (null) (null)
281
+ count visible
282
+ 0 2 false
294
283
  TABLE
295
284
  end
296
285
 
@@ -600,6 +589,13 @@ class TableTest < Test::Unit::TestCase
600
589
  0 1
601
590
  TABLE
602
591
  end
592
+
593
+ test("empty result") do
594
+ selected_table = @table.filter([false] * @table.size).select_columns(:a)
595
+ assert_equal(<<-TABLE, selected_table.to_s)
596
+ a
597
+ TABLE
598
+ end
603
599
  end
604
600
 
605
601
  sub_test_case("#column_names") do
@@ -1131,7 +1127,7 @@ visible: false
1131
1127
  end
1132
1128
 
1133
1129
  sub_test_case("#join") do
1134
- test("no keys") do
1130
+ test("keys: nil (natural join)") do
1135
1131
  table1 = Arrow::Table.new(key: [1, 2, 3],
1136
1132
  number: [10, 20, 30])
1137
1133
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1139,7 +1135,6 @@ visible: false
1139
1135
  assert_equal(Arrow::Table.new([
1140
1136
  ["key", [1, 3]],
1141
1137
  ["number", [10, 30]],
1142
- ["key", [1, 3]],
1143
1138
  ["string", ["one", "three"]],
1144
1139
  ]),
1145
1140
  table1.join(table2))
@@ -1153,7 +1148,6 @@ visible: false
1153
1148
  assert_equal(Arrow::Table.new([
1154
1149
  ["key", [1, 3]],
1155
1150
  ["number", [10, 30]],
1156
- ["key", [1, 3]],
1157
1151
  ["string", ["one", "three"]],
1158
1152
  ]),
1159
1153
  table1.join(table2, "key"))
@@ -1167,12 +1161,25 @@ visible: false
1167
1161
  assert_equal(Arrow::Table.new([
1168
1162
  ["key", [1, 3]],
1169
1163
  ["number", [10, 30]],
1170
- ["key", [1, 3]],
1171
1164
  ["string", ["one", "three"]],
1172
1165
  ]),
1173
1166
  table1.join(table2, :key))
1174
1167
  end
1175
1168
 
1169
+ test("keys: [String]") do
1170
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1171
+ number: [10, 20, 30])
1172
+ table2 = Arrow::Table.new(key: [3, 1],
1173
+ string: ["three", "one"])
1174
+ assert_equal(Arrow::Table.new([
1175
+ ["key", [1, 3]],
1176
+ ["number", [10, 30]],
1177
+ ["key", [1, 3]],
1178
+ ["string", ["one", "three"]],
1179
+ ]),
1180
+ table1.join(table2, ["key"]))
1181
+ end
1182
+
1176
1183
  test("keys: [String, Symbol]") do
1177
1184
  table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1178
1185
  key2: [10, 100, 20, 200],
@@ -1230,7 +1237,7 @@ visible: false
1230
1237
  type: :inner))
1231
1238
  end
1232
1239
 
1233
- test("type:") do
1240
+ test("type: :left_outer") do
1234
1241
  table1 = Arrow::Table.new(key: [1, 2, 3],
1235
1242
  number: [10, 20, 30])
1236
1243
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1238,12 +1245,85 @@ visible: false
1238
1245
  assert_equal(Arrow::Table.new([
1239
1246
  ["key", [1, 3, 2]],
1240
1247
  ["number", [10, 30, 20]],
1241
- ["key", [1, 3, nil]],
1242
1248
  ["string", ["one", "three", nil]],
1243
1249
  ]),
1244
1250
  table1.join(table2, "key", type: :left_outer))
1245
1251
  end
1246
1252
 
1253
+ test("type: :right_outer") do
1254
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1255
+ number: [10, 20, 30])
1256
+ table2 = Arrow::Table.new(key: [3, 1],
1257
+ string: ["three", "one"])
1258
+ assert_equal(Arrow::Table.new([
1259
+ ["key", [1, 3]],
1260
+ ["number", [10, 30]],
1261
+ ["string", ["one", "three"]],
1262
+ ]),
1263
+ table1.join(table2, "key", type: :right_outer))
1264
+ end
1265
+
1266
+ test("type: :full_outer") do
1267
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1268
+ number: [10, 20, 30])
1269
+ table2 = Arrow::Table.new(key: [3, 1],
1270
+ string: ["three", "one"])
1271
+ assert_equal(Arrow::Table.new([
1272
+ ["key", [1, 3, 2]],
1273
+ ["number", [10, 30, 20]],
1274
+ ["string", ["one", "three", nil]],
1275
+ ]),
1276
+ table1.join(table2, "key", type: :full_outer))
1277
+ end
1278
+
1279
+ test("type: :left_semi") do
1280
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1281
+ number: [10, 20, 30])
1282
+ table2 = Arrow::Table.new(key: [3, 1],
1283
+ string: ["three", "one"])
1284
+ assert_equal(Arrow::Table.new([
1285
+ ["key", [1, 3]],
1286
+ ["number", [10, 30]],
1287
+ ]),
1288
+ table1.join(table2, "key", type: :left_semi))
1289
+ end
1290
+
1291
+ test("type: :right_semi") do
1292
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1293
+ number: [10, 20, 30])
1294
+ table2 = Arrow::Table.new(key: [3, 1],
1295
+ string: ["three", "one"])
1296
+ assert_equal(Arrow::Table.new([
1297
+ ["key", [3, 1]],
1298
+ ["string", ["three", "one"]],
1299
+ ]),
1300
+ table1.join(table2, "key", type: :right_semi))
1301
+ end
1302
+
1303
+ test("type: :left_anti") do
1304
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1305
+ number: [10, 20, 30])
1306
+ table2 = Arrow::Table.new(key: [3, 1],
1307
+ string: ["three", "one"])
1308
+ assert_equal(Arrow::Table.new([
1309
+ ["key", [2]],
1310
+ ["number", [20]],
1311
+ ]),
1312
+ table1.join(table2, "key", type: :left_anti))
1313
+ end
1314
+
1315
+ test("type: :right_anti") do
1316
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1317
+ number: [10, 20, 30])
1318
+ table2 = Arrow::Table.new(key: [3, 1],
1319
+ string: ["three", "one"])
1320
+ assert_equal(Arrow::Table.new([
1321
+ ["key", Arrow::ChunkedArray.new(:uint8)],
1322
+ ["string", Arrow::ChunkedArray.new(:string)],
1323
+ ]),
1324
+ table1.join(table2, "key", type: :right_anti))
1325
+ end
1326
+
1247
1327
  test("left_outputs: & right_outputs:") do
1248
1328
  table1 = Arrow::Table.new(key: [1, 2, 3],
1249
1329
  number: [10, 20, 30])
@@ -1257,5 +1337,94 @@ visible: false
1257
1337
  left_outputs: ["key", "number"],
1258
1338
  right_outputs: ["string"]))
1259
1339
  end
1340
+
1341
+ test("left_outputs: & type: :inner") do
1342
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1343
+ number: [10, 20, 30])
1344
+ table2 = Arrow::Table.new(key: [3, 1],
1345
+ string: ["three", "one"])
1346
+ assert_equal(Arrow::Table.new([
1347
+ ["key", [1, 3]],
1348
+ ["number", [10, 30]],
1349
+ ["key", [1, 3]],
1350
+ ["string", ["one", "three"]]
1351
+ ]),
1352
+ table1.join(table2,
1353
+ type: :inner,
1354
+ left_outputs: table1.column_names,
1355
+ right_outputs: table2.column_names))
1356
+ end
1357
+
1358
+ test("left_outputs: & type: :left_outer") do
1359
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1360
+ number: [10, 20, 30])
1361
+ table2 = Arrow::Table.new(key: [3, 1],
1362
+ string: ["three", "one"])
1363
+ assert_equal(Arrow::Table.new([
1364
+ ["key", [1, 3, 2]],
1365
+ ["number", [10, 30, 20]],
1366
+ ["key", [1, 3, nil]],
1367
+ ["string", ["one", "three", nil]],
1368
+ ]),
1369
+ table1.join(table2,
1370
+ type: :left_outer,
1371
+ left_outputs: table1.column_names,
1372
+ right_outputs: table2.column_names))
1373
+ end
1374
+
1375
+ test("left_outputs: & type: :right_outer") do
1376
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1377
+ number: [10, 20, 30])
1378
+ table2 = Arrow::Table.new(key: [3, 1],
1379
+ string: ["three", "one"])
1380
+ assert_equal(Arrow::Table.new([
1381
+ ["key", [1, 3]],
1382
+ ["number", [10, 30]],
1383
+ ["key", [1, 3]],
1384
+ ["string", ["one", "three"]],
1385
+ ]),
1386
+ table1.join(table2,
1387
+ type: :right_outer,
1388
+ left_outputs: table1.column_names,
1389
+ right_outputs: table2.column_names))
1390
+ end
1391
+
1392
+ test("left_outputs: & type: :full_outer") do
1393
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1394
+ number: [10, 20, 30])
1395
+ table2 = Arrow::Table.new(key: [3, 1],
1396
+ string: ["three", "one"])
1397
+ assert_equal(Arrow::Table.new([
1398
+ ["key", [1, 3, 2]],
1399
+ ["number", [10, 30, 20]],
1400
+ ["key", [1, 3, nil]],
1401
+ ["string", ["one", "three", nil]],
1402
+ ]),
1403
+ table1.join(table2,
1404
+ type: :full_outer,
1405
+ left_outputs: table1.column_names,
1406
+ right_outputs: table2.column_names))
1407
+ end
1408
+
1409
+ test("left_suffix: & keys: [String]") do
1410
+ table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1411
+ key2: [10, 100, 20, 200],
1412
+ number: [1010, 1100, 2020, 2200])
1413
+ table2 = Arrow::Table.new(key1: [1, 2, 2],
1414
+ key2: [100, 20, 50],
1415
+ string: ["1-100", "2-20", "2-50"])
1416
+ assert_equal(Arrow::Table.new([
1417
+ ["key1_left", [1, 2]],
1418
+ ["key2_left", [100, 20]],
1419
+ ["number", [1100, 2020]],
1420
+ ["key1_right", [1, 2]],
1421
+ ["key2_right", [100, 20]],
1422
+ ["string", ["1-100", "2-20"]],
1423
+ ]),
1424
+ table1.join(table2,
1425
+ ["key1", "key2"],
1426
+ left_suffix: "_left",
1427
+ right_suffix: "_right"))
1428
+ end
1260
1429
  end
1261
1430
  end