red-arrow 11.0.0 → 13.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +25 -0
  4. data/ext/arrow/converters.hpp +12 -27
  5. data/ext/arrow/extconf.rb +2 -2
  6. data/lib/arrow/array-computable.rb +13 -0
  7. data/lib/arrow/chunked-array.rb +5 -1
  8. data/lib/arrow/data-type.rb +9 -0
  9. data/lib/arrow/dense-union-array-builder.rb +49 -0
  10. data/lib/arrow/dense-union-array.rb +26 -0
  11. data/lib/arrow/expression.rb +6 -2
  12. data/lib/arrow/function.rb +0 -1
  13. data/lib/arrow/loader.rb +5 -0
  14. data/lib/arrow/record-batch-file-reader.rb +2 -0
  15. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  16. data/lib/arrow/scalar.rb +67 -0
  17. data/lib/arrow/slicer.rb +61 -0
  18. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  19. data/lib/arrow/sparse-union-array.rb +26 -0
  20. data/lib/arrow/struct-array-builder.rb +0 -5
  21. data/lib/arrow/table.rb +130 -10
  22. data/lib/arrow/union-array-builder.rb +59 -0
  23. data/lib/arrow/version.rb +1 -1
  24. data/test/raw-records/test-dense-union-array.rb +90 -45
  25. data/test/raw-records/test-list-array.rb +28 -10
  26. data/test/raw-records/test-map-array.rb +39 -10
  27. data/test/raw-records/test-sparse-union-array.rb +86 -41
  28. data/test/raw-records/test-struct-array.rb +22 -8
  29. data/test/test-array.rb +7 -0
  30. data/test/test-chunked-array.rb +9 -0
  31. data/test/test-dense-union-array.rb +42 -0
  32. data/test/test-dense-union-data-type.rb +1 -1
  33. data/test/test-expression.rb +11 -0
  34. data/test/test-function.rb +7 -7
  35. data/test/test-group.rb +58 -58
  36. data/test/test-record-batch-file-reader.rb +21 -0
  37. data/test/test-record-batch-stream-reader.rb +129 -0
  38. data/test/test-scalar.rb +65 -0
  39. data/test/test-slicer.rb +194 -129
  40. data/test/test-sparse-union-array.rb +38 -0
  41. data/test/test-table.rb +207 -38
  42. data/test/values/test-dense-union-array.rb +88 -45
  43. data/test/values/test-list-array.rb +26 -10
  44. data/test/values/test-map-array.rb +33 -10
  45. data/test/values/test-sparse-union-array.rb +84 -41
  46. data/test/values/test-struct-array.rb +20 -8
  47. metadata +20 -7
data/test/test-table.rb CHANGED
@@ -87,26 +87,24 @@ class TableTest < Test::Unit::TestCase
87
87
  target_rows_raw = [nil, true, true, false, true, false, true, true]
88
88
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
89
89
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
90
- count visible
91
- 0 (null) (null)
92
- 1 2 false
93
- 2 4 (null)
94
- 3 16 true
95
- 4 64 (null)
96
- 5 128 (null)
90
+ count visible
91
+ 0 2 false
92
+ 1 4 (null)
93
+ 2 16 true
94
+ 3 64 (null)
95
+ 4 128 (null)
97
96
  TABLE
98
97
  end
99
98
 
100
99
  test("Array: boolean") do
101
100
  target_rows_raw = [nil, true, true, false, true, false, true, true]
102
101
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
103
- count visible
104
- 0 (null) (null)
105
- 1 2 false
106
- 2 4 (null)
107
- 3 16 true
108
- 4 64 (null)
109
- 5 128 (null)
102
+ count visible
103
+ 0 2 false
104
+ 1 4 (null)
105
+ 2 16 true
106
+ 3 64 (null)
107
+ 4 128 (null)
110
108
  TABLE
111
109
  end
112
110
 
@@ -198,24 +196,18 @@ class TableTest < Test::Unit::TestCase
198
196
 
199
197
  test("{key: true}") do
200
198
  assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
201
- count visible
202
- 0 1 true
203
- 1 (null) (null)
204
- 2 8 true
205
- 3 16 true
206
- 4 (null) (null)
207
- 5 (null) (null)
199
+ count visible
200
+ 0 1 true
201
+ 1 8 true
202
+ 2 16 true
208
203
  TABLE
209
204
  end
210
205
 
211
206
  test("{key: false}") do
212
207
  assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
213
- count visible
214
- 0 2 false
215
- 1 (null) (null)
216
- 2 32 false
217
- 3 (null) (null)
218
- 4 (null) (null)
208
+ count visible
209
+ 0 2 false
210
+ 1 32 false
219
211
  TABLE
220
212
  end
221
213
 
@@ -286,11 +278,8 @@ class TableTest < Test::Unit::TestCase
286
278
 
287
279
  test("{key1: Range, key2: true}") do
288
280
  assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
289
- count visible
290
- 0 2 false
291
- 1 (null) (null)
292
- 2 (null) (null)
293
- 3 (null) (null)
281
+ count visible
282
+ 0 2 false
294
283
  TABLE
295
284
  end
296
285
 
@@ -600,6 +589,13 @@ class TableTest < Test::Unit::TestCase
600
589
  0 1
601
590
  TABLE
602
591
  end
592
+
593
+ test("empty result") do
594
+ selected_table = @table.filter([false] * @table.size).select_columns(:a)
595
+ assert_equal(<<-TABLE, selected_table.to_s)
596
+ a
597
+ TABLE
598
+ end
603
599
  end
604
600
 
605
601
  sub_test_case("#column_names") do
@@ -1131,7 +1127,7 @@ visible: false
1131
1127
  end
1132
1128
 
1133
1129
  sub_test_case("#join") do
1134
- test("no keys") do
1130
+ test("keys: nil (natural join)") do
1135
1131
  table1 = Arrow::Table.new(key: [1, 2, 3],
1136
1132
  number: [10, 20, 30])
1137
1133
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1139,7 +1135,6 @@ visible: false
1139
1135
  assert_equal(Arrow::Table.new([
1140
1136
  ["key", [1, 3]],
1141
1137
  ["number", [10, 30]],
1142
- ["key", [1, 3]],
1143
1138
  ["string", ["one", "three"]],
1144
1139
  ]),
1145
1140
  table1.join(table2))
@@ -1153,7 +1148,6 @@ visible: false
1153
1148
  assert_equal(Arrow::Table.new([
1154
1149
  ["key", [1, 3]],
1155
1150
  ["number", [10, 30]],
1156
- ["key", [1, 3]],
1157
1151
  ["string", ["one", "three"]],
1158
1152
  ]),
1159
1153
  table1.join(table2, "key"))
@@ -1167,12 +1161,25 @@ visible: false
1167
1161
  assert_equal(Arrow::Table.new([
1168
1162
  ["key", [1, 3]],
1169
1163
  ["number", [10, 30]],
1170
- ["key", [1, 3]],
1171
1164
  ["string", ["one", "three"]],
1172
1165
  ]),
1173
1166
  table1.join(table2, :key))
1174
1167
  end
1175
1168
 
1169
+ test("keys: [String]") do
1170
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1171
+ number: [10, 20, 30])
1172
+ table2 = Arrow::Table.new(key: [3, 1],
1173
+ string: ["three", "one"])
1174
+ assert_equal(Arrow::Table.new([
1175
+ ["key", [1, 3]],
1176
+ ["number", [10, 30]],
1177
+ ["key", [1, 3]],
1178
+ ["string", ["one", "three"]],
1179
+ ]),
1180
+ table1.join(table2, ["key"]))
1181
+ end
1182
+
1176
1183
  test("keys: [String, Symbol]") do
1177
1184
  table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1178
1185
  key2: [10, 100, 20, 200],
@@ -1230,7 +1237,7 @@ visible: false
1230
1237
  type: :inner))
1231
1238
  end
1232
1239
 
1233
- test("type:") do
1240
+ test("type: :left_outer") do
1234
1241
  table1 = Arrow::Table.new(key: [1, 2, 3],
1235
1242
  number: [10, 20, 30])
1236
1243
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1238,12 +1245,85 @@ visible: false
1238
1245
  assert_equal(Arrow::Table.new([
1239
1246
  ["key", [1, 3, 2]],
1240
1247
  ["number", [10, 30, 20]],
1241
- ["key", [1, 3, nil]],
1242
1248
  ["string", ["one", "three", nil]],
1243
1249
  ]),
1244
1250
  table1.join(table2, "key", type: :left_outer))
1245
1251
  end
1246
1252
 
1253
+ test("type: :right_outer") do
1254
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1255
+ number: [10, 20, 30])
1256
+ table2 = Arrow::Table.new(key: [3, 1],
1257
+ string: ["three", "one"])
1258
+ assert_equal(Arrow::Table.new([
1259
+ ["key", [1, 3]],
1260
+ ["number", [10, 30]],
1261
+ ["string", ["one", "three"]],
1262
+ ]),
1263
+ table1.join(table2, "key", type: :right_outer))
1264
+ end
1265
+
1266
+ test("type: :full_outer") do
1267
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1268
+ number: [10, 20, 30])
1269
+ table2 = Arrow::Table.new(key: [3, 1],
1270
+ string: ["three", "one"])
1271
+ assert_equal(Arrow::Table.new([
1272
+ ["key", [1, 3, 2]],
1273
+ ["number", [10, 30, 20]],
1274
+ ["string", ["one", "three", nil]],
1275
+ ]),
1276
+ table1.join(table2, "key", type: :full_outer))
1277
+ end
1278
+
1279
+ test("type: :left_semi") do
1280
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1281
+ number: [10, 20, 30])
1282
+ table2 = Arrow::Table.new(key: [3, 1],
1283
+ string: ["three", "one"])
1284
+ assert_equal(Arrow::Table.new([
1285
+ ["key", [1, 3]],
1286
+ ["number", [10, 30]],
1287
+ ]),
1288
+ table1.join(table2, "key", type: :left_semi))
1289
+ end
1290
+
1291
+ test("type: :right_semi") do
1292
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1293
+ number: [10, 20, 30])
1294
+ table2 = Arrow::Table.new(key: [3, 1],
1295
+ string: ["three", "one"])
1296
+ assert_equal(Arrow::Table.new([
1297
+ ["key", [3, 1]],
1298
+ ["string", ["three", "one"]],
1299
+ ]),
1300
+ table1.join(table2, "key", type: :right_semi))
1301
+ end
1302
+
1303
+ test("type: :left_anti") do
1304
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1305
+ number: [10, 20, 30])
1306
+ table2 = Arrow::Table.new(key: [3, 1],
1307
+ string: ["three", "one"])
1308
+ assert_equal(Arrow::Table.new([
1309
+ ["key", [2]],
1310
+ ["number", [20]],
1311
+ ]),
1312
+ table1.join(table2, "key", type: :left_anti))
1313
+ end
1314
+
1315
+ test("type: :right_anti") do
1316
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1317
+ number: [10, 20, 30])
1318
+ table2 = Arrow::Table.new(key: [3, 1],
1319
+ string: ["three", "one"])
1320
+ assert_equal(Arrow::Table.new([
1321
+ ["key", Arrow::ChunkedArray.new(:uint8)],
1322
+ ["string", Arrow::ChunkedArray.new(:string)],
1323
+ ]),
1324
+ table1.join(table2, "key", type: :right_anti))
1325
+ end
1326
+
1247
1327
  test("left_outputs: & right_outputs:") do
1248
1328
  table1 = Arrow::Table.new(key: [1, 2, 3],
1249
1329
  number: [10, 20, 30])
@@ -1257,5 +1337,94 @@ visible: false
1257
1337
  left_outputs: ["key", "number"],
1258
1338
  right_outputs: ["string"]))
1259
1339
  end
1340
+
1341
+ test("left_outputs: & type: :inner") do
1342
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1343
+ number: [10, 20, 30])
1344
+ table2 = Arrow::Table.new(key: [3, 1],
1345
+ string: ["three", "one"])
1346
+ assert_equal(Arrow::Table.new([
1347
+ ["key", [1, 3]],
1348
+ ["number", [10, 30]],
1349
+ ["key", [1, 3]],
1350
+ ["string", ["one", "three"]]
1351
+ ]),
1352
+ table1.join(table2,
1353
+ type: :inner,
1354
+ left_outputs: table1.column_names,
1355
+ right_outputs: table2.column_names))
1356
+ end
1357
+
1358
+ test("left_outputs: & type: :left_outer") do
1359
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1360
+ number: [10, 20, 30])
1361
+ table2 = Arrow::Table.new(key: [3, 1],
1362
+ string: ["three", "one"])
1363
+ assert_equal(Arrow::Table.new([
1364
+ ["key", [1, 3, 2]],
1365
+ ["number", [10, 30, 20]],
1366
+ ["key", [1, 3, nil]],
1367
+ ["string", ["one", "three", nil]],
1368
+ ]),
1369
+ table1.join(table2,
1370
+ type: :left_outer,
1371
+ left_outputs: table1.column_names,
1372
+ right_outputs: table2.column_names))
1373
+ end
1374
+
1375
+ test("left_outputs: & type: :right_outer") do
1376
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1377
+ number: [10, 20, 30])
1378
+ table2 = Arrow::Table.new(key: [3, 1],
1379
+ string: ["three", "one"])
1380
+ assert_equal(Arrow::Table.new([
1381
+ ["key", [1, 3]],
1382
+ ["number", [10, 30]],
1383
+ ["key", [1, 3]],
1384
+ ["string", ["one", "three"]],
1385
+ ]),
1386
+ table1.join(table2,
1387
+ type: :right_outer,
1388
+ left_outputs: table1.column_names,
1389
+ right_outputs: table2.column_names))
1390
+ end
1391
+
1392
+ test("left_outputs: & type: :full_outer") do
1393
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1394
+ number: [10, 20, 30])
1395
+ table2 = Arrow::Table.new(key: [3, 1],
1396
+ string: ["three", "one"])
1397
+ assert_equal(Arrow::Table.new([
1398
+ ["key", [1, 3, 2]],
1399
+ ["number", [10, 30, 20]],
1400
+ ["key", [1, 3, nil]],
1401
+ ["string", ["one", "three", nil]],
1402
+ ]),
1403
+ table1.join(table2,
1404
+ type: :full_outer,
1405
+ left_outputs: table1.column_names,
1406
+ right_outputs: table2.column_names))
1407
+ end
1408
+
1409
+ test("left_suffix: & keys: [String]") do
1410
+ table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1411
+ key2: [10, 100, 20, 200],
1412
+ number: [1010, 1100, 2020, 2200])
1413
+ table2 = Arrow::Table.new(key1: [1, 2, 2],
1414
+ key2: [100, 20, 50],
1415
+ string: ["1-100", "2-20", "2-50"])
1416
+ assert_equal(Arrow::Table.new([
1417
+ ["key1_left", [1, 2]],
1418
+ ["key2_left", [100, 20]],
1419
+ ["number", [1100, 2020]],
1420
+ ["key1_right", [1, 2]],
1421
+ ["key2_right", [100, 20]],
1422
+ ["string", ["1-100", "2-20"]],
1423
+ ]),
1424
+ table1.join(table2,
1425
+ ["key1", "key2"],
1426
+ left_suffix: "_left",
1427
+ right_suffix: "_right"))
1428
+ end
1260
1429
  end
1261
1430
  end