red-arrow 11.0.0 → 12.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/converters.hpp +12 -27
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/loader.rb +5 -0
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table.rb +130 -10
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +200 -38
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +20 -7
data/test/test-table.rb
CHANGED
@@ -87,26 +87,24 @@ class TableTest < Test::Unit::TestCase
|
|
87
87
|
target_rows_raw = [nil, true, true, false, true, false, true, true]
|
88
88
|
target_rows = Arrow::BooleanArray.new(target_rows_raw)
|
89
89
|
assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
|
90
|
-
|
91
|
-
0
|
92
|
-
1
|
93
|
-
2
|
94
|
-
3
|
95
|
-
4
|
96
|
-
5 128 (null)
|
90
|
+
count visible
|
91
|
+
0 2 false
|
92
|
+
1 4 (null)
|
93
|
+
2 16 true
|
94
|
+
3 64 (null)
|
95
|
+
4 128 (null)
|
97
96
|
TABLE
|
98
97
|
end
|
99
98
|
|
100
99
|
test("Array: boolean") do
|
101
100
|
target_rows_raw = [nil, true, true, false, true, false, true, true]
|
102
101
|
assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
|
103
|
-
|
104
|
-
0
|
105
|
-
1
|
106
|
-
2
|
107
|
-
3
|
108
|
-
4
|
109
|
-
5 128 (null)
|
102
|
+
count visible
|
103
|
+
0 2 false
|
104
|
+
1 4 (null)
|
105
|
+
2 16 true
|
106
|
+
3 64 (null)
|
107
|
+
4 128 (null)
|
110
108
|
TABLE
|
111
109
|
end
|
112
110
|
|
@@ -198,24 +196,18 @@ class TableTest < Test::Unit::TestCase
|
|
198
196
|
|
199
197
|
test("{key: true}") do
|
200
198
|
assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
|
201
|
-
|
202
|
-
0
|
203
|
-
1
|
204
|
-
2
|
205
|
-
3 16 true
|
206
|
-
4 (null) (null)
|
207
|
-
5 (null) (null)
|
199
|
+
count visible
|
200
|
+
0 1 true
|
201
|
+
1 8 true
|
202
|
+
2 16 true
|
208
203
|
TABLE
|
209
204
|
end
|
210
205
|
|
211
206
|
test("{key: false}") do
|
212
207
|
assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
|
213
|
-
|
214
|
-
0
|
215
|
-
1
|
216
|
-
2 32 false
|
217
|
-
3 (null) (null)
|
218
|
-
4 (null) (null)
|
208
|
+
count visible
|
209
|
+
0 2 false
|
210
|
+
1 32 false
|
219
211
|
TABLE
|
220
212
|
end
|
221
213
|
|
@@ -286,11 +278,8 @@ class TableTest < Test::Unit::TestCase
|
|
286
278
|
|
287
279
|
test("{key1: Range, key2: true}") do
|
288
280
|
assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
|
289
|
-
|
290
|
-
0
|
291
|
-
1 (null) (null)
|
292
|
-
2 (null) (null)
|
293
|
-
3 (null) (null)
|
281
|
+
count visible
|
282
|
+
0 2 false
|
294
283
|
TABLE
|
295
284
|
end
|
296
285
|
|
@@ -1131,7 +1120,7 @@ visible: false
|
|
1131
1120
|
end
|
1132
1121
|
|
1133
1122
|
sub_test_case("#join") do
|
1134
|
-
test("
|
1123
|
+
test("keys: nil (natural join)") do
|
1135
1124
|
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1136
1125
|
number: [10, 20, 30])
|
1137
1126
|
table2 = Arrow::Table.new(key: [3, 1],
|
@@ -1139,7 +1128,6 @@ visible: false
|
|
1139
1128
|
assert_equal(Arrow::Table.new([
|
1140
1129
|
["key", [1, 3]],
|
1141
1130
|
["number", [10, 30]],
|
1142
|
-
["key", [1, 3]],
|
1143
1131
|
["string", ["one", "three"]],
|
1144
1132
|
]),
|
1145
1133
|
table1.join(table2))
|
@@ -1153,7 +1141,6 @@ visible: false
|
|
1153
1141
|
assert_equal(Arrow::Table.new([
|
1154
1142
|
["key", [1, 3]],
|
1155
1143
|
["number", [10, 30]],
|
1156
|
-
["key", [1, 3]],
|
1157
1144
|
["string", ["one", "three"]],
|
1158
1145
|
]),
|
1159
1146
|
table1.join(table2, "key"))
|
@@ -1167,12 +1154,25 @@ visible: false
|
|
1167
1154
|
assert_equal(Arrow::Table.new([
|
1168
1155
|
["key", [1, 3]],
|
1169
1156
|
["number", [10, 30]],
|
1170
|
-
["key", [1, 3]],
|
1171
1157
|
["string", ["one", "three"]],
|
1172
1158
|
]),
|
1173
1159
|
table1.join(table2, :key))
|
1174
1160
|
end
|
1175
1161
|
|
1162
|
+
test("keys: [String]") do
|
1163
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1164
|
+
number: [10, 20, 30])
|
1165
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1166
|
+
string: ["three", "one"])
|
1167
|
+
assert_equal(Arrow::Table.new([
|
1168
|
+
["key", [1, 3]],
|
1169
|
+
["number", [10, 30]],
|
1170
|
+
["key", [1, 3]],
|
1171
|
+
["string", ["one", "three"]],
|
1172
|
+
]),
|
1173
|
+
table1.join(table2, ["key"]))
|
1174
|
+
end
|
1175
|
+
|
1176
1176
|
test("keys: [String, Symbol]") do
|
1177
1177
|
table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
|
1178
1178
|
key2: [10, 100, 20, 200],
|
@@ -1230,7 +1230,7 @@ visible: false
|
|
1230
1230
|
type: :inner))
|
1231
1231
|
end
|
1232
1232
|
|
1233
|
-
test("type:") do
|
1233
|
+
test("type: :left_outer") do
|
1234
1234
|
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1235
1235
|
number: [10, 20, 30])
|
1236
1236
|
table2 = Arrow::Table.new(key: [3, 1],
|
@@ -1238,12 +1238,85 @@ visible: false
|
|
1238
1238
|
assert_equal(Arrow::Table.new([
|
1239
1239
|
["key", [1, 3, 2]],
|
1240
1240
|
["number", [10, 30, 20]],
|
1241
|
-
["key", [1, 3, nil]],
|
1242
1241
|
["string", ["one", "three", nil]],
|
1243
1242
|
]),
|
1244
1243
|
table1.join(table2, "key", type: :left_outer))
|
1245
1244
|
end
|
1246
1245
|
|
1246
|
+
test("type: :right_outer") do
|
1247
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1248
|
+
number: [10, 20, 30])
|
1249
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1250
|
+
string: ["three", "one"])
|
1251
|
+
assert_equal(Arrow::Table.new([
|
1252
|
+
["key", [1, 3]],
|
1253
|
+
["number", [10, 30]],
|
1254
|
+
["string", ["one", "three"]],
|
1255
|
+
]),
|
1256
|
+
table1.join(table2, "key", type: :right_outer))
|
1257
|
+
end
|
1258
|
+
|
1259
|
+
test("type: :full_outer") do
|
1260
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1261
|
+
number: [10, 20, 30])
|
1262
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1263
|
+
string: ["three", "one"])
|
1264
|
+
assert_equal(Arrow::Table.new([
|
1265
|
+
["key", [1, 3, 2]],
|
1266
|
+
["number", [10, 30, 20]],
|
1267
|
+
["string", ["one", "three", nil]],
|
1268
|
+
]),
|
1269
|
+
table1.join(table2, "key", type: :full_outer))
|
1270
|
+
end
|
1271
|
+
|
1272
|
+
test("type: :left_semi") do
|
1273
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1274
|
+
number: [10, 20, 30])
|
1275
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1276
|
+
string: ["three", "one"])
|
1277
|
+
assert_equal(Arrow::Table.new([
|
1278
|
+
["key", [1, 3]],
|
1279
|
+
["number", [10, 30]],
|
1280
|
+
]),
|
1281
|
+
table1.join(table2, "key", type: :left_semi))
|
1282
|
+
end
|
1283
|
+
|
1284
|
+
test("type: :right_semi") do
|
1285
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1286
|
+
number: [10, 20, 30])
|
1287
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1288
|
+
string: ["three", "one"])
|
1289
|
+
assert_equal(Arrow::Table.new([
|
1290
|
+
["key", [3, 1]],
|
1291
|
+
["string", ["three", "one"]],
|
1292
|
+
]),
|
1293
|
+
table1.join(table2, "key", type: :right_semi))
|
1294
|
+
end
|
1295
|
+
|
1296
|
+
test("type: :left_anti") do
|
1297
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1298
|
+
number: [10, 20, 30])
|
1299
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1300
|
+
string: ["three", "one"])
|
1301
|
+
assert_equal(Arrow::Table.new([
|
1302
|
+
["key", [2]],
|
1303
|
+
["number", [20]],
|
1304
|
+
]),
|
1305
|
+
table1.join(table2, "key", type: :left_anti))
|
1306
|
+
end
|
1307
|
+
|
1308
|
+
test("type: :right_anti") do
|
1309
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1310
|
+
number: [10, 20, 30])
|
1311
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1312
|
+
string: ["three", "one"])
|
1313
|
+
assert_equal(Arrow::Table.new([
|
1314
|
+
["key", Arrow::ChunkedArray.new(:uint8)],
|
1315
|
+
["string", Arrow::ChunkedArray.new(:string)],
|
1316
|
+
]),
|
1317
|
+
table1.join(table2, "key", type: :right_anti))
|
1318
|
+
end
|
1319
|
+
|
1247
1320
|
test("left_outputs: & right_outputs:") do
|
1248
1321
|
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1249
1322
|
number: [10, 20, 30])
|
@@ -1257,5 +1330,94 @@ visible: false
|
|
1257
1330
|
left_outputs: ["key", "number"],
|
1258
1331
|
right_outputs: ["string"]))
|
1259
1332
|
end
|
1333
|
+
|
1334
|
+
test("left_outputs: & type: :inner") do
|
1335
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1336
|
+
number: [10, 20, 30])
|
1337
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1338
|
+
string: ["three", "one"])
|
1339
|
+
assert_equal(Arrow::Table.new([
|
1340
|
+
["key", [1, 3]],
|
1341
|
+
["number", [10, 30]],
|
1342
|
+
["key", [1, 3]],
|
1343
|
+
["string", ["one", "three"]]
|
1344
|
+
]),
|
1345
|
+
table1.join(table2,
|
1346
|
+
type: :inner,
|
1347
|
+
left_outputs: table1.column_names,
|
1348
|
+
right_outputs: table2.column_names))
|
1349
|
+
end
|
1350
|
+
|
1351
|
+
test("left_outputs: & type: :left_outer") do
|
1352
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1353
|
+
number: [10, 20, 30])
|
1354
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1355
|
+
string: ["three", "one"])
|
1356
|
+
assert_equal(Arrow::Table.new([
|
1357
|
+
["key", [1, 3, 2]],
|
1358
|
+
["number", [10, 30, 20]],
|
1359
|
+
["key", [1, 3, nil]],
|
1360
|
+
["string", ["one", "three", nil]],
|
1361
|
+
]),
|
1362
|
+
table1.join(table2,
|
1363
|
+
type: :left_outer,
|
1364
|
+
left_outputs: table1.column_names,
|
1365
|
+
right_outputs: table2.column_names))
|
1366
|
+
end
|
1367
|
+
|
1368
|
+
test("left_outputs: & type: :right_outer") do
|
1369
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1370
|
+
number: [10, 20, 30])
|
1371
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1372
|
+
string: ["three", "one"])
|
1373
|
+
assert_equal(Arrow::Table.new([
|
1374
|
+
["key", [1, 3]],
|
1375
|
+
["number", [10, 30]],
|
1376
|
+
["key", [1, 3]],
|
1377
|
+
["string", ["one", "three"]],
|
1378
|
+
]),
|
1379
|
+
table1.join(table2,
|
1380
|
+
type: :right_outer,
|
1381
|
+
left_outputs: table1.column_names,
|
1382
|
+
right_outputs: table2.column_names))
|
1383
|
+
end
|
1384
|
+
|
1385
|
+
test("left_outputs: & type: :full_outer") do
|
1386
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1387
|
+
number: [10, 20, 30])
|
1388
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1389
|
+
string: ["three", "one"])
|
1390
|
+
assert_equal(Arrow::Table.new([
|
1391
|
+
["key", [1, 3, 2]],
|
1392
|
+
["number", [10, 30, 20]],
|
1393
|
+
["key", [1, 3, nil]],
|
1394
|
+
["string", ["one", "three", nil]],
|
1395
|
+
]),
|
1396
|
+
table1.join(table2,
|
1397
|
+
type: :full_outer,
|
1398
|
+
left_outputs: table1.column_names,
|
1399
|
+
right_outputs: table2.column_names))
|
1400
|
+
end
|
1401
|
+
|
1402
|
+
test("left_suffix: & keys: [String]") do
|
1403
|
+
table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
|
1404
|
+
key2: [10, 100, 20, 200],
|
1405
|
+
number: [1010, 1100, 2020, 2200])
|
1406
|
+
table2 = Arrow::Table.new(key1: [1, 2, 2],
|
1407
|
+
key2: [100, 20, 50],
|
1408
|
+
string: ["1-100", "2-20", "2-50"])
|
1409
|
+
assert_equal(Arrow::Table.new([
|
1410
|
+
["key1_left", [1, 2]],
|
1411
|
+
["key2_left", [100, 20]],
|
1412
|
+
["number", [1100, 2020]],
|
1413
|
+
["key1_right", [1, 2]],
|
1414
|
+
["key2_right", [100, 20]],
|
1415
|
+
["string", ["1-100", "2-20"]],
|
1416
|
+
]),
|
1417
|
+
table1.join(table2,
|
1418
|
+
["key1", "key2"],
|
1419
|
+
left_suffix: "_left",
|
1420
|
+
right_suffix: "_right"))
|
1421
|
+
end
|
1260
1422
|
end
|
1261
1423
|
end
|