red-arrow 11.0.0 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +12 -27
  4. data/lib/arrow/array-computable.rb +13 -0
  5. data/lib/arrow/data-type.rb +9 -0
  6. data/lib/arrow/dense-union-array-builder.rb +49 -0
  7. data/lib/arrow/dense-union-array.rb +26 -0
  8. data/lib/arrow/loader.rb +5 -0
  9. data/lib/arrow/record-batch-file-reader.rb +2 -0
  10. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  11. data/lib/arrow/scalar.rb +67 -0
  12. data/lib/arrow/slicer.rb +61 -0
  13. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  14. data/lib/arrow/sparse-union-array.rb +26 -0
  15. data/lib/arrow/struct-array-builder.rb +0 -5
  16. data/lib/arrow/table.rb +130 -10
  17. data/lib/arrow/union-array-builder.rb +59 -0
  18. data/lib/arrow/version.rb +1 -1
  19. data/test/raw-records/test-dense-union-array.rb +90 -45
  20. data/test/raw-records/test-list-array.rb +28 -10
  21. data/test/raw-records/test-map-array.rb +39 -10
  22. data/test/raw-records/test-sparse-union-array.rb +86 -41
  23. data/test/raw-records/test-struct-array.rb +22 -8
  24. data/test/test-array.rb +7 -0
  25. data/test/test-chunked-array.rb +9 -0
  26. data/test/test-dense-union-array.rb +42 -0
  27. data/test/test-dense-union-data-type.rb +1 -1
  28. data/test/test-function.rb +7 -7
  29. data/test/test-group.rb +58 -58
  30. data/test/test-record-batch-file-reader.rb +21 -0
  31. data/test/test-record-batch-stream-reader.rb +129 -0
  32. data/test/test-scalar.rb +65 -0
  33. data/test/test-slicer.rb +194 -129
  34. data/test/test-sparse-union-array.rb +38 -0
  35. data/test/test-table.rb +200 -38
  36. data/test/values/test-dense-union-array.rb +88 -45
  37. data/test/values/test-list-array.rb +26 -10
  38. data/test/values/test-map-array.rb +33 -10
  39. data/test/values/test-sparse-union-array.rb +84 -41
  40. data/test/values/test-struct-array.rb +20 -8
  41. metadata +20 -7
data/test/test-table.rb CHANGED
@@ -87,26 +87,24 @@ class TableTest < Test::Unit::TestCase
87
87
  target_rows_raw = [nil, true, true, false, true, false, true, true]
88
88
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
89
89
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
90
- count visible
91
- 0 (null) (null)
92
- 1 2 false
93
- 2 4 (null)
94
- 3 16 true
95
- 4 64 (null)
96
- 5 128 (null)
90
+ count visible
91
+ 0 2 false
92
+ 1 4 (null)
93
+ 2 16 true
94
+ 3 64 (null)
95
+ 4 128 (null)
97
96
  TABLE
98
97
  end
99
98
 
100
99
  test("Array: boolean") do
101
100
  target_rows_raw = [nil, true, true, false, true, false, true, true]
102
101
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
103
- count visible
104
- 0 (null) (null)
105
- 1 2 false
106
- 2 4 (null)
107
- 3 16 true
108
- 4 64 (null)
109
- 5 128 (null)
102
+ count visible
103
+ 0 2 false
104
+ 1 4 (null)
105
+ 2 16 true
106
+ 3 64 (null)
107
+ 4 128 (null)
110
108
  TABLE
111
109
  end
112
110
 
@@ -198,24 +196,18 @@ class TableTest < Test::Unit::TestCase
198
196
 
199
197
  test("{key: true}") do
200
198
  assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
201
- count visible
202
- 0 1 true
203
- 1 (null) (null)
204
- 2 8 true
205
- 3 16 true
206
- 4 (null) (null)
207
- 5 (null) (null)
199
+ count visible
200
+ 0 1 true
201
+ 1 8 true
202
+ 2 16 true
208
203
  TABLE
209
204
  end
210
205
 
211
206
  test("{key: false}") do
212
207
  assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
213
- count visible
214
- 0 2 false
215
- 1 (null) (null)
216
- 2 32 false
217
- 3 (null) (null)
218
- 4 (null) (null)
208
+ count visible
209
+ 0 2 false
210
+ 1 32 false
219
211
  TABLE
220
212
  end
221
213
 
@@ -286,11 +278,8 @@ class TableTest < Test::Unit::TestCase
286
278
 
287
279
  test("{key1: Range, key2: true}") do
288
280
  assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
289
- count visible
290
- 0 2 false
291
- 1 (null) (null)
292
- 2 (null) (null)
293
- 3 (null) (null)
281
+ count visible
282
+ 0 2 false
294
283
  TABLE
295
284
  end
296
285
 
@@ -1131,7 +1120,7 @@ visible: false
1131
1120
  end
1132
1121
 
1133
1122
  sub_test_case("#join") do
1134
- test("no keys") do
1123
+ test("keys: nil (natural join)") do
1135
1124
  table1 = Arrow::Table.new(key: [1, 2, 3],
1136
1125
  number: [10, 20, 30])
1137
1126
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1139,7 +1128,6 @@ visible: false
1139
1128
  assert_equal(Arrow::Table.new([
1140
1129
  ["key", [1, 3]],
1141
1130
  ["number", [10, 30]],
1142
- ["key", [1, 3]],
1143
1131
  ["string", ["one", "three"]],
1144
1132
  ]),
1145
1133
  table1.join(table2))
@@ -1153,7 +1141,6 @@ visible: false
1153
1141
  assert_equal(Arrow::Table.new([
1154
1142
  ["key", [1, 3]],
1155
1143
  ["number", [10, 30]],
1156
- ["key", [1, 3]],
1157
1144
  ["string", ["one", "three"]],
1158
1145
  ]),
1159
1146
  table1.join(table2, "key"))
@@ -1167,12 +1154,25 @@ visible: false
1167
1154
  assert_equal(Arrow::Table.new([
1168
1155
  ["key", [1, 3]],
1169
1156
  ["number", [10, 30]],
1170
- ["key", [1, 3]],
1171
1157
  ["string", ["one", "three"]],
1172
1158
  ]),
1173
1159
  table1.join(table2, :key))
1174
1160
  end
1175
1161
 
1162
+ test("keys: [String]") do
1163
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1164
+ number: [10, 20, 30])
1165
+ table2 = Arrow::Table.new(key: [3, 1],
1166
+ string: ["three", "one"])
1167
+ assert_equal(Arrow::Table.new([
1168
+ ["key", [1, 3]],
1169
+ ["number", [10, 30]],
1170
+ ["key", [1, 3]],
1171
+ ["string", ["one", "three"]],
1172
+ ]),
1173
+ table1.join(table2, ["key"]))
1174
+ end
1175
+
1176
1176
  test("keys: [String, Symbol]") do
1177
1177
  table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1178
1178
  key2: [10, 100, 20, 200],
@@ -1230,7 +1230,7 @@ visible: false
1230
1230
  type: :inner))
1231
1231
  end
1232
1232
 
1233
- test("type:") do
1233
+ test("type: :left_outer") do
1234
1234
  table1 = Arrow::Table.new(key: [1, 2, 3],
1235
1235
  number: [10, 20, 30])
1236
1236
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1238,12 +1238,85 @@ visible: false
1238
1238
  assert_equal(Arrow::Table.new([
1239
1239
  ["key", [1, 3, 2]],
1240
1240
  ["number", [10, 30, 20]],
1241
- ["key", [1, 3, nil]],
1242
1241
  ["string", ["one", "three", nil]],
1243
1242
  ]),
1244
1243
  table1.join(table2, "key", type: :left_outer))
1245
1244
  end
1246
1245
 
1246
+ test("type: :right_outer") do
1247
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1248
+ number: [10, 20, 30])
1249
+ table2 = Arrow::Table.new(key: [3, 1],
1250
+ string: ["three", "one"])
1251
+ assert_equal(Arrow::Table.new([
1252
+ ["key", [1, 3]],
1253
+ ["number", [10, 30]],
1254
+ ["string", ["one", "three"]],
1255
+ ]),
1256
+ table1.join(table2, "key", type: :right_outer))
1257
+ end
1258
+
1259
+ test("type: :full_outer") do
1260
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1261
+ number: [10, 20, 30])
1262
+ table2 = Arrow::Table.new(key: [3, 1],
1263
+ string: ["three", "one"])
1264
+ assert_equal(Arrow::Table.new([
1265
+ ["key", [1, 3, 2]],
1266
+ ["number", [10, 30, 20]],
1267
+ ["string", ["one", "three", nil]],
1268
+ ]),
1269
+ table1.join(table2, "key", type: :full_outer))
1270
+ end
1271
+
1272
+ test("type: :left_semi") do
1273
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1274
+ number: [10, 20, 30])
1275
+ table2 = Arrow::Table.new(key: [3, 1],
1276
+ string: ["three", "one"])
1277
+ assert_equal(Arrow::Table.new([
1278
+ ["key", [1, 3]],
1279
+ ["number", [10, 30]],
1280
+ ]),
1281
+ table1.join(table2, "key", type: :left_semi))
1282
+ end
1283
+
1284
+ test("type: :right_semi") do
1285
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1286
+ number: [10, 20, 30])
1287
+ table2 = Arrow::Table.new(key: [3, 1],
1288
+ string: ["three", "one"])
1289
+ assert_equal(Arrow::Table.new([
1290
+ ["key", [3, 1]],
1291
+ ["string", ["three", "one"]],
1292
+ ]),
1293
+ table1.join(table2, "key", type: :right_semi))
1294
+ end
1295
+
1296
+ test("type: :left_anti") do
1297
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1298
+ number: [10, 20, 30])
1299
+ table2 = Arrow::Table.new(key: [3, 1],
1300
+ string: ["three", "one"])
1301
+ assert_equal(Arrow::Table.new([
1302
+ ["key", [2]],
1303
+ ["number", [20]],
1304
+ ]),
1305
+ table1.join(table2, "key", type: :left_anti))
1306
+ end
1307
+
1308
+ test("type: :right_anti") do
1309
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1310
+ number: [10, 20, 30])
1311
+ table2 = Arrow::Table.new(key: [3, 1],
1312
+ string: ["three", "one"])
1313
+ assert_equal(Arrow::Table.new([
1314
+ ["key", Arrow::ChunkedArray.new(:uint8)],
1315
+ ["string", Arrow::ChunkedArray.new(:string)],
1316
+ ]),
1317
+ table1.join(table2, "key", type: :right_anti))
1318
+ end
1319
+
1247
1320
  test("left_outputs: & right_outputs:") do
1248
1321
  table1 = Arrow::Table.new(key: [1, 2, 3],
1249
1322
  number: [10, 20, 30])
@@ -1257,5 +1330,94 @@ visible: false
1257
1330
  left_outputs: ["key", "number"],
1258
1331
  right_outputs: ["string"]))
1259
1332
  end
1333
+
1334
+ test("left_outputs: & type: :inner") do
1335
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1336
+ number: [10, 20, 30])
1337
+ table2 = Arrow::Table.new(key: [3, 1],
1338
+ string: ["three", "one"])
1339
+ assert_equal(Arrow::Table.new([
1340
+ ["key", [1, 3]],
1341
+ ["number", [10, 30]],
1342
+ ["key", [1, 3]],
1343
+ ["string", ["one", "three"]]
1344
+ ]),
1345
+ table1.join(table2,
1346
+ type: :inner,
1347
+ left_outputs: table1.column_names,
1348
+ right_outputs: table2.column_names))
1349
+ end
1350
+
1351
+ test("left_outputs: & type: :left_outer") do
1352
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1353
+ number: [10, 20, 30])
1354
+ table2 = Arrow::Table.new(key: [3, 1],
1355
+ string: ["three", "one"])
1356
+ assert_equal(Arrow::Table.new([
1357
+ ["key", [1, 3, 2]],
1358
+ ["number", [10, 30, 20]],
1359
+ ["key", [1, 3, nil]],
1360
+ ["string", ["one", "three", nil]],
1361
+ ]),
1362
+ table1.join(table2,
1363
+ type: :left_outer,
1364
+ left_outputs: table1.column_names,
1365
+ right_outputs: table2.column_names))
1366
+ end
1367
+
1368
+ test("left_outputs: & type: :right_outer") do
1369
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1370
+ number: [10, 20, 30])
1371
+ table2 = Arrow::Table.new(key: [3, 1],
1372
+ string: ["three", "one"])
1373
+ assert_equal(Arrow::Table.new([
1374
+ ["key", [1, 3]],
1375
+ ["number", [10, 30]],
1376
+ ["key", [1, 3]],
1377
+ ["string", ["one", "three"]],
1378
+ ]),
1379
+ table1.join(table2,
1380
+ type: :right_outer,
1381
+ left_outputs: table1.column_names,
1382
+ right_outputs: table2.column_names))
1383
+ end
1384
+
1385
+ test("left_outputs: & type: :full_outer") do
1386
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1387
+ number: [10, 20, 30])
1388
+ table2 = Arrow::Table.new(key: [3, 1],
1389
+ string: ["three", "one"])
1390
+ assert_equal(Arrow::Table.new([
1391
+ ["key", [1, 3, 2]],
1392
+ ["number", [10, 30, 20]],
1393
+ ["key", [1, 3, nil]],
1394
+ ["string", ["one", "three", nil]],
1395
+ ]),
1396
+ table1.join(table2,
1397
+ type: :full_outer,
1398
+ left_outputs: table1.column_names,
1399
+ right_outputs: table2.column_names))
1400
+ end
1401
+
1402
+ test("left_suffix: & keys: [String]") do
1403
+ table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1404
+ key2: [10, 100, 20, 200],
1405
+ number: [1010, 1100, 2020, 2200])
1406
+ table2 = Arrow::Table.new(key1: [1, 2, 2],
1407
+ key2: [100, 20, 50],
1408
+ string: ["1-100", "2-20", "2-50"])
1409
+ assert_equal(Arrow::Table.new([
1410
+ ["key1_left", [1, 2]],
1411
+ ["key2_left", [100, 20]],
1412
+ ["number", [1100, 2020]],
1413
+ ["key1_right", [1, 2]],
1414
+ ["key2_right", [100, 20]],
1415
+ ["string", ["1-100", "2-20"]],
1416
+ ]),
1417
+ table1.join(table2,
1418
+ ["key1", "key2"],
1419
+ left_suffix: "_left",
1420
+ right_suffix: "_right"))
1421
+ end
1260
1422
  end
1261
1423
  end