red-arrow 11.0.0 → 12.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +12 -27
  4. data/lib/arrow/array-computable.rb +13 -0
  5. data/lib/arrow/data-type.rb +9 -0
  6. data/lib/arrow/dense-union-array-builder.rb +49 -0
  7. data/lib/arrow/dense-union-array.rb +26 -0
  8. data/lib/arrow/loader.rb +5 -0
  9. data/lib/arrow/record-batch-file-reader.rb +2 -0
  10. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  11. data/lib/arrow/scalar.rb +67 -0
  12. data/lib/arrow/slicer.rb +61 -0
  13. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  14. data/lib/arrow/sparse-union-array.rb +26 -0
  15. data/lib/arrow/struct-array-builder.rb +0 -5
  16. data/lib/arrow/table.rb +130 -10
  17. data/lib/arrow/union-array-builder.rb +59 -0
  18. data/lib/arrow/version.rb +1 -1
  19. data/test/raw-records/test-dense-union-array.rb +90 -45
  20. data/test/raw-records/test-list-array.rb +28 -10
  21. data/test/raw-records/test-map-array.rb +39 -10
  22. data/test/raw-records/test-sparse-union-array.rb +86 -41
  23. data/test/raw-records/test-struct-array.rb +22 -8
  24. data/test/test-array.rb +7 -0
  25. data/test/test-chunked-array.rb +9 -0
  26. data/test/test-dense-union-array.rb +42 -0
  27. data/test/test-dense-union-data-type.rb +1 -1
  28. data/test/test-function.rb +7 -7
  29. data/test/test-group.rb +58 -58
  30. data/test/test-record-batch-file-reader.rb +21 -0
  31. data/test/test-record-batch-stream-reader.rb +129 -0
  32. data/test/test-scalar.rb +65 -0
  33. data/test/test-slicer.rb +194 -129
  34. data/test/test-sparse-union-array.rb +38 -0
  35. data/test/test-table.rb +200 -38
  36. data/test/values/test-dense-union-array.rb +88 -45
  37. data/test/values/test-list-array.rb +26 -10
  38. data/test/values/test-map-array.rb +33 -10
  39. data/test/values/test-sparse-union-array.rb +84 -41
  40. data/test/values/test-struct-array.rb +20 -8
  41. metadata +20 -7
data/test/test-table.rb CHANGED
@@ -87,26 +87,24 @@ class TableTest < Test::Unit::TestCase
87
87
  target_rows_raw = [nil, true, true, false, true, false, true, true]
88
88
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
89
89
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
90
- count visible
91
- 0 (null) (null)
92
- 1 2 false
93
- 2 4 (null)
94
- 3 16 true
95
- 4 64 (null)
96
- 5 128 (null)
90
+ count visible
91
+ 0 2 false
92
+ 1 4 (null)
93
+ 2 16 true
94
+ 3 64 (null)
95
+ 4 128 (null)
97
96
  TABLE
98
97
  end
99
98
 
100
99
  test("Array: boolean") do
101
100
  target_rows_raw = [nil, true, true, false, true, false, true, true]
102
101
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
103
- count visible
104
- 0 (null) (null)
105
- 1 2 false
106
- 2 4 (null)
107
- 3 16 true
108
- 4 64 (null)
109
- 5 128 (null)
102
+ count visible
103
+ 0 2 false
104
+ 1 4 (null)
105
+ 2 16 true
106
+ 3 64 (null)
107
+ 4 128 (null)
110
108
  TABLE
111
109
  end
112
110
 
@@ -198,24 +196,18 @@ class TableTest < Test::Unit::TestCase
198
196
 
199
197
  test("{key: true}") do
200
198
  assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
201
- count visible
202
- 0 1 true
203
- 1 (null) (null)
204
- 2 8 true
205
- 3 16 true
206
- 4 (null) (null)
207
- 5 (null) (null)
199
+ count visible
200
+ 0 1 true
201
+ 1 8 true
202
+ 2 16 true
208
203
  TABLE
209
204
  end
210
205
 
211
206
  test("{key: false}") do
212
207
  assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
213
- count visible
214
- 0 2 false
215
- 1 (null) (null)
216
- 2 32 false
217
- 3 (null) (null)
218
- 4 (null) (null)
208
+ count visible
209
+ 0 2 false
210
+ 1 32 false
219
211
  TABLE
220
212
  end
221
213
 
@@ -286,11 +278,8 @@ class TableTest < Test::Unit::TestCase
286
278
 
287
279
  test("{key1: Range, key2: true}") do
288
280
  assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
289
- count visible
290
- 0 2 false
291
- 1 (null) (null)
292
- 2 (null) (null)
293
- 3 (null) (null)
281
+ count visible
282
+ 0 2 false
294
283
  TABLE
295
284
  end
296
285
 
@@ -1131,7 +1120,7 @@ visible: false
1131
1120
  end
1132
1121
 
1133
1122
  sub_test_case("#join") do
1134
- test("no keys") do
1123
+ test("keys: nil (natural join)") do
1135
1124
  table1 = Arrow::Table.new(key: [1, 2, 3],
1136
1125
  number: [10, 20, 30])
1137
1126
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1139,7 +1128,6 @@ visible: false
1139
1128
  assert_equal(Arrow::Table.new([
1140
1129
  ["key", [1, 3]],
1141
1130
  ["number", [10, 30]],
1142
- ["key", [1, 3]],
1143
1131
  ["string", ["one", "three"]],
1144
1132
  ]),
1145
1133
  table1.join(table2))
@@ -1153,7 +1141,6 @@ visible: false
1153
1141
  assert_equal(Arrow::Table.new([
1154
1142
  ["key", [1, 3]],
1155
1143
  ["number", [10, 30]],
1156
- ["key", [1, 3]],
1157
1144
  ["string", ["one", "three"]],
1158
1145
  ]),
1159
1146
  table1.join(table2, "key"))
@@ -1167,12 +1154,25 @@ visible: false
1167
1154
  assert_equal(Arrow::Table.new([
1168
1155
  ["key", [1, 3]],
1169
1156
  ["number", [10, 30]],
1170
- ["key", [1, 3]],
1171
1157
  ["string", ["one", "three"]],
1172
1158
  ]),
1173
1159
  table1.join(table2, :key))
1174
1160
  end
1175
1161
 
1162
+ test("keys: [String]") do
1163
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1164
+ number: [10, 20, 30])
1165
+ table2 = Arrow::Table.new(key: [3, 1],
1166
+ string: ["three", "one"])
1167
+ assert_equal(Arrow::Table.new([
1168
+ ["key", [1, 3]],
1169
+ ["number", [10, 30]],
1170
+ ["key", [1, 3]],
1171
+ ["string", ["one", "three"]],
1172
+ ]),
1173
+ table1.join(table2, ["key"]))
1174
+ end
1175
+
1176
1176
  test("keys: [String, Symbol]") do
1177
1177
  table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1178
1178
  key2: [10, 100, 20, 200],
@@ -1230,7 +1230,7 @@ visible: false
1230
1230
  type: :inner))
1231
1231
  end
1232
1232
 
1233
- test("type:") do
1233
+ test("type: :left_outer") do
1234
1234
  table1 = Arrow::Table.new(key: [1, 2, 3],
1235
1235
  number: [10, 20, 30])
1236
1236
  table2 = Arrow::Table.new(key: [3, 1],
@@ -1238,12 +1238,85 @@ visible: false
1238
1238
  assert_equal(Arrow::Table.new([
1239
1239
  ["key", [1, 3, 2]],
1240
1240
  ["number", [10, 30, 20]],
1241
- ["key", [1, 3, nil]],
1242
1241
  ["string", ["one", "three", nil]],
1243
1242
  ]),
1244
1243
  table1.join(table2, "key", type: :left_outer))
1245
1244
  end
1246
1245
 
1246
+ test("type: :right_outer") do
1247
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1248
+ number: [10, 20, 30])
1249
+ table2 = Arrow::Table.new(key: [3, 1],
1250
+ string: ["three", "one"])
1251
+ assert_equal(Arrow::Table.new([
1252
+ ["key", [1, 3]],
1253
+ ["number", [10, 30]],
1254
+ ["string", ["one", "three"]],
1255
+ ]),
1256
+ table1.join(table2, "key", type: :right_outer))
1257
+ end
1258
+
1259
+ test("type: :full_outer") do
1260
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1261
+ number: [10, 20, 30])
1262
+ table2 = Arrow::Table.new(key: [3, 1],
1263
+ string: ["three", "one"])
1264
+ assert_equal(Arrow::Table.new([
1265
+ ["key", [1, 3, 2]],
1266
+ ["number", [10, 30, 20]],
1267
+ ["string", ["one", "three", nil]],
1268
+ ]),
1269
+ table1.join(table2, "key", type: :full_outer))
1270
+ end
1271
+
1272
+ test("type: :left_semi") do
1273
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1274
+ number: [10, 20, 30])
1275
+ table2 = Arrow::Table.new(key: [3, 1],
1276
+ string: ["three", "one"])
1277
+ assert_equal(Arrow::Table.new([
1278
+ ["key", [1, 3]],
1279
+ ["number", [10, 30]],
1280
+ ]),
1281
+ table1.join(table2, "key", type: :left_semi))
1282
+ end
1283
+
1284
+ test("type: :right_semi") do
1285
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1286
+ number: [10, 20, 30])
1287
+ table2 = Arrow::Table.new(key: [3, 1],
1288
+ string: ["three", "one"])
1289
+ assert_equal(Arrow::Table.new([
1290
+ ["key", [3, 1]],
1291
+ ["string", ["three", "one"]],
1292
+ ]),
1293
+ table1.join(table2, "key", type: :right_semi))
1294
+ end
1295
+
1296
+ test("type: :left_anti") do
1297
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1298
+ number: [10, 20, 30])
1299
+ table2 = Arrow::Table.new(key: [3, 1],
1300
+ string: ["three", "one"])
1301
+ assert_equal(Arrow::Table.new([
1302
+ ["key", [2]],
1303
+ ["number", [20]],
1304
+ ]),
1305
+ table1.join(table2, "key", type: :left_anti))
1306
+ end
1307
+
1308
+ test("type: :right_anti") do
1309
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1310
+ number: [10, 20, 30])
1311
+ table2 = Arrow::Table.new(key: [3, 1],
1312
+ string: ["three", "one"])
1313
+ assert_equal(Arrow::Table.new([
1314
+ ["key", Arrow::ChunkedArray.new(:uint8)],
1315
+ ["string", Arrow::ChunkedArray.new(:string)],
1316
+ ]),
1317
+ table1.join(table2, "key", type: :right_anti))
1318
+ end
1319
+
1247
1320
  test("left_outputs: & right_outputs:") do
1248
1321
  table1 = Arrow::Table.new(key: [1, 2, 3],
1249
1322
  number: [10, 20, 30])
@@ -1257,5 +1330,94 @@ visible: false
1257
1330
  left_outputs: ["key", "number"],
1258
1331
  right_outputs: ["string"]))
1259
1332
  end
1333
+
1334
+ test("left_outputs: & type: :inner") do
1335
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1336
+ number: [10, 20, 30])
1337
+ table2 = Arrow::Table.new(key: [3, 1],
1338
+ string: ["three", "one"])
1339
+ assert_equal(Arrow::Table.new([
1340
+ ["key", [1, 3]],
1341
+ ["number", [10, 30]],
1342
+ ["key", [1, 3]],
1343
+ ["string", ["one", "three"]]
1344
+ ]),
1345
+ table1.join(table2,
1346
+ type: :inner,
1347
+ left_outputs: table1.column_names,
1348
+ right_outputs: table2.column_names))
1349
+ end
1350
+
1351
+ test("left_outputs: & type: :left_outer") do
1352
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1353
+ number: [10, 20, 30])
1354
+ table2 = Arrow::Table.new(key: [3, 1],
1355
+ string: ["three", "one"])
1356
+ assert_equal(Arrow::Table.new([
1357
+ ["key", [1, 3, 2]],
1358
+ ["number", [10, 30, 20]],
1359
+ ["key", [1, 3, nil]],
1360
+ ["string", ["one", "three", nil]],
1361
+ ]),
1362
+ table1.join(table2,
1363
+ type: :left_outer,
1364
+ left_outputs: table1.column_names,
1365
+ right_outputs: table2.column_names))
1366
+ end
1367
+
1368
+ test("left_outputs: & type: :right_outer") do
1369
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1370
+ number: [10, 20, 30])
1371
+ table2 = Arrow::Table.new(key: [3, 1],
1372
+ string: ["three", "one"])
1373
+ assert_equal(Arrow::Table.new([
1374
+ ["key", [1, 3]],
1375
+ ["number", [10, 30]],
1376
+ ["key", [1, 3]],
1377
+ ["string", ["one", "three"]],
1378
+ ]),
1379
+ table1.join(table2,
1380
+ type: :right_outer,
1381
+ left_outputs: table1.column_names,
1382
+ right_outputs: table2.column_names))
1383
+ end
1384
+
1385
+ test("left_outputs: & type: :full_outer") do
1386
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1387
+ number: [10, 20, 30])
1388
+ table2 = Arrow::Table.new(key: [3, 1],
1389
+ string: ["three", "one"])
1390
+ assert_equal(Arrow::Table.new([
1391
+ ["key", [1, 3, 2]],
1392
+ ["number", [10, 30, 20]],
1393
+ ["key", [1, 3, nil]],
1394
+ ["string", ["one", "three", nil]],
1395
+ ]),
1396
+ table1.join(table2,
1397
+ type: :full_outer,
1398
+ left_outputs: table1.column_names,
1399
+ right_outputs: table2.column_names))
1400
+ end
1401
+
1402
+ test("left_suffix: & keys: [String]") do
1403
+ table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1404
+ key2: [10, 100, 20, 200],
1405
+ number: [1010, 1100, 2020, 2200])
1406
+ table2 = Arrow::Table.new(key1: [1, 2, 2],
1407
+ key2: [100, 20, 50],
1408
+ string: ["1-100", "2-20", "2-50"])
1409
+ assert_equal(Arrow::Table.new([
1410
+ ["key1_left", [1, 2]],
1411
+ ["key2_left", [100, 20]],
1412
+ ["number", [1100, 2020]],
1413
+ ["key1_right", [1, 2]],
1414
+ ["key2_right", [100, 20]],
1415
+ ["string", ["1-100", "2-20"]],
1416
+ ]),
1417
+ table1.join(table2,
1418
+ ["key1", "key2"],
1419
+ left_suffix: "_left",
1420
+ right_suffix: "_right"))
1421
+ end
1260
1422
  end
1261
1423
  end