deltalake-rb 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +144 -97
- data/ext/deltalake/Cargo.toml +12 -4
- data/ext/deltalake/src/error.rs +9 -8
- data/ext/deltalake/src/lib.rs +96 -62
- data/ext/deltalake/src/schema.rs +47 -7
- data/ext/deltalake/src/utils.rs +2 -3
- data/lib/deltalake/table.rb +3 -1
- data/lib/deltalake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70bfe7933e46d65d042f24d33e594d7bb85d8ba7c710f33314d406539824ef44
|
4
|
+
data.tar.gz: 84cc2317df6d58cc2a115a1a0d96ed9dcaff8d7b1726f3482e26a9e25e8401e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f53c616be11eaddf82405eb88f2c5b793dc696a40f8817a7d8622e20bfe5dde3affc1f21bca345215a46b92192ba9c5d539eeac3ac67d44d3874a54bedd2a729
|
7
|
+
data.tar.gz: c3948d2ac176dc4030bb75a62091ce6f63a78b064b3159cc52cd8f880a701790279d783d6163da6cc2deb22cbefa305ecc3f8f3071b16ab0b1bc0d87f713f648
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -217,6 +217,7 @@ dependencies = [
|
|
217
217
|
"arrow-schema",
|
218
218
|
"flatbuffers",
|
219
219
|
"lz4_flex",
|
220
|
+
"zstd",
|
220
221
|
]
|
221
222
|
|
222
223
|
[[package]]
|
@@ -315,7 +316,7 @@ version = "0.4.19"
|
|
315
316
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
316
317
|
checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c"
|
317
318
|
dependencies = [
|
318
|
-
"bzip2",
|
319
|
+
"bzip2 0.5.2",
|
319
320
|
"flate2",
|
320
321
|
"futures-core",
|
321
322
|
"memchr",
|
@@ -920,6 +921,15 @@ dependencies = [
|
|
920
921
|
"bzip2-sys",
|
921
922
|
]
|
922
923
|
|
924
|
+
[[package]]
|
925
|
+
name = "bzip2"
|
926
|
+
version = "0.6.0"
|
927
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
928
|
+
checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff"
|
929
|
+
dependencies = [
|
930
|
+
"libbz2-rs-sys",
|
931
|
+
]
|
932
|
+
|
923
933
|
[[package]]
|
924
934
|
name = "bzip2-sys"
|
925
935
|
version = "0.1.13+1.0.8"
|
@@ -1190,16 +1200,16 @@ dependencies = [
|
|
1190
1200
|
|
1191
1201
|
[[package]]
|
1192
1202
|
name = "datafusion"
|
1193
|
-
version = "
|
1203
|
+
version = "49.0.2"
|
1194
1204
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1195
|
-
checksum = "
|
1205
|
+
checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02"
|
1196
1206
|
dependencies = [
|
1197
1207
|
"arrow",
|
1198
1208
|
"arrow-ipc",
|
1199
1209
|
"arrow-schema",
|
1200
1210
|
"async-trait",
|
1201
1211
|
"bytes",
|
1202
|
-
"bzip2",
|
1212
|
+
"bzip2 0.6.0",
|
1203
1213
|
"chrono",
|
1204
1214
|
"datafusion-catalog",
|
1205
1215
|
"datafusion-catalog-listing",
|
@@ -1226,6 +1236,7 @@ dependencies = [
|
|
1226
1236
|
"datafusion-sql",
|
1227
1237
|
"flate2",
|
1228
1238
|
"futures",
|
1239
|
+
"hex",
|
1229
1240
|
"itertools 0.14.0",
|
1230
1241
|
"log",
|
1231
1242
|
"object_store",
|
@@ -1244,9 +1255,9 @@ dependencies = [
|
|
1244
1255
|
|
1245
1256
|
[[package]]
|
1246
1257
|
name = "datafusion-catalog"
|
1247
|
-
version = "
|
1258
|
+
version = "49.0.2"
|
1248
1259
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1249
|
-
checksum = "
|
1260
|
+
checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0"
|
1250
1261
|
dependencies = [
|
1251
1262
|
"arrow",
|
1252
1263
|
"async-trait",
|
@@ -1270,9 +1281,9 @@ dependencies = [
|
|
1270
1281
|
|
1271
1282
|
[[package]]
|
1272
1283
|
name = "datafusion-catalog-listing"
|
1273
|
-
version = "
|
1284
|
+
version = "49.0.2"
|
1274
1285
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1275
|
-
checksum = "
|
1286
|
+
checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b"
|
1276
1287
|
dependencies = [
|
1277
1288
|
"arrow",
|
1278
1289
|
"async-trait",
|
@@ -1293,16 +1304,18 @@ dependencies = [
|
|
1293
1304
|
|
1294
1305
|
[[package]]
|
1295
1306
|
name = "datafusion-common"
|
1296
|
-
version = "
|
1307
|
+
version = "49.0.2"
|
1297
1308
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1298
|
-
checksum = "
|
1309
|
+
checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096"
|
1299
1310
|
dependencies = [
|
1300
1311
|
"ahash",
|
1301
1312
|
"arrow",
|
1302
1313
|
"arrow-ipc",
|
1303
1314
|
"base64 0.22.1",
|
1315
|
+
"chrono",
|
1304
1316
|
"half",
|
1305
1317
|
"hashbrown 0.14.5",
|
1318
|
+
"hex",
|
1306
1319
|
"indexmap",
|
1307
1320
|
"libc",
|
1308
1321
|
"log",
|
@@ -1317,9 +1330,9 @@ dependencies = [
|
|
1317
1330
|
|
1318
1331
|
[[package]]
|
1319
1332
|
name = "datafusion-common-runtime"
|
1320
|
-
version = "
|
1333
|
+
version = "49.0.2"
|
1321
1334
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1322
|
-
checksum = "
|
1335
|
+
checksum = "40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12"
|
1323
1336
|
dependencies = [
|
1324
1337
|
"futures",
|
1325
1338
|
"log",
|
@@ -1328,15 +1341,15 @@ dependencies = [
|
|
1328
1341
|
|
1329
1342
|
[[package]]
|
1330
1343
|
name = "datafusion-datasource"
|
1331
|
-
version = "
|
1344
|
+
version = "49.0.2"
|
1332
1345
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1333
|
-
checksum = "
|
1346
|
+
checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330"
|
1334
1347
|
dependencies = [
|
1335
1348
|
"arrow",
|
1336
1349
|
"async-compression",
|
1337
1350
|
"async-trait",
|
1338
1351
|
"bytes",
|
1339
|
-
"bzip2",
|
1352
|
+
"bzip2 0.6.0",
|
1340
1353
|
"chrono",
|
1341
1354
|
"datafusion-common",
|
1342
1355
|
"datafusion-common-runtime",
|
@@ -1364,9 +1377,9 @@ dependencies = [
|
|
1364
1377
|
|
1365
1378
|
[[package]]
|
1366
1379
|
name = "datafusion-datasource-csv"
|
1367
|
-
version = "
|
1380
|
+
version = "49.0.2"
|
1368
1381
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1369
|
-
checksum = "
|
1382
|
+
checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6"
|
1370
1383
|
dependencies = [
|
1371
1384
|
"arrow",
|
1372
1385
|
"async-trait",
|
@@ -1389,9 +1402,9 @@ dependencies = [
|
|
1389
1402
|
|
1390
1403
|
[[package]]
|
1391
1404
|
name = "datafusion-datasource-json"
|
1392
|
-
version = "
|
1405
|
+
version = "49.0.2"
|
1393
1406
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1394
|
-
checksum = "
|
1407
|
+
checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae"
|
1395
1408
|
dependencies = [
|
1396
1409
|
"arrow",
|
1397
1410
|
"async-trait",
|
@@ -1414,9 +1427,9 @@ dependencies = [
|
|
1414
1427
|
|
1415
1428
|
[[package]]
|
1416
1429
|
name = "datafusion-datasource-parquet"
|
1417
|
-
version = "
|
1430
|
+
version = "49.0.2"
|
1418
1431
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1419
|
-
checksum = "
|
1432
|
+
checksum = "028f430c5185120bf806347848b8d8acd9823f4038875b3820eeefa35f2bb4a2"
|
1420
1433
|
dependencies = [
|
1421
1434
|
"arrow",
|
1422
1435
|
"async-trait",
|
@@ -1432,8 +1445,10 @@ dependencies = [
|
|
1432
1445
|
"datafusion-physical-expr-common",
|
1433
1446
|
"datafusion-physical-optimizer",
|
1434
1447
|
"datafusion-physical-plan",
|
1448
|
+
"datafusion-pruning",
|
1435
1449
|
"datafusion-session",
|
1436
1450
|
"futures",
|
1451
|
+
"hex",
|
1437
1452
|
"itertools 0.14.0",
|
1438
1453
|
"log",
|
1439
1454
|
"object_store",
|
@@ -1445,15 +1460,15 @@ dependencies = [
|
|
1445
1460
|
|
1446
1461
|
[[package]]
|
1447
1462
|
name = "datafusion-doc"
|
1448
|
-
version = "
|
1463
|
+
version = "49.0.2"
|
1449
1464
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1450
|
-
checksum = "
|
1465
|
+
checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8"
|
1451
1466
|
|
1452
1467
|
[[package]]
|
1453
1468
|
name = "datafusion-execution"
|
1454
|
-
version = "
|
1469
|
+
version = "49.0.2"
|
1455
1470
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1456
|
-
checksum = "
|
1471
|
+
checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f"
|
1457
1472
|
dependencies = [
|
1458
1473
|
"arrow",
|
1459
1474
|
"dashmap",
|
@@ -1470,11 +1485,12 @@ dependencies = [
|
|
1470
1485
|
|
1471
1486
|
[[package]]
|
1472
1487
|
name = "datafusion-expr"
|
1473
|
-
version = "
|
1488
|
+
version = "49.0.2"
|
1474
1489
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1475
|
-
checksum = "
|
1490
|
+
checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e"
|
1476
1491
|
dependencies = [
|
1477
1492
|
"arrow",
|
1493
|
+
"async-trait",
|
1478
1494
|
"chrono",
|
1479
1495
|
"datafusion-common",
|
1480
1496
|
"datafusion-doc",
|
@@ -1491,9 +1507,9 @@ dependencies = [
|
|
1491
1507
|
|
1492
1508
|
[[package]]
|
1493
1509
|
name = "datafusion-expr-common"
|
1494
|
-
version = "
|
1510
|
+
version = "49.0.2"
|
1495
1511
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1496
|
-
checksum = "
|
1512
|
+
checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2"
|
1497
1513
|
dependencies = [
|
1498
1514
|
"arrow",
|
1499
1515
|
"datafusion-common",
|
@@ -1504,9 +1520,9 @@ dependencies = [
|
|
1504
1520
|
|
1505
1521
|
[[package]]
|
1506
1522
|
name = "datafusion-functions"
|
1507
|
-
version = "
|
1523
|
+
version = "49.0.2"
|
1508
1524
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1509
|
-
checksum = "
|
1525
|
+
checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb"
|
1510
1526
|
dependencies = [
|
1511
1527
|
"arrow",
|
1512
1528
|
"arrow-buffer",
|
@@ -1533,9 +1549,9 @@ dependencies = [
|
|
1533
1549
|
|
1534
1550
|
[[package]]
|
1535
1551
|
name = "datafusion-functions-aggregate"
|
1536
|
-
version = "
|
1552
|
+
version = "49.0.2"
|
1537
1553
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1538
|
-
checksum = "
|
1554
|
+
checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952"
|
1539
1555
|
dependencies = [
|
1540
1556
|
"ahash",
|
1541
1557
|
"arrow",
|
@@ -1554,9 +1570,9 @@ dependencies = [
|
|
1554
1570
|
|
1555
1571
|
[[package]]
|
1556
1572
|
name = "datafusion-functions-aggregate-common"
|
1557
|
-
version = "
|
1573
|
+
version = "49.0.2"
|
1558
1574
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1559
|
-
checksum = "
|
1575
|
+
checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f"
|
1560
1576
|
dependencies = [
|
1561
1577
|
"ahash",
|
1562
1578
|
"arrow",
|
@@ -1567,9 +1583,9 @@ dependencies = [
|
|
1567
1583
|
|
1568
1584
|
[[package]]
|
1569
1585
|
name = "datafusion-functions-nested"
|
1570
|
-
version = "
|
1586
|
+
version = "49.0.2"
|
1571
1587
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1572
|
-
checksum = "
|
1588
|
+
checksum = "9187678af567d7c9e004b72a0b6dc5b0a00ebf4901cb3511ed2db4effe092e66"
|
1573
1589
|
dependencies = [
|
1574
1590
|
"arrow",
|
1575
1591
|
"arrow-ord",
|
@@ -1579,6 +1595,7 @@ dependencies = [
|
|
1579
1595
|
"datafusion-expr",
|
1580
1596
|
"datafusion-functions",
|
1581
1597
|
"datafusion-functions-aggregate",
|
1598
|
+
"datafusion-functions-aggregate-common",
|
1582
1599
|
"datafusion-macros",
|
1583
1600
|
"datafusion-physical-expr-common",
|
1584
1601
|
"itertools 0.14.0",
|
@@ -1588,9 +1605,9 @@ dependencies = [
|
|
1588
1605
|
|
1589
1606
|
[[package]]
|
1590
1607
|
name = "datafusion-functions-table"
|
1591
|
-
version = "
|
1608
|
+
version = "49.0.2"
|
1592
1609
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1593
|
-
checksum = "
|
1610
|
+
checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5"
|
1594
1611
|
dependencies = [
|
1595
1612
|
"arrow",
|
1596
1613
|
"async-trait",
|
@@ -1604,9 +1621,9 @@ dependencies = [
|
|
1604
1621
|
|
1605
1622
|
[[package]]
|
1606
1623
|
name = "datafusion-functions-window"
|
1607
|
-
version = "
|
1624
|
+
version = "49.0.2"
|
1608
1625
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1609
|
-
checksum = "
|
1626
|
+
checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e"
|
1610
1627
|
dependencies = [
|
1611
1628
|
"arrow",
|
1612
1629
|
"datafusion-common",
|
@@ -1622,9 +1639,9 @@ dependencies = [
|
|
1622
1639
|
|
1623
1640
|
[[package]]
|
1624
1641
|
name = "datafusion-functions-window-common"
|
1625
|
-
version = "
|
1642
|
+
version = "49.0.2"
|
1626
1643
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1627
|
-
checksum = "
|
1644
|
+
checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294"
|
1628
1645
|
dependencies = [
|
1629
1646
|
"datafusion-common",
|
1630
1647
|
"datafusion-physical-expr-common",
|
@@ -1632,9 +1649,9 @@ dependencies = [
|
|
1632
1649
|
|
1633
1650
|
[[package]]
|
1634
1651
|
name = "datafusion-macros"
|
1635
|
-
version = "
|
1652
|
+
version = "49.0.2"
|
1636
1653
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1637
|
-
checksum = "
|
1654
|
+
checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a"
|
1638
1655
|
dependencies = [
|
1639
1656
|
"datafusion-expr",
|
1640
1657
|
"quote",
|
@@ -1643,14 +1660,15 @@ dependencies = [
|
|
1643
1660
|
|
1644
1661
|
[[package]]
|
1645
1662
|
name = "datafusion-optimizer"
|
1646
|
-
version = "
|
1663
|
+
version = "49.0.2"
|
1647
1664
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1648
|
-
checksum = "
|
1665
|
+
checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d"
|
1649
1666
|
dependencies = [
|
1650
1667
|
"arrow",
|
1651
1668
|
"chrono",
|
1652
1669
|
"datafusion-common",
|
1653
1670
|
"datafusion-expr",
|
1671
|
+
"datafusion-expr-common",
|
1654
1672
|
"datafusion-physical-expr",
|
1655
1673
|
"indexmap",
|
1656
1674
|
"itertools 0.14.0",
|
@@ -1662,9 +1680,9 @@ dependencies = [
|
|
1662
1680
|
|
1663
1681
|
[[package]]
|
1664
1682
|
name = "datafusion-physical-expr"
|
1665
|
-
version = "
|
1683
|
+
version = "49.0.2"
|
1666
1684
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1667
|
-
checksum = "
|
1685
|
+
checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002"
|
1668
1686
|
dependencies = [
|
1669
1687
|
"ahash",
|
1670
1688
|
"arrow",
|
@@ -1684,9 +1702,9 @@ dependencies = [
|
|
1684
1702
|
|
1685
1703
|
[[package]]
|
1686
1704
|
name = "datafusion-physical-expr-common"
|
1687
|
-
version = "
|
1705
|
+
version = "49.0.2"
|
1688
1706
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1689
|
-
checksum = "
|
1707
|
+
checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e"
|
1690
1708
|
dependencies = [
|
1691
1709
|
"ahash",
|
1692
1710
|
"arrow",
|
@@ -1698,9 +1716,9 @@ dependencies = [
|
|
1698
1716
|
|
1699
1717
|
[[package]]
|
1700
1718
|
name = "datafusion-physical-optimizer"
|
1701
|
-
version = "
|
1719
|
+
version = "49.0.2"
|
1702
1720
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1703
|
-
checksum = "
|
1721
|
+
checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a"
|
1704
1722
|
dependencies = [
|
1705
1723
|
"arrow",
|
1706
1724
|
"datafusion-common",
|
@@ -1710,6 +1728,7 @@ dependencies = [
|
|
1710
1728
|
"datafusion-physical-expr",
|
1711
1729
|
"datafusion-physical-expr-common",
|
1712
1730
|
"datafusion-physical-plan",
|
1731
|
+
"datafusion-pruning",
|
1713
1732
|
"itertools 0.14.0",
|
1714
1733
|
"log",
|
1715
1734
|
"recursive",
|
@@ -1717,9 +1736,9 @@ dependencies = [
|
|
1717
1736
|
|
1718
1737
|
[[package]]
|
1719
1738
|
name = "datafusion-physical-plan"
|
1720
|
-
version = "
|
1739
|
+
version = "49.0.2"
|
1721
1740
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1722
|
-
checksum = "
|
1741
|
+
checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b"
|
1723
1742
|
dependencies = [
|
1724
1743
|
"ahash",
|
1725
1744
|
"arrow",
|
@@ -1747,9 +1766,9 @@ dependencies = [
|
|
1747
1766
|
|
1748
1767
|
[[package]]
|
1749
1768
|
name = "datafusion-proto"
|
1750
|
-
version = "
|
1769
|
+
version = "49.0.2"
|
1751
1770
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1752
|
-
checksum = "
|
1771
|
+
checksum = "1b36a0c84f4500efd90487a004b533bd81de1f2bb3f143f71b7526f33b85d2e2"
|
1753
1772
|
dependencies = [
|
1754
1773
|
"arrow",
|
1755
1774
|
"chrono",
|
@@ -1763,20 +1782,38 @@ dependencies = [
|
|
1763
1782
|
|
1764
1783
|
[[package]]
|
1765
1784
|
name = "datafusion-proto-common"
|
1766
|
-
version = "
|
1785
|
+
version = "49.0.2"
|
1767
1786
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1768
|
-
checksum = "
|
1787
|
+
checksum = "2ec788be522806740ad6372c0a2f7e45fb37cb37f786d9b77933add49cdd058f"
|
1769
1788
|
dependencies = [
|
1770
1789
|
"arrow",
|
1771
1790
|
"datafusion-common",
|
1772
1791
|
"prost",
|
1773
1792
|
]
|
1774
1793
|
|
1794
|
+
[[package]]
|
1795
|
+
name = "datafusion-pruning"
|
1796
|
+
version = "49.0.0"
|
1797
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1798
|
+
checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905"
|
1799
|
+
dependencies = [
|
1800
|
+
"arrow",
|
1801
|
+
"arrow-schema",
|
1802
|
+
"datafusion-common",
|
1803
|
+
"datafusion-datasource",
|
1804
|
+
"datafusion-expr-common",
|
1805
|
+
"datafusion-physical-expr",
|
1806
|
+
"datafusion-physical-expr-common",
|
1807
|
+
"datafusion-physical-plan",
|
1808
|
+
"itertools 0.14.0",
|
1809
|
+
"log",
|
1810
|
+
]
|
1811
|
+
|
1775
1812
|
[[package]]
|
1776
1813
|
name = "datafusion-session"
|
1777
|
-
version = "
|
1814
|
+
version = "49.0.2"
|
1778
1815
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1779
|
-
checksum = "
|
1816
|
+
checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3"
|
1780
1817
|
dependencies = [
|
1781
1818
|
"arrow",
|
1782
1819
|
"async-trait",
|
@@ -1798,9 +1835,9 @@ dependencies = [
|
|
1798
1835
|
|
1799
1836
|
[[package]]
|
1800
1837
|
name = "datafusion-sql"
|
1801
|
-
version = "
|
1838
|
+
version = "49.0.2"
|
1802
1839
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1803
|
-
checksum = "
|
1840
|
+
checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25"
|
1804
1841
|
dependencies = [
|
1805
1842
|
"arrow",
|
1806
1843
|
"bigdecimal",
|
@@ -1815,9 +1852,9 @@ dependencies = [
|
|
1815
1852
|
|
1816
1853
|
[[package]]
|
1817
1854
|
name = "delta_kernel"
|
1818
|
-
version = "0.
|
1855
|
+
version = "0.14.0"
|
1819
1856
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1820
|
-
checksum = "
|
1857
|
+
checksum = "cac0f0eae6345b0cfb67c4304da961e590370860aa51e88315e808c5d496629f"
|
1821
1858
|
dependencies = [
|
1822
1859
|
"arrow",
|
1823
1860
|
"bytes",
|
@@ -1844,9 +1881,9 @@ dependencies = [
|
|
1844
1881
|
|
1845
1882
|
[[package]]
|
1846
1883
|
name = "delta_kernel_derive"
|
1847
|
-
version = "0.
|
1884
|
+
version = "0.14.0"
|
1848
1885
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1849
|
-
checksum = "
|
1886
|
+
checksum = "064456b054cf26b607f4cbcef6d2ca102f64ed8e4fa702d2e307ce67b5b93569"
|
1850
1887
|
dependencies = [
|
1851
1888
|
"proc-macro2",
|
1852
1889
|
"quote",
|
@@ -1855,13 +1892,13 @@ dependencies = [
|
|
1855
1892
|
|
1856
1893
|
[[package]]
|
1857
1894
|
name = "deltalake"
|
1858
|
-
version = "0.2.0"
|
1895
|
+
version = "0.2.1"
|
1859
1896
|
dependencies = [
|
1860
1897
|
"arrow",
|
1861
1898
|
"arrow-schema",
|
1862
1899
|
"chrono",
|
1863
1900
|
"delta_kernel",
|
1864
|
-
"deltalake 0.27.0",
|
1901
|
+
"deltalake 0.28.0",
|
1865
1902
|
"futures",
|
1866
1903
|
"magnus",
|
1867
1904
|
"num_cpus",
|
@@ -1872,9 +1909,9 @@ dependencies = [
|
|
1872
1909
|
|
1873
1910
|
[[package]]
|
1874
1911
|
name = "deltalake"
|
1875
|
-
version = "0.27.0"
|
1912
|
+
version = "0.28.0"
|
1876
1913
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1877
|
-
checksum = "
|
1914
|
+
checksum = "38b6e94bac6fd3bfd2cac139e21bf701b623955cf8b7a91af14528e759fb5d63"
|
1878
1915
|
dependencies = [
|
1879
1916
|
"delta_kernel",
|
1880
1917
|
"deltalake-aws",
|
@@ -1885,9 +1922,9 @@ dependencies = [
|
|
1885
1922
|
|
1886
1923
|
[[package]]
|
1887
1924
|
name = "deltalake-aws"
|
1888
|
-
version = "0.
|
1925
|
+
version = "0.11.0"
|
1889
1926
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1890
|
-
checksum = "
|
1927
|
+
checksum = "dc991ff8304152a245c92b3c2d17eb56f86c14755fe56671ccd75e9c6759f3c9"
|
1891
1928
|
dependencies = [
|
1892
1929
|
"async-trait",
|
1893
1930
|
"aws-config",
|
@@ -1914,9 +1951,9 @@ dependencies = [
|
|
1914
1951
|
|
1915
1952
|
[[package]]
|
1916
1953
|
name = "deltalake-azure"
|
1917
|
-
version = "0.
|
1954
|
+
version = "0.11.0"
|
1918
1955
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1919
|
-
checksum = "
|
1956
|
+
checksum = "f84f7dca1e77038140b4633b6b3c70d6426a452432d5619207ac3a6103385039"
|
1920
1957
|
dependencies = [
|
1921
1958
|
"async-trait",
|
1922
1959
|
"bytes",
|
@@ -1932,9 +1969,9 @@ dependencies = [
|
|
1932
1969
|
|
1933
1970
|
[[package]]
|
1934
1971
|
name = "deltalake-core"
|
1935
|
-
version = "0.
|
1972
|
+
version = "0.28.0"
|
1936
1973
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1937
|
-
checksum = "
|
1974
|
+
checksum = "7526cddb8cb6e6d99b073f98226e16f8e6c40111b9536509d13be6bfda085548"
|
1938
1975
|
dependencies = [
|
1939
1976
|
"arrow",
|
1940
1977
|
"arrow-arith",
|
@@ -1962,14 +1999,12 @@ dependencies = [
|
|
1962
1999
|
"indexmap",
|
1963
2000
|
"itertools 0.14.0",
|
1964
2001
|
"maplit",
|
1965
|
-
"num-bigint",
|
1966
|
-
"num-traits",
|
1967
2002
|
"num_cpus",
|
1968
2003
|
"object_store",
|
1969
2004
|
"parking_lot",
|
1970
2005
|
"parquet",
|
1971
2006
|
"percent-encoding",
|
1972
|
-
"
|
2007
|
+
"percent-encoding-rfc3986",
|
1973
2008
|
"rand 0.8.5",
|
1974
2009
|
"regex",
|
1975
2010
|
"serde",
|
@@ -1980,16 +2015,15 @@ dependencies = [
|
|
1980
2015
|
"tokio",
|
1981
2016
|
"tracing",
|
1982
2017
|
"url",
|
1983
|
-
"urlencoding",
|
1984
2018
|
"uuid",
|
1985
2019
|
"validator",
|
1986
2020
|
]
|
1987
2021
|
|
1988
2022
|
[[package]]
|
1989
2023
|
name = "deltalake-derive"
|
1990
|
-
version = "0.
|
2024
|
+
version = "0.28.0"
|
1991
2025
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1992
|
-
checksum = "
|
2026
|
+
checksum = "751cfe39c31f065104f3c2238d0e423849a4e4f2e2b8adf923d8276a59da7f3a"
|
1993
2027
|
dependencies = [
|
1994
2028
|
"convert_case",
|
1995
2029
|
"itertools 0.14.0",
|
@@ -2000,9 +2034,9 @@ dependencies = [
|
|
2000
2034
|
|
2001
2035
|
[[package]]
|
2002
2036
|
name = "deltalake-gcp"
|
2003
|
-
version = "0.
|
2037
|
+
version = "0.12.0"
|
2004
2038
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2005
|
-
checksum = "
|
2039
|
+
checksum = "1ad06893b5a821337aa4123a7409e1bdd444e4ff0db467f1f80faeb95b33d8f4"
|
2006
2040
|
dependencies = [
|
2007
2041
|
"async-trait",
|
2008
2042
|
"bytes",
|
@@ -2854,6 +2888,12 @@ dependencies = [
|
|
2854
2888
|
"static_assertions",
|
2855
2889
|
]
|
2856
2890
|
|
2891
|
+
[[package]]
|
2892
|
+
name = "libbz2-rs-sys"
|
2893
|
+
version = "0.2.2"
|
2894
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2895
|
+
checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
|
2896
|
+
|
2857
2897
|
[[package]]
|
2858
2898
|
name = "libc"
|
2859
2899
|
version = "0.2.174"
|
@@ -2947,9 +2987,9 @@ dependencies = [
|
|
2947
2987
|
|
2948
2988
|
[[package]]
|
2949
2989
|
name = "magnus"
|
2950
|
-
version = "0.
|
2990
|
+
version = "0.8.0"
|
2951
2991
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2952
|
-
checksum = "
|
2992
|
+
checksum = "3f14d3cc31b2dc4fce6cd447a83c7a7ca2ab8a9f1e535dcb2f796ff972b0e68b"
|
2953
2993
|
dependencies = [
|
2954
2994
|
"magnus-macros",
|
2955
2995
|
"rb-sys",
|
@@ -2959,9 +2999,9 @@ dependencies = [
|
|
2959
2999
|
|
2960
3000
|
[[package]]
|
2961
3001
|
name = "magnus-macros"
|
2962
|
-
version = "0.
|
3002
|
+
version = "0.8.0"
|
2963
3003
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2964
|
-
checksum = "
|
3004
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
2965
3005
|
dependencies = [
|
2966
3006
|
"proc-macro2",
|
2967
3007
|
"quote",
|
@@ -3127,9 +3167,9 @@ dependencies = [
|
|
3127
3167
|
|
3128
3168
|
[[package]]
|
3129
3169
|
name = "object_store"
|
3130
|
-
version = "0.12.
|
3170
|
+
version = "0.12.3"
|
3131
3171
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3132
|
-
checksum = "
|
3172
|
+
checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496"
|
3133
3173
|
dependencies = [
|
3134
3174
|
"async-trait",
|
3135
3175
|
"base64 0.22.1",
|
@@ -3240,6 +3280,7 @@ dependencies = [
|
|
3240
3280
|
"num-bigint",
|
3241
3281
|
"object_store",
|
3242
3282
|
"paste",
|
3283
|
+
"ring",
|
3243
3284
|
"seq-macro",
|
3244
3285
|
"simdutf8",
|
3245
3286
|
"snap",
|
@@ -3261,6 +3302,12 @@ version = "2.3.1"
|
|
3261
3302
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3262
3303
|
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
3263
3304
|
|
3305
|
+
[[package]]
|
3306
|
+
name = "percent-encoding-rfc3986"
|
3307
|
+
version = "0.1.3"
|
3308
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3309
|
+
checksum = "3637c05577168127568a64e9dc5a6887da720efef07b3d9472d45f63ab191166"
|
3310
|
+
|
3264
3311
|
[[package]]
|
3265
3312
|
name = "petgraph"
|
3266
3313
|
version = "0.8.2"
|
@@ -3408,9 +3455,9 @@ dependencies = [
|
|
3408
3455
|
|
3409
3456
|
[[package]]
|
3410
3457
|
name = "quick-xml"
|
3411
|
-
version = "0.
|
3458
|
+
version = "0.38.3"
|
3412
3459
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3413
|
-
checksum = "
|
3460
|
+
checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89"
|
3414
3461
|
dependencies = [
|
3415
3462
|
"memchr",
|
3416
3463
|
"serde",
|
@@ -3571,9 +3618,9 @@ dependencies = [
|
|
3571
3618
|
|
3572
3619
|
[[package]]
|
3573
3620
|
name = "rb-sys-env"
|
3574
|
-
version = "0.
|
3621
|
+
version = "0.2.2"
|
3575
3622
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3576
|
-
checksum = "
|
3623
|
+
checksum = "08f8d2924cf136a1315e2b4c7460a39f62ef11ee5d522df9b2750fab55b868b6"
|
3577
3624
|
|
3578
3625
|
[[package]]
|
3579
3626
|
name = "recursive"
|
@@ -4043,9 +4090,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
|
|
4043
4090
|
|
4044
4091
|
[[package]]
|
4045
4092
|
name = "slab"
|
4046
|
-
version = "0.4.
|
4093
|
+
version = "0.4.11"
|
4047
4094
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
4048
|
-
checksum = "
|
4095
|
+
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
|
4049
4096
|
|
4050
4097
|
[[package]]
|
4051
4098
|
name = "smallvec"
|
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "deltalake"
|
3
|
-
version = "0.2.0"
|
3
|
+
version = "0.2.1"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,11 +14,19 @@ crate-type = ["cdylib"]
|
|
14
14
|
arrow = { version = "55.2", features = ["ffi"] }
|
15
15
|
arrow-schema = { version = "55.2", features = ["serde"] }
|
16
16
|
chrono = "0.4"
|
17
|
-
delta_kernel = "0.
|
18
|
-
deltalake = { version = "=0.27.0", features = ["azure", "datafusion", "gcs", "s3"] }
|
17
|
+
delta_kernel = { version = "0.14", features = ["arrow-55", "default-engine-rustls"] }
|
19
18
|
futures = "0.3"
|
20
|
-
magnus = "0.
|
19
|
+
magnus = "0.8"
|
21
20
|
num_cpus = "1"
|
22
21
|
serde = "1"
|
23
22
|
serde_json = "1"
|
24
23
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
24
|
+
|
25
|
+
[dependencies.deltalake]
|
26
|
+
version = "=0.28.0"
|
27
|
+
features = [
|
28
|
+
"azure",
|
29
|
+
"datafusion",
|
30
|
+
"gcs",
|
31
|
+
"s3"
|
32
|
+
]
|
data/ext/deltalake/src/error.rs
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
use arrow_schema::ArrowError;
|
2
2
|
use deltalake::datafusion::error::DataFusionError;
|
3
3
|
use deltalake::{errors::DeltaTableError, ObjectStoreError};
|
4
|
-
use magnus::{
|
4
|
+
use magnus::{Error as RbErr, Module, RModule, Ruby};
|
5
5
|
use std::borrow::Cow;
|
6
6
|
|
7
7
|
macro_rules! create_exception {
|
@@ -41,7 +41,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
|
|
41
41
|
DeltaTableError::InvalidJsonLog { .. } => DeltaProtocolError::new_err(err.to_string()),
|
42
42
|
DeltaTableError::InvalidStatsJson { .. } => DeltaProtocolError::new_err(err.to_string()),
|
43
43
|
DeltaTableError::InvalidData { violations } => {
|
44
|
-
DeltaProtocolError::new_err(format!("Invariant violations: {:?}", violations))
|
44
|
+
DeltaProtocolError::new_err(format!("Invariant violations: {violations:?}"))
|
45
45
|
}
|
46
46
|
|
47
47
|
// commit errors
|
@@ -111,7 +111,7 @@ impl From<RubyError> for RbErr {
|
|
111
111
|
}
|
112
112
|
|
113
113
|
macro_rules! create_builtin_exception {
|
114
|
-
($type:ident, $
|
114
|
+
($type:ident, $method:ident) => {
|
115
115
|
pub struct $type {}
|
116
116
|
|
117
117
|
impl $type {
|
@@ -119,13 +119,14 @@ macro_rules! create_builtin_exception {
|
|
119
119
|
where
|
120
120
|
T: Into<Cow<'static, str>>,
|
121
121
|
{
|
122
|
-
|
122
|
+
let ruby = Ruby::get().unwrap();
|
123
|
+
RbErr::new(ruby.$method(), message)
|
123
124
|
}
|
124
125
|
}
|
125
126
|
};
|
126
127
|
}
|
127
128
|
|
128
|
-
create_builtin_exception!(RbException,
|
129
|
-
create_builtin_exception!(RbIOError,
|
130
|
-
create_builtin_exception!(RbNotImplementedError,
|
131
|
-
create_builtin_exception!(RbValueError,
|
129
|
+
create_builtin_exception!(RbException, exception_runtime_error);
|
130
|
+
create_builtin_exception!(RbIOError, exception_io_error);
|
131
|
+
create_builtin_exception!(RbNotImplementedError, exception_not_imp_error);
|
132
|
+
create_builtin_exception!(RbValueError, exception_arg_error);
|
data/ext/deltalake/src/lib.rs
CHANGED
@@ -13,6 +13,7 @@ use std::time;
|
|
13
13
|
|
14
14
|
use chrono::{DateTime, Duration, FixedOffset, Utc};
|
15
15
|
use delta_kernel::schema::StructField;
|
16
|
+
use delta_kernel::table_properties::DataSkippingNumIndexedCols;
|
16
17
|
use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
|
17
18
|
use deltalake::arrow::record_batch::RecordBatchIterator;
|
18
19
|
use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
|
@@ -21,6 +22,7 @@ use deltalake::datafusion::prelude::SessionContext;
|
|
21
22
|
use deltalake::delta_datafusion::DeltaCdfTableProvider;
|
22
23
|
use deltalake::errors::DeltaTableError;
|
23
24
|
use deltalake::kernel::transaction::{CommitProperties, TableReference};
|
25
|
+
use deltalake::kernel::StructDataExt;
|
24
26
|
use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
|
25
27
|
use deltalake::logstore::IORuntime;
|
26
28
|
use deltalake::logstore::LogStoreRef;
|
@@ -40,17 +42,20 @@ use deltalake::parquet::basic::Compression;
|
|
40
42
|
use deltalake::parquet::errors::ParquetError;
|
41
43
|
use deltalake::parquet::file::properties::WriterProperties;
|
42
44
|
use deltalake::partitions::PartitionFilter;
|
45
|
+
use deltalake::table::config::TablePropertiesExt;
|
46
|
+
use deltalake::table::state::DeltaTableState;
|
43
47
|
use deltalake::{DeltaOps, DeltaResult};
|
44
48
|
use error::DeltaError;
|
45
49
|
use futures::future::join_all;
|
50
|
+
use futures::TryStreamExt;
|
46
51
|
|
47
52
|
use magnus::{
|
48
|
-
function, method, prelude::*, typed_data::Obj, Error as RbErr,
|
49
|
-
TryConvert, Value,
|
53
|
+
function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
|
54
|
+
Integer, Module, RArray, Ruby, TryConvert, Value,
|
50
55
|
};
|
51
56
|
use serde_json::Map;
|
52
57
|
|
53
|
-
use crate::error::{
|
58
|
+
use crate::error::{RbValueError, RubyError};
|
54
59
|
use crate::features::TableFeatures;
|
55
60
|
use crate::merge::RbMergeBuilder;
|
56
61
|
use crate::schema::{schema_to_rbobject, Field};
|
@@ -73,6 +78,8 @@ impl TryConvert for PartitionFilterValue {
|
|
73
78
|
}
|
74
79
|
}
|
75
80
|
|
81
|
+
unsafe impl TryConvertOwned for PartitionFilterValue {}
|
82
|
+
|
76
83
|
#[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
|
77
84
|
struct RawDeltaTable {
|
78
85
|
_table: RefCell<deltalake::DeltaTable>,
|
@@ -121,6 +128,15 @@ impl RawDeltaTable {
|
|
121
128
|
func(&self._table.borrow())
|
122
129
|
}
|
123
130
|
|
131
|
+
fn cloned_state(&self) -> RbResult<DeltaTableState> {
|
132
|
+
self.with_table(|t| {
|
133
|
+
t.snapshot()
|
134
|
+
.cloned()
|
135
|
+
.map_err(RubyError::from)
|
136
|
+
.map_err(RbErr::from)
|
137
|
+
})
|
138
|
+
}
|
139
|
+
|
124
140
|
fn log_store(&self) -> RbResult<LogStoreRef> {
|
125
141
|
self.with_table(|t| Ok(t.log_store().clone()))
|
126
142
|
}
|
@@ -190,10 +206,8 @@ impl RawDeltaTable {
|
|
190
206
|
|
191
207
|
pub fn metadata(&self) -> RbResult<RawDeltaTableMetaData> {
|
192
208
|
let metadata = self.with_table(|t| {
|
193
|
-
t.
|
194
|
-
|
195
|
-
.map_err(RubyError::from)
|
196
|
-
.map_err(RbErr::from)
|
209
|
+
let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
|
210
|
+
Ok(snapshot.metadata().clone())
|
197
211
|
})?;
|
198
212
|
Ok(RawDeltaTableMetaData {
|
199
213
|
id: metadata.id().to_string(),
|
@@ -207,10 +221,8 @@ impl RawDeltaTable {
|
|
207
221
|
|
208
222
|
pub fn protocol_versions(&self) -> RbResult<(i32, i32, Option<StringVec>, Option<StringVec>)> {
|
209
223
|
let table_protocol = self.with_table(|t| {
|
210
|
-
t.
|
211
|
-
|
212
|
-
.map_err(RubyError::from)
|
213
|
-
.map_err(RbErr::from)
|
224
|
+
let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
|
225
|
+
Ok(snapshot.protocol().clone())
|
214
226
|
})?;
|
215
227
|
Ok((
|
216
228
|
table_protocol.min_reader_version(),
|
@@ -250,10 +262,15 @@ impl RawDeltaTable {
|
|
250
262
|
|
251
263
|
pub fn get_num_index_cols(&self) -> RbResult<i32> {
|
252
264
|
self.with_table(|t| {
|
253
|
-
|
265
|
+
let n_cols = t
|
266
|
+
.snapshot()
|
254
267
|
.map_err(RubyError::from)?
|
255
268
|
.config()
|
256
|
-
.num_indexed_cols()
|
269
|
+
.num_indexed_cols();
|
270
|
+
Ok(match n_cols {
|
271
|
+
DataSkippingNumIndexedCols::NumColumns(n_cols) => n_cols as i32,
|
272
|
+
DataSkippingNumIndexedCols::AllColumns => -1,
|
273
|
+
})
|
257
274
|
})
|
258
275
|
}
|
259
276
|
|
@@ -262,7 +279,8 @@ impl RawDeltaTable {
|
|
262
279
|
Ok(t.snapshot()
|
263
280
|
.map_err(RubyError::from)?
|
264
281
|
.config()
|
265
|
-
.
|
282
|
+
.data_skipping_stats_columns
|
283
|
+
.as_ref()
|
266
284
|
.map(|v| v.iter().map(|s| s.to_string()).collect::<Vec<String>>()))
|
267
285
|
})
|
268
286
|
}
|
@@ -289,9 +307,12 @@ impl RawDeltaTable {
|
|
289
307
|
let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
|
290
308
|
Ok(self
|
291
309
|
.with_table(|t| {
|
292
|
-
|
293
|
-
.
|
294
|
-
|
310
|
+
rt().block_on(async {
|
311
|
+
t.get_files_by_partitions(&filters)
|
312
|
+
.await
|
313
|
+
.map_err(RubyError::from)
|
314
|
+
.map_err(RbErr::from)
|
315
|
+
})
|
295
316
|
})?
|
296
317
|
.into_iter()
|
297
318
|
.map(|p| p.to_string())
|
@@ -300,8 +321,9 @@ impl RawDeltaTable {
|
|
300
321
|
Ok(self
|
301
322
|
._table
|
302
323
|
.borrow()
|
303
|
-
.
|
324
|
+
.snapshot()
|
304
325
|
.map_err(RubyError::from)?
|
326
|
+
.file_paths_iter()
|
305
327
|
.map(|f| f.to_string())
|
306
328
|
.collect())
|
307
329
|
}
|
@@ -318,9 +340,12 @@ impl RawDeltaTable {
|
|
318
340
|
if let Some(filters) = partition_filters {
|
319
341
|
let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
|
320
342
|
self.with_table(|t| {
|
321
|
-
|
322
|
-
.
|
323
|
-
|
343
|
+
rt().block_on(async {
|
344
|
+
t.get_file_uris_by_partitions(&filters)
|
345
|
+
.await
|
346
|
+
.map_err(RubyError::from)
|
347
|
+
.map_err(RbErr::from)
|
348
|
+
})
|
324
349
|
})
|
325
350
|
} else {
|
326
351
|
self.with_table(|t| {
|
@@ -332,14 +357,12 @@ impl RawDeltaTable {
|
|
332
357
|
}
|
333
358
|
}
|
334
359
|
|
335
|
-
pub fn schema(&
|
336
|
-
let schema: StructType =
|
337
|
-
t.
|
338
|
-
|
339
|
-
.map_err(RbErr::from)
|
340
|
-
.map(|s| s.to_owned())
|
360
|
+
pub fn schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
361
|
+
let schema: StructType = rb_self.with_table(|t| {
|
362
|
+
let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
|
363
|
+
Ok(snapshot.schema().clone())
|
341
364
|
})?;
|
342
|
-
schema_to_rbobject(schema.to_owned())
|
365
|
+
schema_to_rbobject(schema.to_owned(), ruby)
|
343
366
|
}
|
344
367
|
|
345
368
|
pub fn vacuum(
|
@@ -378,7 +401,7 @@ impl RawDeltaTable {
|
|
378
401
|
pub fn compact_optimize(
|
379
402
|
&self,
|
380
403
|
partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
|
381
|
-
target_size: Option<
|
404
|
+
target_size: Option<u64>,
|
382
405
|
max_concurrent_tasks: Option<usize>,
|
383
406
|
min_commit_interval: Option<u64>,
|
384
407
|
writer_properties: Option<RbWriterProperties>,
|
@@ -427,7 +450,7 @@ impl RawDeltaTable {
|
|
427
450
|
&self,
|
428
451
|
z_order_columns: Vec<String>,
|
429
452
|
partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
|
430
|
-
target_size: Option<
|
453
|
+
target_size: Option<u64>,
|
431
454
|
max_concurrent_tasks: Option<usize>,
|
432
455
|
max_spill_size: usize,
|
433
456
|
min_commit_interval: Option<u64>,
|
@@ -724,17 +747,18 @@ impl RawDeltaTable {
|
|
724
747
|
.map_err(RubyError::from)?)
|
725
748
|
}
|
726
749
|
|
727
|
-
fn get_active_partitions(&
|
728
|
-
let
|
729
|
-
|
730
|
-
.
|
731
|
-
|
732
|
-
|
733
|
-
.
|
734
|
-
.
|
735
|
-
|
736
|
-
|
737
|
-
.
|
750
|
+
fn get_active_partitions(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
751
|
+
let schema = rb_self.with_table(|t| {
|
752
|
+
let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
|
753
|
+
Ok(snapshot.schema().clone())
|
754
|
+
})?;
|
755
|
+
let metadata = rb_self.with_table(|t| {
|
756
|
+
let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
|
757
|
+
Ok(snapshot.metadata().clone())
|
758
|
+
})?;
|
759
|
+
let _column_names: HashSet<&str> =
|
760
|
+
schema.fields().map(|field| field.name().as_str()).collect();
|
761
|
+
let partition_columns: HashSet<&str> = metadata
|
738
762
|
.partition_columns()
|
739
763
|
.iter()
|
740
764
|
.map(|col| col.as_str())
|
@@ -744,12 +768,15 @@ impl RawDeltaTable {
|
|
744
768
|
|
745
769
|
let partition_columns: Vec<&str> = partition_columns.into_iter().collect();
|
746
770
|
|
747
|
-
let
|
748
|
-
|
749
|
-
|
750
|
-
.
|
751
|
-
|
752
|
-
|
771
|
+
let state = rb_self.cloned_state()?;
|
772
|
+
let log_store = rb_self.log_store()?;
|
773
|
+
let adds: Vec<_> = rt()
|
774
|
+
.block_on(async {
|
775
|
+
state
|
776
|
+
.get_active_add_actions_by_partitions(&log_store, &converted_filters)
|
777
|
+
.try_collect()
|
778
|
+
.await
|
779
|
+
})
|
753
780
|
.map_err(RubyError::from)?;
|
754
781
|
let active_partitions: HashSet<Vec<(&str, Option<String>)>> = adds
|
755
782
|
.iter()
|
@@ -757,21 +784,22 @@ impl RawDeltaTable {
|
|
757
784
|
Ok::<_, RubyError>(
|
758
785
|
partition_columns
|
759
786
|
.iter()
|
760
|
-
.
|
761
|
-
|
787
|
+
.map(|col| {
|
788
|
+
(
|
762
789
|
*col,
|
763
790
|
add.partition_values()
|
764
|
-
.
|
765
|
-
|
791
|
+
.and_then(|v| {
|
792
|
+
v.index_of(col).and_then(|idx| v.value(idx).cloned())
|
793
|
+
})
|
766
794
|
.map(|v| v.serialize()),
|
767
|
-
)
|
795
|
+
)
|
768
796
|
})
|
769
797
|
.collect(),
|
770
798
|
)
|
771
799
|
})
|
772
800
|
.collect();
|
773
801
|
|
774
|
-
Ok(
|
802
|
+
Ok(ruby.ary_from_iter(active_partitions))
|
775
803
|
}
|
776
804
|
|
777
805
|
pub fn create_checkpoint(&self) -> RbResult<()> {
|
@@ -789,15 +817,20 @@ impl RawDeltaTable {
|
|
789
817
|
}
|
790
818
|
|
791
819
|
pub fn get_add_file_sizes(&self) -> RbResult<HashMap<String, i64>> {
|
792
|
-
|
793
|
-
.
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
820
|
+
self.with_table(|t| {
|
821
|
+
let log_store = t.log_store();
|
822
|
+
let sizes: HashMap<String, i64> = rt()
|
823
|
+
.block_on(async {
|
824
|
+
t.snapshot()?
|
825
|
+
.snapshot()
|
826
|
+
.files(&log_store, None)
|
827
|
+
.map_ok(|f| (f.path().to_string(), f.size()))
|
828
|
+
.try_collect()
|
829
|
+
.await
|
830
|
+
})
|
831
|
+
.map_err(RubyError::from)?;
|
832
|
+
Ok(sizes)
|
833
|
+
})
|
801
834
|
}
|
802
835
|
|
803
836
|
pub fn delete(
|
@@ -1397,6 +1430,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1397
1430
|
class.define_method("to_i", method!(ArrowArrayStream::to_i, 0))?;
|
1398
1431
|
|
1399
1432
|
let class = module.define_class("Field", ruby.class_object())?;
|
1433
|
+
class.define_singleton_method("new", function!(Field::new, 2))?;
|
1400
1434
|
class.define_method("name", method!(Field::name, 0))?;
|
1401
1435
|
class.define_method("type", method!(Field::get_type, 0))?;
|
1402
1436
|
class.define_method("nullable", method!(Field::nullable, 0))?;
|
data/ext/deltalake/src/schema.rs
CHANGED
@@ -1,20 +1,53 @@
|
|
1
|
-
use deltalake::kernel::{
|
2
|
-
|
1
|
+
use deltalake::kernel::{
|
2
|
+
DataType, PrimitiveType as DeltaPrimitive, StructField, StructType as DeltaStructType,
|
3
|
+
};
|
4
|
+
use magnus::{value::ReprValue, Module, RModule, Ruby, TryConvert, Value};
|
3
5
|
|
4
|
-
use crate::RbResult;
|
6
|
+
use crate::{RbResult, RbValueError};
|
5
7
|
|
6
|
-
pub fn schema_to_rbobject(schema: DeltaStructType) -> RbResult<Value> {
|
8
|
+
pub fn schema_to_rbobject(schema: DeltaStructType, ruby: &Ruby) -> RbResult<Value> {
|
7
9
|
let fields = schema.fields().map(|field| Field {
|
8
10
|
inner: field.clone(),
|
9
11
|
});
|
10
12
|
|
11
|
-
let rb_schema: Value =
|
12
|
-
.unwrap()
|
13
|
+
let rb_schema: Value = ruby
|
13
14
|
.class_object()
|
14
15
|
.const_get::<_, RModule>("DeltaLake")?
|
15
16
|
.const_get("Schema")?;
|
16
17
|
|
17
|
-
rb_schema.funcall("new", (
|
18
|
+
rb_schema.funcall("new", (ruby.ary_from_iter(fields),))
|
19
|
+
}
|
20
|
+
|
21
|
+
fn ruby_type_to_schema(ob: Value) -> RbResult<DataType> {
|
22
|
+
if let Ok(raw_primitive) = String::try_convert(ob) {
|
23
|
+
// Pass through PrimitiveType::new() to do validation
|
24
|
+
return PrimitiveType::new(raw_primitive)
|
25
|
+
.map(|data_type| DataType::Primitive(data_type.inner_type));
|
26
|
+
}
|
27
|
+
Err(RbValueError::new_err("Invalid data type"))
|
28
|
+
}
|
29
|
+
|
30
|
+
pub struct PrimitiveType {
|
31
|
+
inner_type: DeltaPrimitive,
|
32
|
+
}
|
33
|
+
|
34
|
+
impl PrimitiveType {
|
35
|
+
fn new(data_type: String) -> RbResult<Self> {
|
36
|
+
let data_type: DeltaPrimitive =
|
37
|
+
serde_json::from_str(&format!("\"{data_type}\"")).map_err(|_| {
|
38
|
+
if data_type.starts_with("decimal") {
|
39
|
+
RbValueError::new_err(format!(
|
40
|
+
"invalid type string: {data_type}, precision/scale can't be larger than 38"
|
41
|
+
))
|
42
|
+
} else {
|
43
|
+
RbValueError::new_err(format!("invalid type string: {data_type}"))
|
44
|
+
}
|
45
|
+
})?;
|
46
|
+
|
47
|
+
Ok(Self {
|
48
|
+
inner_type: data_type,
|
49
|
+
})
|
50
|
+
}
|
18
51
|
}
|
19
52
|
|
20
53
|
#[magnus::wrap(class = "DeltaLake::Field")]
|
@@ -23,6 +56,13 @@ pub struct Field {
|
|
23
56
|
}
|
24
57
|
|
25
58
|
impl Field {
|
59
|
+
pub fn new(name: String, r#type: Value) -> RbResult<Self> {
|
60
|
+
let ty = ruby_type_to_schema(r#type)?;
|
61
|
+
Ok(Self {
|
62
|
+
inner: StructField::new(name, ty, true),
|
63
|
+
})
|
64
|
+
}
|
65
|
+
|
26
66
|
pub fn name(&self) -> String {
|
27
67
|
self.inner.name().to_string()
|
28
68
|
}
|
data/ext/deltalake/src/utils.rs
CHANGED
@@ -10,11 +10,10 @@ pub fn rt() -> &'static Runtime {
|
|
10
10
|
let runtime_pid = *PID.get_or_init(|| pid);
|
11
11
|
if pid != runtime_pid {
|
12
12
|
panic!(
|
13
|
-
"Forked process detected - current PID is {} but the tokio runtime was created by {}. The tokio \
|
13
|
+
"Forked process detected - current PID is {pid} but the tokio runtime was created by {runtime_pid}. The tokio \
|
14
14
|
runtime does not support forked processes https://github.com/tokio-rs/tokio/issues/4301. If you are \
|
15
15
|
seeing this message while using Ruby multithreading make sure to use the `spawn` or `forkserver` \
|
16
|
-
mode."
|
17
|
-
pid, runtime_pid
|
16
|
+
mode."
|
18
17
|
);
|
19
18
|
}
|
20
19
|
TOKIO_RT.get_or_init(|| Runtime::new().expect("Failed to create a tokio runtime."))
|
data/lib/deltalake/table.rb
CHANGED
@@ -195,6 +195,7 @@ module DeltaLake
|
|
195
195
|
|
196
196
|
sources = file_uris
|
197
197
|
if sources.empty?
|
198
|
+
# TODO pass schema
|
198
199
|
lf = Polars::LazyFrame.new
|
199
200
|
else
|
200
201
|
delta_keys = [
|
@@ -209,7 +210,8 @@ module DeltaLake
|
|
209
210
|
sources,
|
210
211
|
hive_partitioning: true,
|
211
212
|
storage_options: storage_options,
|
212
|
-
rechunk: rechunk
|
213
|
+
rechunk: rechunk,
|
214
|
+
allow_missing_columns: true
|
213
215
|
)
|
214
216
|
|
215
217
|
if columns
|
data/lib/deltalake/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -71,7 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: '0'
|
73
73
|
requirements: []
|
74
|
-
rubygems_version: 3.6.
|
74
|
+
rubygems_version: 3.6.9
|
75
75
|
specification_version: 4
|
76
76
|
summary: Delta Lake for Ruby
|
77
77
|
test_files: []
|