polars-df 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +90 -45
- data/README.md +1 -0
- data/ext/polars/Cargo.toml +8 -6
- data/ext/polars/src/batched_csv.rs +3 -1
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +18 -7
- data/ext/polars/src/dataframe.rs +40 -14
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +7 -2
- data/ext/polars/src/expr/general.rs +22 -3
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/string.rs +3 -3
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +18 -3
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/lazyframe/mod.rs +58 -19
- data/ext/polars/src/lib.rs +23 -14
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/mod.rs +12 -2
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/data_frame.rb +148 -74
- data/lib/polars/date_time_expr.rb +10 -4
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/expr.rb +37 -34
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/io.rb +18 -6
- data/lib/polars/lazy_frame.rb +39 -36
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/series.rb +12 -10
- data/lib/polars/string_expr.rb +1 -0
- data/lib/polars/utils.rb +54 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +1 -2
- metadata +4 -5
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 33ce29f4e785d3152582767ea838dc4636c7585b44508b23468cc133d2e00c9a
|
4
|
+
data.tar.gz: 0243b5580f8e85439b6b41e1572e9e630b2d610ebfe647c2f09683f029f3026c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b55fa78a8d21213151ff6729d950dc62fd17fb14841046dbd2b076be713cbf158df2da0b5f49c5bc86eb8c54e502f33d96b4468b5087948f0f3fa6aec1fee083
|
7
|
+
data.tar.gz: 959d5781a9541f37d614b48a7d2f531fadd39d67b6dcf0f8afa5593a082bc3106366046548d6a6d641ed7c060efd14da915949eb49f85adfa81e6be381148749
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.10.0 (2024-05-02)
|
2
|
+
|
3
|
+
- Updated Polars to 0.39.2
|
4
|
+
- Added support for writing JSON to string
|
5
|
+
- Added support for writing Parquet to `StringIO`
|
6
|
+
- Added support for cross joins
|
7
|
+
- Added `data_page_size` option to `write_parquet` method
|
8
|
+
- Added `truncate_ragged_lines` option to `read_csv`, `read_csv_batched`, and `scan_csv` methods
|
9
|
+
- Added precompiled gem for Linux x86-64 MUSL
|
10
|
+
- Changed `drop` method to ignore missing columns
|
11
|
+
- Fixed error with `then` method
|
12
|
+
|
1
13
|
## 0.9.0 (2024-03-03)
|
2
14
|
|
3
15
|
See the [upgrade guide](https://docs.pola.rs/releases/upgrade/0.20/)
|
data/Cargo.lock
CHANGED
@@ -83,9 +83,9 @@ dependencies = [
|
|
83
83
|
|
84
84
|
[[package]]
|
85
85
|
name = "argminmax"
|
86
|
-
version = "0.6.
|
86
|
+
version = "0.6.2"
|
87
87
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
88
|
-
checksum = "
|
88
|
+
checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa"
|
89
89
|
dependencies = [
|
90
90
|
"num-traits",
|
91
91
|
]
|
@@ -300,9 +300,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
300
300
|
|
301
301
|
[[package]]
|
302
302
|
name = "chrono"
|
303
|
-
version = "0.4.
|
303
|
+
version = "0.4.35"
|
304
304
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
305
|
-
checksum = "
|
305
|
+
checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a"
|
306
306
|
dependencies = [
|
307
307
|
"android-tzdata",
|
308
308
|
"iana-time-zone",
|
@@ -310,7 +310,7 @@ dependencies = [
|
|
310
310
|
"num-traits",
|
311
311
|
"serde",
|
312
312
|
"wasm-bindgen",
|
313
|
-
"windows-targets 0.
|
313
|
+
"windows-targets 0.52.0",
|
314
314
|
]
|
315
315
|
|
316
316
|
[[package]]
|
@@ -966,9 +966,9 @@ dependencies = [
|
|
966
966
|
|
967
967
|
[[package]]
|
968
968
|
name = "magnus"
|
969
|
-
version = "0.6.
|
969
|
+
version = "0.6.3"
|
970
970
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
971
|
-
checksum = "
|
971
|
+
checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
|
972
972
|
dependencies = [
|
973
973
|
"magnus-macros",
|
974
974
|
"rb-sys",
|
@@ -1243,7 +1243,7 @@ dependencies = [
|
|
1243
1243
|
|
1244
1244
|
[[package]]
|
1245
1245
|
name = "polars"
|
1246
|
-
version = "0.
|
1246
|
+
version = "0.10.0"
|
1247
1247
|
dependencies = [
|
1248
1248
|
"ahash",
|
1249
1249
|
"chrono",
|
@@ -1251,7 +1251,7 @@ dependencies = [
|
|
1251
1251
|
"jemallocator",
|
1252
1252
|
"magnus",
|
1253
1253
|
"mimalloc",
|
1254
|
-
"polars 0.
|
1254
|
+
"polars 0.39.2",
|
1255
1255
|
"polars-core",
|
1256
1256
|
"polars-parquet",
|
1257
1257
|
"polars-utils",
|
@@ -1261,9 +1261,9 @@ dependencies = [
|
|
1261
1261
|
|
1262
1262
|
[[package]]
|
1263
1263
|
name = "polars"
|
1264
|
-
version = "0.
|
1264
|
+
version = "0.39.2"
|
1265
1265
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1266
|
-
checksum = "
|
1266
|
+
checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5"
|
1267
1267
|
dependencies = [
|
1268
1268
|
"getrandom",
|
1269
1269
|
"polars-arrow",
|
@@ -1282,9 +1282,9 @@ dependencies = [
|
|
1282
1282
|
|
1283
1283
|
[[package]]
|
1284
1284
|
name = "polars-arrow"
|
1285
|
-
version = "0.
|
1285
|
+
version = "0.39.2"
|
1286
1286
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1287
|
-
checksum = "
|
1287
|
+
checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75"
|
1288
1288
|
dependencies = [
|
1289
1289
|
"ahash",
|
1290
1290
|
"atoi",
|
@@ -1330,9 +1330,9 @@ dependencies = [
|
|
1330
1330
|
|
1331
1331
|
[[package]]
|
1332
1332
|
name = "polars-compute"
|
1333
|
-
version = "0.
|
1333
|
+
version = "0.39.2"
|
1334
1334
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1335
|
-
checksum = "
|
1335
|
+
checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe"
|
1336
1336
|
dependencies = [
|
1337
1337
|
"bytemuck",
|
1338
1338
|
"either",
|
@@ -1346,9 +1346,9 @@ dependencies = [
|
|
1346
1346
|
|
1347
1347
|
[[package]]
|
1348
1348
|
name = "polars-core"
|
1349
|
-
version = "0.
|
1349
|
+
version = "0.39.2"
|
1350
1350
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1351
|
-
checksum = "
|
1351
|
+
checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c"
|
1352
1352
|
dependencies = [
|
1353
1353
|
"ahash",
|
1354
1354
|
"bitflags 2.4.1",
|
@@ -1380,9 +1380,9 @@ dependencies = [
|
|
1380
1380
|
|
1381
1381
|
[[package]]
|
1382
1382
|
name = "polars-error"
|
1383
|
-
version = "0.
|
1383
|
+
version = "0.39.2"
|
1384
1384
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1385
|
-
checksum = "
|
1385
|
+
checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757"
|
1386
1386
|
dependencies = [
|
1387
1387
|
"avro-schema",
|
1388
1388
|
"polars-arrow-format",
|
@@ -1393,9 +1393,9 @@ dependencies = [
|
|
1393
1393
|
|
1394
1394
|
[[package]]
|
1395
1395
|
name = "polars-io"
|
1396
|
-
version = "0.
|
1396
|
+
version = "0.39.2"
|
1397
1397
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1398
|
-
checksum = "
|
1398
|
+
checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b"
|
1399
1399
|
dependencies = [
|
1400
1400
|
"ahash",
|
1401
1401
|
"async-trait",
|
@@ -1433,9 +1433,9 @@ dependencies = [
|
|
1433
1433
|
|
1434
1434
|
[[package]]
|
1435
1435
|
name = "polars-json"
|
1436
|
-
version = "0.
|
1436
|
+
version = "0.39.2"
|
1437
1437
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1438
|
-
checksum = "
|
1438
|
+
checksum = "81224492a649a12b668480c0cf219d703f432509765d2717e72fe32ad16fc701"
|
1439
1439
|
dependencies = [
|
1440
1440
|
"ahash",
|
1441
1441
|
"chrono",
|
@@ -1454,9 +1454,9 @@ dependencies = [
|
|
1454
1454
|
|
1455
1455
|
[[package]]
|
1456
1456
|
name = "polars-lazy"
|
1457
|
-
version = "0.
|
1457
|
+
version = "0.39.2"
|
1458
1458
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1459
|
-
checksum = "
|
1459
|
+
checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f"
|
1460
1460
|
dependencies = [
|
1461
1461
|
"ahash",
|
1462
1462
|
"bitflags 2.4.1",
|
@@ -1478,9 +1478,9 @@ dependencies = [
|
|
1478
1478
|
|
1479
1479
|
[[package]]
|
1480
1480
|
name = "polars-ops"
|
1481
|
-
version = "0.
|
1481
|
+
version = "0.39.2"
|
1482
1482
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1483
|
-
checksum = "
|
1483
|
+
checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f"
|
1484
1484
|
dependencies = [
|
1485
1485
|
"ahash",
|
1486
1486
|
"aho-corasick",
|
@@ -1515,9 +1515,9 @@ dependencies = [
|
|
1515
1515
|
|
1516
1516
|
[[package]]
|
1517
1517
|
name = "polars-parquet"
|
1518
|
-
version = "0.
|
1518
|
+
version = "0.39.2"
|
1519
1519
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1520
|
-
checksum = "
|
1520
|
+
checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a"
|
1521
1521
|
dependencies = [
|
1522
1522
|
"ahash",
|
1523
1523
|
"async-stream",
|
@@ -1541,9 +1541,9 @@ dependencies = [
|
|
1541
1541
|
|
1542
1542
|
[[package]]
|
1543
1543
|
name = "polars-pipe"
|
1544
|
-
version = "0.
|
1544
|
+
version = "0.39.2"
|
1545
1545
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1546
|
-
checksum = "
|
1546
|
+
checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c"
|
1547
1547
|
dependencies = [
|
1548
1548
|
"crossbeam-channel",
|
1549
1549
|
"crossbeam-queue",
|
@@ -1566,14 +1566,15 @@ dependencies = [
|
|
1566
1566
|
|
1567
1567
|
[[package]]
|
1568
1568
|
name = "polars-plan"
|
1569
|
-
version = "0.
|
1569
|
+
version = "0.39.2"
|
1570
1570
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1571
|
-
checksum = "
|
1571
|
+
checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d"
|
1572
1572
|
dependencies = [
|
1573
1573
|
"ahash",
|
1574
1574
|
"bytemuck",
|
1575
1575
|
"chrono",
|
1576
1576
|
"chrono-tz",
|
1577
|
+
"hashbrown 0.14.3",
|
1577
1578
|
"once_cell",
|
1578
1579
|
"percent-encoding",
|
1579
1580
|
"polars-arrow",
|
@@ -1585,6 +1586,7 @@ dependencies = [
|
|
1585
1586
|
"polars-time",
|
1586
1587
|
"polars-utils",
|
1587
1588
|
"rayon",
|
1589
|
+
"recursive",
|
1588
1590
|
"regex",
|
1589
1591
|
"serde",
|
1590
1592
|
"smartstring",
|
@@ -1594,9 +1596,9 @@ dependencies = [
|
|
1594
1596
|
|
1595
1597
|
[[package]]
|
1596
1598
|
name = "polars-row"
|
1597
|
-
version = "0.
|
1599
|
+
version = "0.39.2"
|
1598
1600
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1599
|
-
checksum = "
|
1601
|
+
checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e"
|
1600
1602
|
dependencies = [
|
1601
1603
|
"bytemuck",
|
1602
1604
|
"polars-arrow",
|
@@ -1606,9 +1608,9 @@ dependencies = [
|
|
1606
1608
|
|
1607
1609
|
[[package]]
|
1608
1610
|
name = "polars-sql"
|
1609
|
-
version = "0.
|
1611
|
+
version = "0.39.2"
|
1610
1612
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1611
|
-
checksum = "
|
1613
|
+
checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7"
|
1612
1614
|
dependencies = [
|
1613
1615
|
"hex",
|
1614
1616
|
"polars-arrow",
|
@@ -1624,9 +1626,9 @@ dependencies = [
|
|
1624
1626
|
|
1625
1627
|
[[package]]
|
1626
1628
|
name = "polars-time"
|
1627
|
-
version = "0.
|
1629
|
+
version = "0.39.2"
|
1628
1630
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1629
|
-
checksum = "
|
1631
|
+
checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1"
|
1630
1632
|
dependencies = [
|
1631
1633
|
"atoi",
|
1632
1634
|
"chrono",
|
@@ -1645,9 +1647,9 @@ dependencies = [
|
|
1645
1647
|
|
1646
1648
|
[[package]]
|
1647
1649
|
name = "polars-utils"
|
1648
|
-
version = "0.
|
1650
|
+
version = "0.39.2"
|
1649
1651
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1650
|
-
checksum = "
|
1652
|
+
checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8"
|
1651
1653
|
dependencies = [
|
1652
1654
|
"ahash",
|
1653
1655
|
"bytemuck",
|
@@ -1659,6 +1661,7 @@ dependencies = [
|
|
1659
1661
|
"raw-cpuid",
|
1660
1662
|
"rayon",
|
1661
1663
|
"smartstring",
|
1664
|
+
"stacker",
|
1662
1665
|
"sysinfo",
|
1663
1666
|
"version_check",
|
1664
1667
|
]
|
@@ -1678,6 +1681,15 @@ dependencies = [
|
|
1678
1681
|
"unicode-ident",
|
1679
1682
|
]
|
1680
1683
|
|
1684
|
+
[[package]]
|
1685
|
+
name = "psm"
|
1686
|
+
version = "0.1.21"
|
1687
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1688
|
+
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
1689
|
+
dependencies = [
|
1690
|
+
"cc",
|
1691
|
+
]
|
1692
|
+
|
1681
1693
|
[[package]]
|
1682
1694
|
name = "quote"
|
1683
1695
|
version = "1.0.35"
|
@@ -1758,18 +1770,18 @@ dependencies = [
|
|
1758
1770
|
|
1759
1771
|
[[package]]
|
1760
1772
|
name = "rb-sys"
|
1761
|
-
version = "0.9.
|
1773
|
+
version = "0.9.97"
|
1762
1774
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1763
|
-
checksum = "
|
1775
|
+
checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
|
1764
1776
|
dependencies = [
|
1765
1777
|
"rb-sys-build",
|
1766
1778
|
]
|
1767
1779
|
|
1768
1780
|
[[package]]
|
1769
1781
|
name = "rb-sys-build"
|
1770
|
-
version = "0.9.
|
1782
|
+
version = "0.9.97"
|
1771
1783
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1772
|
-
checksum = "
|
1784
|
+
checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
|
1773
1785
|
dependencies = [
|
1774
1786
|
"bindgen",
|
1775
1787
|
"lazy_static",
|
@@ -1786,6 +1798,26 @@ version = "0.1.2"
|
|
1786
1798
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1787
1799
|
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
1788
1800
|
|
1801
|
+
[[package]]
|
1802
|
+
name = "recursive"
|
1803
|
+
version = "0.1.1"
|
1804
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1805
|
+
checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e"
|
1806
|
+
dependencies = [
|
1807
|
+
"recursive-proc-macro-impl",
|
1808
|
+
"stacker",
|
1809
|
+
]
|
1810
|
+
|
1811
|
+
[[package]]
|
1812
|
+
name = "recursive-proc-macro-impl"
|
1813
|
+
version = "0.1.1"
|
1814
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1815
|
+
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
|
1816
|
+
dependencies = [
|
1817
|
+
"quote",
|
1818
|
+
"syn 2.0.46",
|
1819
|
+
]
|
1820
|
+
|
1789
1821
|
[[package]]
|
1790
1822
|
name = "redox_syscall"
|
1791
1823
|
version = "0.4.1"
|
@@ -2012,6 +2044,19 @@ dependencies = [
|
|
2012
2044
|
"log",
|
2013
2045
|
]
|
2014
2046
|
|
2047
|
+
[[package]]
|
2048
|
+
name = "stacker"
|
2049
|
+
version = "0.1.15"
|
2050
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2051
|
+
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
2052
|
+
dependencies = [
|
2053
|
+
"cc",
|
2054
|
+
"cfg-if",
|
2055
|
+
"libc",
|
2056
|
+
"psm",
|
2057
|
+
"winapi",
|
2058
|
+
]
|
2059
|
+
|
2015
2060
|
[[package]]
|
2016
2061
|
name = "static_assertions"
|
2017
2062
|
version = "1.1.0"
|
data/README.md
CHANGED
data/ext/polars/Cargo.toml
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.10.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
-
rust-version = "1.
|
7
|
+
rust-version = "1.76.0"
|
8
8
|
publish = false
|
9
9
|
|
10
10
|
[lib]
|
@@ -15,14 +15,14 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.6"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
18
|
+
polars-core = "=0.39.2"
|
19
|
+
polars-parquet = "=0.39.2"
|
20
|
+
polars-utils = "=0.39.2"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.39.2"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -34,6 +34,7 @@ features = [
|
|
34
34
|
"binary_encoding",
|
35
35
|
"concat_str",
|
36
36
|
"cov",
|
37
|
+
"cross_join",
|
37
38
|
"cse",
|
38
39
|
"csv",
|
39
40
|
"cum_agg",
|
@@ -47,6 +48,7 @@ features = [
|
|
47
48
|
"dtype-full",
|
48
49
|
"dynamic_group_by",
|
49
50
|
"ewma",
|
51
|
+
"ewma_by",
|
50
52
|
"extract_groups",
|
51
53
|
"extract_jsonpath",
|
52
54
|
"find_many",
|
@@ -49,6 +49,7 @@ impl RbBatchedCsv {
|
|
49
49
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
|
50
50
|
let sample_size = usize::try_convert(arguments[22])?;
|
51
51
|
let eol_char = String::try_convert(arguments[23])?;
|
52
|
+
let truncate_ragged_lines = bool::try_convert(arguments[24])?;
|
52
53
|
// end arguments
|
53
54
|
|
54
55
|
let null_values = null_values.map(|w| w.0);
|
@@ -107,7 +108,8 @@ impl RbBatchedCsv {
|
|
107
108
|
.with_end_of_line_char(eol_char)
|
108
109
|
.with_skip_rows_after_header(skip_rows_after_header)
|
109
110
|
.with_row_index(row_index)
|
110
|
-
.sample_size(sample_size)
|
111
|
+
.sample_size(sample_size)
|
112
|
+
.truncate_ragged_lines(truncate_ragged_lines);
|
111
113
|
|
112
114
|
let reader = if low_memory {
|
113
115
|
let reader = reader
|
@@ -3,8 +3,8 @@ use magnus::{
|
|
3
3
|
class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
|
4
4
|
TryConvert, Value,
|
5
5
|
};
|
6
|
-
use polars::frame::row::any_values_to_dtype;
|
7
6
|
use polars::prelude::*;
|
7
|
+
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
8
8
|
|
9
9
|
use super::{struct_dict, ObjectValue, Wrap};
|
10
10
|
|
@@ -120,7 +120,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
120
120
|
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
121
121
|
}
|
122
122
|
|
123
|
-
let (dtype, _n_types) =
|
123
|
+
let (dtype, _n_types) =
|
124
|
+
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
124
125
|
|
125
126
|
// push the rest
|
126
127
|
avs.reserve(list.len());
|
@@ -217,10 +217,7 @@ impl IntoValue for Wrap<DataType> {
|
|
217
217
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
218
218
|
let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
|
219
219
|
let series = to_series(s.into());
|
220
|
-
class
|
221
|
-
.funcall::<_, _, Value>("new", (series,))
|
222
|
-
.unwrap()
|
223
|
-
.into()
|
220
|
+
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
224
221
|
}
|
225
222
|
DataType::Time => {
|
226
223
|
let class = pl.const_get::<_, Value>("Time").unwrap();
|
@@ -740,8 +737,7 @@ impl TryConvert for Wrap<JoinType> {
|
|
740
737
|
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
741
738
|
"semi" => JoinType::Semi,
|
742
739
|
"anti" => JoinType::Anti,
|
743
|
-
|
744
|
-
// "cross" => JoinType::Cross,
|
740
|
+
"cross" => JoinType::Cross,
|
745
741
|
v => {
|
746
742
|
return Err(RbValueError::new_err(format!(
|
747
743
|
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
@@ -785,6 +781,21 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
|
785
781
|
}
|
786
782
|
}
|
787
783
|
|
784
|
+
impl TryConvert for Wrap<NonExistent> {
|
785
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
786
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
787
|
+
"null" => NonExistent::Null,
|
788
|
+
"raise" => NonExistent::Raise,
|
789
|
+
v => {
|
790
|
+
return Err(RbValueError::new_err(format!(
|
791
|
+
"`non_existent` must be one of {{'null', 'raise'}}, got {v}",
|
792
|
+
)))
|
793
|
+
}
|
794
|
+
};
|
795
|
+
Ok(Wrap(parsed))
|
796
|
+
}
|
797
|
+
}
|
798
|
+
|
788
799
|
impl TryConvert for Wrap<NullBehavior> {
|
789
800
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
790
801
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1066,7 +1077,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
|
|
1066
1077
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
1078
|
let v = usize::try_convert(ob)?;
|
1068
1079
|
NonZeroUsize::new(v)
|
1069
|
-
.map(
|
1080
|
+
.map(Wrap)
|
1070
1081
|
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1071
1082
|
}
|
1072
1083
|
}
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -16,7 +16,7 @@ use std::num::NonZeroUsize;
|
|
16
16
|
use std::ops::Deref;
|
17
17
|
|
18
18
|
use crate::conversion::*;
|
19
|
-
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
19
|
+
use crate::file::{get_either_file, get_file_like, get_mmap_bytes_reader, EitherRustRubyFile};
|
20
20
|
use crate::map::dataframe::{
|
21
21
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
22
22
|
apply_lambda_with_utf8_out_type,
|
@@ -136,6 +136,7 @@ impl RbDataFrame {
|
|
136
136
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
137
137
|
let sample_size = usize::try_convert(arguments[23])?;
|
138
138
|
let eol_char = String::try_convert(arguments[24])?;
|
139
|
+
let truncate_ragged_lines = bool::try_convert(arguments[25])?;
|
139
140
|
// end arguments
|
140
141
|
|
141
142
|
let null_values = null_values.map(|w| w.0);
|
@@ -196,6 +197,7 @@ impl RbDataFrame {
|
|
196
197
|
.with_skip_rows_after_header(skip_rows_after_header)
|
197
198
|
.with_row_index(row_index)
|
198
199
|
.sample_size(sample_size)
|
200
|
+
.truncate_ragged_lines(truncate_ragged_lines)
|
199
201
|
.finish()
|
200
202
|
.map_err(RbPolarsErr::from)?;
|
201
203
|
Ok(df.into())
|
@@ -213,19 +215,34 @@ impl RbDataFrame {
|
|
213
215
|
use_statistics: bool,
|
214
216
|
rechunk: bool,
|
215
217
|
) -> RbResult<Self> {
|
218
|
+
use EitherRustRubyFile::*;
|
219
|
+
|
216
220
|
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
217
|
-
let
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
221
|
+
let result = match get_either_file(rb_f, false)? {
|
222
|
+
Rb(f) => {
|
223
|
+
let buf = f.as_buffer();
|
224
|
+
ParquetReader::new(buf)
|
225
|
+
.with_projection(projection)
|
226
|
+
.with_columns(columns)
|
227
|
+
.read_parallel(parallel.0)
|
228
|
+
.with_n_rows(n_rows)
|
229
|
+
.with_row_index(row_index)
|
230
|
+
.set_low_memory(low_memory)
|
231
|
+
.use_statistics(use_statistics)
|
232
|
+
.set_rechunk(rechunk)
|
233
|
+
.finish()
|
234
|
+
}
|
235
|
+
Rust(f) => ParquetReader::new(f.into_inner())
|
236
|
+
.with_projection(projection)
|
237
|
+
.with_columns(columns)
|
238
|
+
.read_parallel(parallel.0)
|
239
|
+
.with_n_rows(n_rows)
|
240
|
+
.with_row_index(row_index)
|
241
|
+
.use_statistics(use_statistics)
|
242
|
+
.set_rechunk(rechunk)
|
243
|
+
.finish(),
|
244
|
+
};
|
245
|
+
let df = result.map_err(RbPolarsErr::from)?;
|
229
246
|
Ok(RbDataFrame::new(df))
|
230
247
|
}
|
231
248
|
|
@@ -576,6 +593,7 @@ impl RbDataFrame {
|
|
576
593
|
compression_level: Option<i32>,
|
577
594
|
statistics: bool,
|
578
595
|
row_group_size: Option<usize>,
|
596
|
+
data_page_size: Option<usize>,
|
579
597
|
) -> RbResult<()> {
|
580
598
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
581
599
|
|
@@ -585,10 +603,18 @@ impl RbDataFrame {
|
|
585
603
|
.with_compression(compression)
|
586
604
|
.with_statistics(statistics)
|
587
605
|
.with_row_group_size(row_group_size)
|
606
|
+
.with_data_page_size(data_page_size)
|
588
607
|
.finish(&mut self.df.borrow_mut())
|
589
608
|
.map_err(RbPolarsErr::from)?;
|
590
609
|
} else {
|
591
|
-
|
610
|
+
let buf = get_file_like(rb_f, true)?;
|
611
|
+
ParquetWriter::new(buf)
|
612
|
+
.with_compression(compression)
|
613
|
+
.with_statistics(statistics)
|
614
|
+
.with_row_group_size(row_group_size)
|
615
|
+
.with_data_page_size(data_page_size)
|
616
|
+
.finish(&mut self.df.borrow_mut())
|
617
|
+
.map_err(RbPolarsErr::from)?;
|
592
618
|
}
|
593
619
|
|
594
620
|
Ok(())
|
@@ -59,8 +59,12 @@ impl RbExpr {
|
|
59
59
|
self.inner.clone().arr().arg_max().into()
|
60
60
|
}
|
61
61
|
|
62
|
-
pub fn arr_get(&self, index: &RbExpr) -> Self {
|
63
|
-
self.inner
|
62
|
+
pub fn arr_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
63
|
+
self.inner
|
64
|
+
.clone()
|
65
|
+
.arr()
|
66
|
+
.get(index.inner.clone(), null_on_oob)
|
67
|
+
.into()
|
64
68
|
}
|
65
69
|
|
66
70
|
pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
|
@@ -37,11 +37,16 @@ impl RbExpr {
|
|
37
37
|
self.inner.clone().dt().cast_time_unit(tu.0).into()
|
38
38
|
}
|
39
39
|
|
40
|
-
pub fn dt_replace_time_zone(
|
40
|
+
pub fn dt_replace_time_zone(
|
41
|
+
&self,
|
42
|
+
time_zone: Option<String>,
|
43
|
+
ambiguous: &Self,
|
44
|
+
non_existent: Wrap<NonExistent>,
|
45
|
+
) -> Self {
|
41
46
|
self.inner
|
42
47
|
.clone()
|
43
48
|
.dt()
|
44
|
-
.replace_time_zone(time_zone, ambiguous.inner.clone())
|
49
|
+
.replace_time_zone(time_zone, ambiguous.inner.clone(), non_existent.0)
|
45
50
|
.into()
|
46
51
|
}
|
47
52
|
|