polars-df 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +7 -5
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/mod.rs +13 -60
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/datetime.rs +6 -2
- data/ext/polars/src/expr/general.rs +28 -6
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +9 -30
- data/ext/polars/src/functions/lazy.rs +2 -0
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +54 -38
- data/ext/polars/src/lib.rs +46 -21
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +1 -131
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +21 -5
- data/ext/polars/src/dataframe.rs +0 -1208
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f3ab43985e7c935b41db31806bde6389ab5f01a0674e5c94d8e109d8b74ec0e
|
4
|
+
data.tar.gz: 9742bb044b65f5fccf41ced07c849b5fa70f241e3100c0f0bf8ec0de4f61d1dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab7f8f4a20790038760603ff9a86175a354f7264de3630c4116c4eff7271d83b5dfc4f5b16b16ba5aa6286606487923b3491dfe1849dc4117341eb864ff0a000
|
7
|
+
data.tar.gz: 5416270f6a5fb234ee846b6eb1e57bb4f287269d0d8172abf4b00154e797807634014ba5b2f43223ce5bb0e0721d90b5b33b910c679dcd6d8ec70b1c53376cdf
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.11.0 (2024-06-02)
|
2
|
+
|
3
|
+
- Updated Polars to 0.40.0
|
4
|
+
- Added `date_ranges` method to `Polars`
|
5
|
+
- Added `read_ipc_stream` method to `Polars`
|
6
|
+
- Added `write_ipc_stream` to `DataFrame`
|
7
|
+
- Added `flags` method to `DataFrame`
|
8
|
+
- Added support for keyword arguments to `agg` methods
|
9
|
+
- Aliased `apply` to `map_rows` for `DataFrame`
|
10
|
+
- Changed default `name` for `with_row_index` from `row_nr` to `index`
|
11
|
+
|
1
12
|
## 0.10.0 (2024-05-02)
|
2
13
|
|
3
14
|
- Updated Polars to 0.39.2
|
data/Cargo.lock
CHANGED
@@ -181,9 +181,9 @@ dependencies = [
|
|
181
181
|
|
182
182
|
[[package]]
|
183
183
|
name = "base64"
|
184
|
-
version = "0.
|
184
|
+
version = "0.22.1"
|
185
185
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
186
|
-
checksum = "
|
186
|
+
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
187
187
|
|
188
188
|
[[package]]
|
189
189
|
name = "bindgen"
|
@@ -222,9 +222,9 @@ dependencies = [
|
|
222
222
|
|
223
223
|
[[package]]
|
224
224
|
name = "brotli"
|
225
|
-
version = "
|
225
|
+
version = "5.0.0"
|
226
226
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
227
|
-
checksum = "
|
227
|
+
checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67"
|
228
228
|
dependencies = [
|
229
229
|
"alloc-no-stdlib",
|
230
230
|
"alloc-stdlib",
|
@@ -233,9 +233,9 @@ dependencies = [
|
|
233
233
|
|
234
234
|
[[package]]
|
235
235
|
name = "brotli-decompressor"
|
236
|
-
version = "
|
236
|
+
version = "4.0.0"
|
237
237
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
238
|
-
checksum = "
|
238
|
+
checksum = "e6221fe77a248b9117d431ad93761222e1cf8ff282d9d1d5d9f53d6299a1cf76"
|
239
239
|
dependencies = [
|
240
240
|
"alloc-no-stdlib",
|
241
241
|
"alloc-stdlib",
|
@@ -354,7 +354,7 @@ checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686"
|
|
354
354
|
dependencies = [
|
355
355
|
"crossterm",
|
356
356
|
"strum",
|
357
|
-
"strum_macros",
|
357
|
+
"strum_macros 0.25.3",
|
358
358
|
"unicode-width",
|
359
359
|
]
|
360
360
|
|
@@ -469,9 +469,12 @@ checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d"
|
|
469
469
|
|
470
470
|
[[package]]
|
471
471
|
name = "either"
|
472
|
-
version = "1.
|
472
|
+
version = "1.12.0"
|
473
473
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
474
|
-
checksum = "
|
474
|
+
checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
|
475
|
+
dependencies = [
|
476
|
+
"serde",
|
477
|
+
]
|
475
478
|
|
476
479
|
[[package]]
|
477
480
|
name = "enum_dispatch"
|
@@ -676,6 +679,7 @@ dependencies = [
|
|
676
679
|
"ahash",
|
677
680
|
"allocator-api2",
|
678
681
|
"rayon",
|
682
|
+
"serde",
|
679
683
|
]
|
680
684
|
|
681
685
|
[[package]]
|
@@ -966,9 +970,9 @@ dependencies = [
|
|
966
970
|
|
967
971
|
[[package]]
|
968
972
|
name = "magnus"
|
969
|
-
version = "0.6.
|
973
|
+
version = "0.6.4"
|
970
974
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
971
|
-
checksum = "
|
975
|
+
checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
|
972
976
|
dependencies = [
|
973
977
|
"magnus-macros",
|
974
978
|
"rb-sys",
|
@@ -1089,9 +1093,9 @@ dependencies = [
|
|
1089
1093
|
|
1090
1094
|
[[package]]
|
1091
1095
|
name = "num-traits"
|
1092
|
-
version = "0.2.
|
1096
|
+
version = "0.2.19"
|
1093
1097
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1094
|
-
checksum = "
|
1098
|
+
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
1095
1099
|
dependencies = [
|
1096
1100
|
"autocfg",
|
1097
1101
|
"libm",
|
@@ -1243,7 +1247,7 @@ dependencies = [
|
|
1243
1247
|
|
1244
1248
|
[[package]]
|
1245
1249
|
name = "polars"
|
1246
|
-
version = "0.
|
1250
|
+
version = "0.11.0"
|
1247
1251
|
dependencies = [
|
1248
1252
|
"ahash",
|
1249
1253
|
"chrono",
|
@@ -1251,7 +1255,7 @@ dependencies = [
|
|
1251
1255
|
"jemallocator",
|
1252
1256
|
"magnus",
|
1253
1257
|
"mimalloc",
|
1254
|
-
"polars 0.
|
1258
|
+
"polars 0.40.0",
|
1255
1259
|
"polars-core",
|
1256
1260
|
"polars-parquet",
|
1257
1261
|
"polars-utils",
|
@@ -1261,9 +1265,9 @@ dependencies = [
|
|
1261
1265
|
|
1262
1266
|
[[package]]
|
1263
1267
|
name = "polars"
|
1264
|
-
version = "0.
|
1268
|
+
version = "0.40.0"
|
1265
1269
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1266
|
-
checksum = "
|
1270
|
+
checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442"
|
1267
1271
|
dependencies = [
|
1268
1272
|
"getrandom",
|
1269
1273
|
"polars-arrow",
|
@@ -1282,9 +1286,9 @@ dependencies = [
|
|
1282
1286
|
|
1283
1287
|
[[package]]
|
1284
1288
|
name = "polars-arrow"
|
1285
|
-
version = "0.
|
1289
|
+
version = "0.40.0"
|
1286
1290
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1287
|
-
checksum = "
|
1291
|
+
checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e"
|
1288
1292
|
dependencies = [
|
1289
1293
|
"ahash",
|
1290
1294
|
"atoi",
|
@@ -1330,9 +1334,9 @@ dependencies = [
|
|
1330
1334
|
|
1331
1335
|
[[package]]
|
1332
1336
|
name = "polars-compute"
|
1333
|
-
version = "0.
|
1337
|
+
version = "0.40.0"
|
1334
1338
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1335
|
-
checksum = "
|
1339
|
+
checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0"
|
1336
1340
|
dependencies = [
|
1337
1341
|
"bytemuck",
|
1338
1342
|
"either",
|
@@ -1346,9 +1350,9 @@ dependencies = [
|
|
1346
1350
|
|
1347
1351
|
[[package]]
|
1348
1352
|
name = "polars-core"
|
1349
|
-
version = "0.
|
1353
|
+
version = "0.40.0"
|
1350
1354
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1351
|
-
checksum = "
|
1355
|
+
checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a"
|
1352
1356
|
dependencies = [
|
1353
1357
|
"ahash",
|
1354
1358
|
"bitflags 2.4.1",
|
@@ -1380,9 +1384,9 @@ dependencies = [
|
|
1380
1384
|
|
1381
1385
|
[[package]]
|
1382
1386
|
name = "polars-error"
|
1383
|
-
version = "0.
|
1387
|
+
version = "0.40.0"
|
1384
1388
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1385
|
-
checksum = "
|
1389
|
+
checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a"
|
1386
1390
|
dependencies = [
|
1387
1391
|
"avro-schema",
|
1388
1392
|
"polars-arrow-format",
|
@@ -1391,11 +1395,31 @@ dependencies = [
|
|
1391
1395
|
"thiserror",
|
1392
1396
|
]
|
1393
1397
|
|
1398
|
+
[[package]]
|
1399
|
+
name = "polars-expr"
|
1400
|
+
version = "0.40.0"
|
1401
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1402
|
+
checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484"
|
1403
|
+
dependencies = [
|
1404
|
+
"ahash",
|
1405
|
+
"bitflags 2.4.1",
|
1406
|
+
"once_cell",
|
1407
|
+
"polars-arrow",
|
1408
|
+
"polars-core",
|
1409
|
+
"polars-io",
|
1410
|
+
"polars-ops",
|
1411
|
+
"polars-plan",
|
1412
|
+
"polars-time",
|
1413
|
+
"polars-utils",
|
1414
|
+
"rayon",
|
1415
|
+
"smartstring",
|
1416
|
+
]
|
1417
|
+
|
1394
1418
|
[[package]]
|
1395
1419
|
name = "polars-io"
|
1396
|
-
version = "0.
|
1420
|
+
version = "0.40.0"
|
1397
1421
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1398
|
-
checksum = "
|
1422
|
+
checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e"
|
1399
1423
|
dependencies = [
|
1400
1424
|
"ahash",
|
1401
1425
|
"async-trait",
|
@@ -1433,9 +1457,9 @@ dependencies = [
|
|
1433
1457
|
|
1434
1458
|
[[package]]
|
1435
1459
|
name = "polars-json"
|
1436
|
-
version = "0.
|
1460
|
+
version = "0.40.0"
|
1437
1461
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1438
|
-
checksum = "
|
1462
|
+
checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901"
|
1439
1463
|
dependencies = [
|
1440
1464
|
"ahash",
|
1441
1465
|
"chrono",
|
@@ -1454,9 +1478,9 @@ dependencies = [
|
|
1454
1478
|
|
1455
1479
|
[[package]]
|
1456
1480
|
name = "polars-lazy"
|
1457
|
-
version = "0.
|
1481
|
+
version = "0.40.0"
|
1458
1482
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1459
|
-
checksum = "
|
1483
|
+
checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5"
|
1460
1484
|
dependencies = [
|
1461
1485
|
"ahash",
|
1462
1486
|
"bitflags 2.4.1",
|
@@ -1464,6 +1488,7 @@ dependencies = [
|
|
1464
1488
|
"once_cell",
|
1465
1489
|
"polars-arrow",
|
1466
1490
|
"polars-core",
|
1491
|
+
"polars-expr",
|
1467
1492
|
"polars-io",
|
1468
1493
|
"polars-json",
|
1469
1494
|
"polars-ops",
|
@@ -1478,9 +1503,9 @@ dependencies = [
|
|
1478
1503
|
|
1479
1504
|
[[package]]
|
1480
1505
|
name = "polars-ops"
|
1481
|
-
version = "0.
|
1506
|
+
version = "0.40.0"
|
1482
1507
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1483
|
-
checksum = "
|
1508
|
+
checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58"
|
1484
1509
|
dependencies = [
|
1485
1510
|
"ahash",
|
1486
1511
|
"aho-corasick",
|
@@ -1515,9 +1540,9 @@ dependencies = [
|
|
1515
1540
|
|
1516
1541
|
[[package]]
|
1517
1542
|
name = "polars-parquet"
|
1518
|
-
version = "0.
|
1543
|
+
version = "0.40.0"
|
1519
1544
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1520
|
-
checksum = "
|
1545
|
+
checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8"
|
1521
1546
|
dependencies = [
|
1522
1547
|
"ahash",
|
1523
1548
|
"async-stream",
|
@@ -1541,9 +1566,9 @@ dependencies = [
|
|
1541
1566
|
|
1542
1567
|
[[package]]
|
1543
1568
|
name = "polars-pipe"
|
1544
|
-
version = "0.
|
1569
|
+
version = "0.40.0"
|
1545
1570
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1546
|
-
checksum = "
|
1571
|
+
checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436"
|
1547
1572
|
dependencies = [
|
1548
1573
|
"crossbeam-channel",
|
1549
1574
|
"crossbeam-queue",
|
@@ -1553,6 +1578,7 @@ dependencies = [
|
|
1553
1578
|
"polars-arrow",
|
1554
1579
|
"polars-compute",
|
1555
1580
|
"polars-core",
|
1581
|
+
"polars-expr",
|
1556
1582
|
"polars-io",
|
1557
1583
|
"polars-ops",
|
1558
1584
|
"polars-plan",
|
@@ -1566,14 +1592,15 @@ dependencies = [
|
|
1566
1592
|
|
1567
1593
|
[[package]]
|
1568
1594
|
name = "polars-plan"
|
1569
|
-
version = "0.
|
1595
|
+
version = "0.40.0"
|
1570
1596
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1571
|
-
checksum = "
|
1597
|
+
checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90"
|
1572
1598
|
dependencies = [
|
1573
1599
|
"ahash",
|
1574
1600
|
"bytemuck",
|
1575
1601
|
"chrono",
|
1576
1602
|
"chrono-tz",
|
1603
|
+
"either",
|
1577
1604
|
"hashbrown 0.14.3",
|
1578
1605
|
"once_cell",
|
1579
1606
|
"percent-encoding",
|
@@ -1590,15 +1617,15 @@ dependencies = [
|
|
1590
1617
|
"regex",
|
1591
1618
|
"serde",
|
1592
1619
|
"smartstring",
|
1593
|
-
"strum_macros",
|
1620
|
+
"strum_macros 0.26.2",
|
1594
1621
|
"version_check",
|
1595
1622
|
]
|
1596
1623
|
|
1597
1624
|
[[package]]
|
1598
1625
|
name = "polars-row"
|
1599
|
-
version = "0.
|
1626
|
+
version = "0.40.0"
|
1600
1627
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1601
|
-
checksum = "
|
1628
|
+
checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40"
|
1602
1629
|
dependencies = [
|
1603
1630
|
"bytemuck",
|
1604
1631
|
"polars-arrow",
|
@@ -1608,11 +1635,12 @@ dependencies = [
|
|
1608
1635
|
|
1609
1636
|
[[package]]
|
1610
1637
|
name = "polars-sql"
|
1611
|
-
version = "0.
|
1638
|
+
version = "0.40.0"
|
1612
1639
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1613
|
-
checksum = "
|
1640
|
+
checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9"
|
1614
1641
|
dependencies = [
|
1615
1642
|
"hex",
|
1643
|
+
"once_cell",
|
1616
1644
|
"polars-arrow",
|
1617
1645
|
"polars-core",
|
1618
1646
|
"polars-error",
|
@@ -1626,11 +1654,12 @@ dependencies = [
|
|
1626
1654
|
|
1627
1655
|
[[package]]
|
1628
1656
|
name = "polars-time"
|
1629
|
-
version = "0.
|
1657
|
+
version = "0.40.0"
|
1630
1658
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1631
|
-
checksum = "
|
1659
|
+
checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97"
|
1632
1660
|
dependencies = [
|
1633
1661
|
"atoi",
|
1662
|
+
"bytemuck",
|
1634
1663
|
"chrono",
|
1635
1664
|
"chrono-tz",
|
1636
1665
|
"now",
|
@@ -1647,9 +1676,9 @@ dependencies = [
|
|
1647
1676
|
|
1648
1677
|
[[package]]
|
1649
1678
|
name = "polars-utils"
|
1650
|
-
version = "0.
|
1679
|
+
version = "0.40.0"
|
1651
1680
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1652
|
-
checksum = "
|
1681
|
+
checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c"
|
1653
1682
|
dependencies = [
|
1654
1683
|
"ahash",
|
1655
1684
|
"bytemuck",
|
@@ -2103,6 +2132,19 @@ dependencies = [
|
|
2103
2132
|
"syn 2.0.46",
|
2104
2133
|
]
|
2105
2134
|
|
2135
|
+
[[package]]
|
2136
|
+
name = "strum_macros"
|
2137
|
+
version = "0.26.2"
|
2138
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2139
|
+
checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946"
|
2140
|
+
dependencies = [
|
2141
|
+
"heck",
|
2142
|
+
"proc-macro2",
|
2143
|
+
"quote",
|
2144
|
+
"rustversion",
|
2145
|
+
"syn 2.0.46",
|
2146
|
+
]
|
2147
|
+
|
2106
2148
|
[[package]]
|
2107
2149
|
name = "syn"
|
2108
2150
|
version = "1.0.109"
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# Polars
|
1
|
+
# Ruby Polars
|
2
2
|
|
3
3
|
:fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/polars
|
5
|
+
[![Build Status](https://github.com/ankane/ruby-polars/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/ruby-polars/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -420,16 +420,16 @@ View the [changelog](CHANGELOG.md)
|
|
420
420
|
|
421
421
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
422
422
|
|
423
|
-
- [Report bugs](https://github.com/ankane/polars
|
424
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/polars
|
423
|
+
- [Report bugs](https://github.com/ankane/ruby-polars/issues)
|
424
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/ruby-polars/pulls)
|
425
425
|
- Write, clarify, or fix documentation
|
426
426
|
- Suggest or add new features
|
427
427
|
|
428
428
|
To get started with development:
|
429
429
|
|
430
430
|
```sh
|
431
|
-
git clone https://github.com/ankane/polars
|
432
|
-
cd polars
|
431
|
+
git clone https://github.com/ankane/ruby-polars.git
|
432
|
+
cd ruby-polars
|
433
433
|
bundle install
|
434
434
|
bundle exec rake compile
|
435
435
|
bundle exec rake test
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.10.0"
|
3
|
+
version = "0.11.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,14 +15,14 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.6"
|
18
|
-
polars-core = "=0.39.2"
|
19
|
-
polars-parquet = "=0.39.2"
|
20
|
-
polars-utils = "=0.39.2"
|
18
|
+
polars-core = "=0.40.0"
|
19
|
+
polars-parquet = "=0.40.0"
|
20
|
+
polars-utils = "=0.40.0"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.39.2"
|
25
|
+
version = "=0.40.0"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -55,6 +55,7 @@ features = [
|
|
55
55
|
"fmt",
|
56
56
|
"interpolate",
|
57
57
|
"ipc",
|
58
|
+
"ipc_streaming",
|
58
59
|
"is_first_distinct",
|
59
60
|
"is_in",
|
60
61
|
"is_last_distinct",
|
@@ -91,6 +92,7 @@ features = [
|
|
91
92
|
"replace",
|
92
93
|
"rle",
|
93
94
|
"rolling_window",
|
95
|
+
"rolling_window_by",
|
94
96
|
"round_series",
|
95
97
|
"row_hash",
|
96
98
|
"search_sorted",
|
data/ext/polars/src/batched_csv.rs
CHANGED
@@ -1,23 +1,19 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
use std::path::PathBuf;
|
3
|
+
use std::sync::Mutex;
|
4
|
+
|
1
5
|
use magnus::{prelude::*, RArray, Value};
|
6
|
+
use polars::io::csv::read::OwnedBatchedCsvReader;
|
2
7
|
use polars::io::mmap::MmapBytesReader;
|
3
8
|
use polars::io::RowIndex;
|
4
|
-
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
5
9
|
use polars::prelude::*;
|
6
|
-
use std::cell::RefCell;
|
7
|
-
use std::path::PathBuf;
|
8
10
|
|
9
11
|
use crate::conversion::*;
|
10
|
-
use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
|
11
12
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
12
13
|
|
13
|
-
pub enum BatchedReader {
|
14
|
-
MMap(OwnedBatchedCsvReaderMmap),
|
15
|
-
Read(OwnedBatchedCsvReader),
|
16
|
-
}
|
17
|
-
|
18
14
|
#[magnus::wrap(class = "Polars::RbBatchedCsv")]
|
19
15
|
pub struct RbBatchedCsv {
|
20
|
-
pub reader: RefCell<
|
16
|
+
pub reader: RefCell<Mutex<OwnedBatchedCsvReader>>,
|
21
17
|
}
|
22
18
|
|
23
19
|
impl RbBatchedCsv {
|
@@ -44,19 +40,23 @@ impl RbBatchedCsv {
|
|
44
40
|
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
41
|
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
42
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
-
let
|
48
|
-
let
|
49
|
-
let
|
50
|
-
let
|
51
|
-
let
|
52
|
-
let
|
43
|
+
let missing_utf8_is_empty_string = bool::try_convert(arguments[19])?;
|
44
|
+
let try_parse_dates = bool::try_convert(arguments[20])?;
|
45
|
+
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
46
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
47
|
+
let sample_size = usize::try_convert(arguments[23])?;
|
48
|
+
let eol_char = String::try_convert(arguments[24])?;
|
49
|
+
let raise_if_empty = bool::try_convert(arguments[25])?;
|
50
|
+
let truncate_ragged_lines = bool::try_convert(arguments[26])?;
|
51
|
+
let decimal_comma = bool::try_convert(arguments[27])?;
|
53
52
|
// end arguments
|
54
53
|
|
55
54
|
let null_values = null_values.map(|w| w.0);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
|
-
|
58
|
-
|
59
|
-
|
56
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
57
|
+
name: Arc::from(name.as_str()),
|
58
|
+
offset,
|
59
|
+
});
|
60
60
|
let quote_char = if let Some(s) = quote_char {
|
61
61
|
if s.is_empty() {
|
62
62
|
None
|
@@ -86,54 +86,55 @@ impl RbBatchedCsv {
|
|
86
86
|
|
87
87
|
let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
|
88
88
|
let reader = Box::new(file) as Box<dyn MmapBytesReader>;
|
89
|
-
let reader =
|
90
|
-
.
|
91
|
-
.
|
89
|
+
let reader = CsvReadOptions::default()
|
90
|
+
.with_infer_schema_length(infer_schema_length)
|
91
|
+
.with_has_header(has_header)
|
92
92
|
.with_n_rows(n_rows)
|
93
|
-
.with_separator(separator.as_bytes()[0])
|
94
93
|
.with_skip_rows(skip_rows)
|
95
94
|
.with_ignore_errors(ignore_errors)
|
96
|
-
.with_projection(projection)
|
95
|
+
.with_projection(projection.map(Arc::new))
|
97
96
|
.with_rechunk(rechunk)
|
98
97
|
.with_chunk_size(chunk_size)
|
99
|
-
.
|
100
|
-
.with_columns(columns)
|
98
|
+
.with_columns(columns.map(Arc::new))
|
101
99
|
.with_n_threads(n_threads)
|
102
|
-
.
|
103
|
-
.
|
104
|
-
.with_comment_prefix(comment_prefix.as_deref())
|
105
|
-
.with_null_values(null_values)
|
106
|
-
.with_try_parse_dates(try_parse_dates)
|
107
|
-
.with_quote_char(quote_char)
|
108
|
-
.with_end_of_line_char(eol_char)
|
100
|
+
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
|
+
.with_low_memory(low_memory)
|
109
102
|
.with_skip_rows_after_header(skip_rows_after_header)
|
110
103
|
.with_row_index(row_index)
|
111
|
-
.
|
112
|
-
.
|
104
|
+
.with_sample_size(sample_size)
|
105
|
+
.with_raise_if_empty(raise_if_empty)
|
106
|
+
.with_parse_options(
|
107
|
+
CsvParseOptions::default()
|
108
|
+
.with_separator(separator.as_bytes()[0])
|
109
|
+
.with_encoding(encoding.0)
|
110
|
+
.with_missing_is_null(!missing_utf8_is_empty_string)
|
111
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
112
|
+
.with_null_values(null_values)
|
113
|
+
.with_try_parse_dates(try_parse_dates)
|
114
|
+
.with_quote_char(quote_char)
|
115
|
+
.with_eol_char(eol_char)
|
116
|
+
.with_truncate_ragged_lines(truncate_ragged_lines)
|
117
|
+
.with_decimal_comma(decimal_comma),
|
118
|
+
)
|
119
|
+
.into_reader_with_file_handle(reader);
|
113
120
|
|
114
|
-
let reader =
|
115
|
-
|
116
|
-
|
117
|
-
.map_err(RbPolarsErr::from)?;
|
118
|
-
BatchedReader::Read(reader)
|
119
|
-
} else {
|
120
|
-
let reader = reader
|
121
|
-
.batched_mmap(overwrite_dtype.map(Arc::new))
|
122
|
-
.map_err(RbPolarsErr::from)?;
|
123
|
-
BatchedReader::MMap(reader)
|
124
|
-
};
|
121
|
+
let reader = reader
|
122
|
+
.batched(overwrite_dtype.map(Arc::new))
|
123
|
+
.map_err(RbPolarsErr::from)?;
|
125
124
|
|
126
125
|
Ok(RbBatchedCsv {
|
127
|
-
reader: RefCell::new(reader),
|
126
|
+
reader: RefCell::new(Mutex::new(reader)),
|
128
127
|
})
|
129
128
|
}
|
130
129
|
|
131
130
|
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
132
|
-
let
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
131
|
+
let reader = &self.reader;
|
132
|
+
let batches = reader
|
133
|
+
.borrow()
|
134
|
+
.lock()
|
135
|
+
.map_err(|e| RbPolarsErr::other(e.to_string()))?
|
136
|
+
.next_batches(n)
|
137
|
+
.map_err(RbPolarsErr::from)?;
|
137
138
|
|
138
139
|
Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
|
139
140
|
}
|