polars-df 0.1.3 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a08e866e51227716cd3cb4454835016a7d61e30e964fe76a8b99704dcb60a12
|
4
|
+
data.tar.gz: 1f30c3fdd47ebf52a311909aa26ba4b6d64e426622455854b9bbc660de1229b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1531fff4fc2fab8b2dc72709a69fb2890c215ae08e4223aa32262dbb4b0debb4b6f2fbab1e8138953871f5d02d462abfaba49cc7f22a66e25aa7d60f128a89bc
|
7
|
+
data.tar.gz: e1041d708e2f8046c14c565a65879fa4e5c6671cf526736a3f8418a82dfa70e17692a96d383e43f393d8761e5f29f717d63185ae1ed3f0793a6876be2d946fc0
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -8,6 +8,12 @@ version = "1.0.2"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
9
|
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
10
10
|
|
11
|
+
[[package]]
|
12
|
+
name = "adler32"
|
13
|
+
version = "1.2.0"
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
15
|
+
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
16
|
+
|
11
17
|
[[package]]
|
12
18
|
name = "ahash"
|
13
19
|
version = "0.7.6"
|
@@ -85,6 +91,7 @@ checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3"
|
|
85
91
|
dependencies = [
|
86
92
|
"ahash 0.7.6",
|
87
93
|
"arrow-format",
|
94
|
+
"avro-schema",
|
88
95
|
"base64",
|
89
96
|
"bytemuck",
|
90
97
|
"chrono",
|
@@ -147,6 +154,20 @@ version = "1.1.0"
|
|
147
154
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
148
155
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
149
156
|
|
157
|
+
[[package]]
|
158
|
+
name = "avro-schema"
|
159
|
+
version = "0.3.0"
|
160
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
161
|
+
checksum = "b5281855b39aba9684d2f47bf96983fbfd8f1725f12fabb0513a8ab879647bbd"
|
162
|
+
dependencies = [
|
163
|
+
"crc",
|
164
|
+
"fallible-streaming-iterator",
|
165
|
+
"libflate",
|
166
|
+
"serde",
|
167
|
+
"serde_json",
|
168
|
+
"snap",
|
169
|
+
]
|
170
|
+
|
150
171
|
[[package]]
|
151
172
|
name = "base64"
|
152
173
|
version = "0.13.1"
|
@@ -314,6 +335,21 @@ dependencies = [
|
|
314
335
|
"unicode-width",
|
315
336
|
]
|
316
337
|
|
338
|
+
[[package]]
|
339
|
+
name = "crc"
|
340
|
+
version = "2.1.0"
|
341
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
342
|
+
checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
|
343
|
+
dependencies = [
|
344
|
+
"crc-catalog",
|
345
|
+
]
|
346
|
+
|
347
|
+
[[package]]
|
348
|
+
name = "crc-catalog"
|
349
|
+
version = "1.1.1"
|
350
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
+
checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
|
352
|
+
|
317
353
|
[[package]]
|
318
354
|
name = "crc32fast"
|
319
355
|
version = "1.3.2"
|
@@ -483,6 +519,12 @@ version = "0.1.0"
|
|
483
519
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
484
520
|
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
|
485
521
|
|
522
|
+
[[package]]
|
523
|
+
name = "fs_extra"
|
524
|
+
version = "1.2.0"
|
525
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
526
|
+
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
527
|
+
|
486
528
|
[[package]]
|
487
529
|
name = "futures"
|
488
530
|
version = "0.3.25"
|
@@ -646,6 +688,12 @@ dependencies = [
|
|
646
688
|
"libc",
|
647
689
|
]
|
648
690
|
|
691
|
+
[[package]]
|
692
|
+
name = "hex"
|
693
|
+
version = "0.4.3"
|
694
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
695
|
+
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
696
|
+
|
649
697
|
[[package]]
|
650
698
|
name = "indexmap"
|
651
699
|
version = "1.8.0"
|
@@ -663,6 +711,27 @@ version = "1.0.4"
|
|
663
711
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
664
712
|
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
|
665
713
|
|
714
|
+
[[package]]
|
715
|
+
name = "jemalloc-sys"
|
716
|
+
version = "0.5.2+5.3.0-patched"
|
717
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
718
|
+
checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
|
719
|
+
dependencies = [
|
720
|
+
"cc",
|
721
|
+
"fs_extra",
|
722
|
+
"libc",
|
723
|
+
]
|
724
|
+
|
725
|
+
[[package]]
|
726
|
+
name = "jemallocator"
|
727
|
+
version = "0.5.0"
|
728
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
729
|
+
checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
|
730
|
+
dependencies = [
|
731
|
+
"jemalloc-sys",
|
732
|
+
"libc",
|
733
|
+
]
|
734
|
+
|
666
735
|
[[package]]
|
667
736
|
name = "jobserver"
|
668
737
|
version = "0.1.25"
|
@@ -683,13 +752,23 @@ dependencies = [
|
|
683
752
|
|
684
753
|
[[package]]
|
685
754
|
name = "json-deserializer"
|
686
|
-
version = "0.4.
|
755
|
+
version = "0.4.3"
|
687
756
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
688
|
-
checksum = "
|
757
|
+
checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
|
689
758
|
dependencies = [
|
690
759
|
"indexmap",
|
691
760
|
]
|
692
761
|
|
762
|
+
[[package]]
|
763
|
+
name = "jsonpath_lib"
|
764
|
+
version = "0.3.0"
|
765
|
+
source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
|
766
|
+
dependencies = [
|
767
|
+
"log",
|
768
|
+
"serde",
|
769
|
+
"serde_json",
|
770
|
+
]
|
771
|
+
|
693
772
|
[[package]]
|
694
773
|
name = "lazy_static"
|
695
774
|
version = "1.4.0"
|
@@ -781,6 +860,26 @@ version = "0.2.121"
|
|
781
860
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
782
861
|
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
783
862
|
|
863
|
+
[[package]]
|
864
|
+
name = "libflate"
|
865
|
+
version = "1.2.0"
|
866
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
867
|
+
checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
|
868
|
+
dependencies = [
|
869
|
+
"adler32",
|
870
|
+
"crc32fast",
|
871
|
+
"libflate_lz77",
|
872
|
+
]
|
873
|
+
|
874
|
+
[[package]]
|
875
|
+
name = "libflate_lz77"
|
876
|
+
version = "1.1.0"
|
877
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
878
|
+
checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
|
879
|
+
dependencies = [
|
880
|
+
"rle-decode-fast",
|
881
|
+
]
|
882
|
+
|
784
883
|
[[package]]
|
785
884
|
name = "libloading"
|
786
885
|
version = "0.7.4"
|
@@ -797,6 +896,16 @@ version = "0.2.6"
|
|
797
896
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
798
897
|
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
|
799
898
|
|
899
|
+
[[package]]
|
900
|
+
name = "libmimalloc-sys"
|
901
|
+
version = "0.1.28"
|
902
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
903
|
+
checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
|
904
|
+
dependencies = [
|
905
|
+
"cc",
|
906
|
+
"libc",
|
907
|
+
]
|
908
|
+
|
800
909
|
[[package]]
|
801
910
|
name = "lock_api"
|
802
911
|
version = "0.4.9"
|
@@ -850,8 +959,7 @@ dependencies = [
|
|
850
959
|
[[package]]
|
851
960
|
name = "magnus-macros"
|
852
961
|
version = "0.2.0"
|
853
|
-
source = "
|
854
|
-
checksum = "acc8ba6908cb0f67a4e75cb48fc81a1f0e6a6dd1501936e0c9e2c7c8f9f18e05"
|
962
|
+
source = "git+https://github.com/matsadler/magnus#ae792419bed70107d4c930e1f8193272750b9fd2"
|
855
963
|
dependencies = [
|
856
964
|
"proc-macro2",
|
857
965
|
"quote",
|
@@ -882,6 +990,15 @@ dependencies = [
|
|
882
990
|
"autocfg",
|
883
991
|
]
|
884
992
|
|
993
|
+
[[package]]
|
994
|
+
name = "mimalloc"
|
995
|
+
version = "0.1.32"
|
996
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
997
|
+
checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
|
998
|
+
dependencies = [
|
999
|
+
"libmimalloc-sys",
|
1000
|
+
]
|
1001
|
+
|
885
1002
|
[[package]]
|
886
1003
|
name = "minimal-lexical"
|
887
1004
|
version = "0.2.1"
|
@@ -1160,10 +1277,14 @@ dependencies = [
|
|
1160
1277
|
|
1161
1278
|
[[package]]
|
1162
1279
|
name = "polars"
|
1163
|
-
version = "0.1.
|
1280
|
+
version = "0.1.5"
|
1164
1281
|
dependencies = [
|
1282
|
+
"ahash 0.8.2",
|
1283
|
+
"jemallocator",
|
1165
1284
|
"magnus",
|
1285
|
+
"mimalloc",
|
1166
1286
|
"polars 0.25.1",
|
1287
|
+
"polars-core",
|
1167
1288
|
"serde_json",
|
1168
1289
|
]
|
1169
1290
|
|
@@ -1202,11 +1323,13 @@ dependencies = [
|
|
1202
1323
|
"ahash 0.8.2",
|
1203
1324
|
"anyhow",
|
1204
1325
|
"arrow2",
|
1326
|
+
"base64",
|
1205
1327
|
"bitflags",
|
1206
1328
|
"chrono",
|
1207
1329
|
"chrono-tz",
|
1208
1330
|
"comfy-table",
|
1209
1331
|
"hashbrown 0.12.3",
|
1332
|
+
"hex",
|
1210
1333
|
"indexmap",
|
1211
1334
|
"num",
|
1212
1335
|
"once_cell",
|
@@ -1277,9 +1400,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1277
1400
|
checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
|
1278
1401
|
dependencies = [
|
1279
1402
|
"arrow2",
|
1403
|
+
"jsonpath_lib",
|
1280
1404
|
"polars-arrow",
|
1281
1405
|
"polars-core",
|
1282
1406
|
"polars-utils",
|
1407
|
+
"serde_json",
|
1283
1408
|
]
|
1284
1409
|
|
1285
1410
|
[[package]]
|
@@ -1431,18 +1556,18 @@ dependencies = [
|
|
1431
1556
|
|
1432
1557
|
[[package]]
|
1433
1558
|
name = "rb-sys"
|
1434
|
-
version = "0.9.
|
1559
|
+
version = "0.9.48"
|
1435
1560
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1436
|
-
checksum = "
|
1561
|
+
checksum = "dfc6b8f3bf2d04b0180e243ceeb033b51ca267d839aa1c12fa25f262c17d0596"
|
1437
1562
|
dependencies = [
|
1438
1563
|
"rb-sys-build",
|
1439
1564
|
]
|
1440
1565
|
|
1441
1566
|
[[package]]
|
1442
1567
|
name = "rb-sys-build"
|
1443
|
-
version = "0.9.
|
1568
|
+
version = "0.9.48"
|
1444
1569
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1445
|
-
checksum = "
|
1570
|
+
checksum = "2cd591ebf22c45a44e51192fbeebba473aea0fe2a708b0b24665a13010c58b8d"
|
1446
1571
|
dependencies = [
|
1447
1572
|
"bindgen",
|
1448
1573
|
"regex",
|
@@ -1452,8 +1577,7 @@ dependencies = [
|
|
1452
1577
|
[[package]]
|
1453
1578
|
name = "rb-sys-env"
|
1454
1579
|
version = "0.1.1"
|
1455
|
-
source = "
|
1456
|
-
checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
|
1580
|
+
source = "git+https://github.com/oxidize-rb/rb-sys#93c4f97a244168b9ebc2c5682275e7281421f4b8"
|
1457
1581
|
|
1458
1582
|
[[package]]
|
1459
1583
|
name = "redox_syscall"
|
@@ -1492,6 +1616,12 @@ version = "0.6.28"
|
|
1492
1616
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1493
1617
|
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
|
1494
1618
|
|
1619
|
+
[[package]]
|
1620
|
+
name = "rle-decode-fast"
|
1621
|
+
version = "1.0.3"
|
1622
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1623
|
+
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
1624
|
+
|
1495
1625
|
[[package]]
|
1496
1626
|
name = "rustc-hash"
|
1497
1627
|
version = "1.1.0"
|
@@ -1548,6 +1678,7 @@ version = "1.0.88"
|
|
1548
1678
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1549
1679
|
checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
|
1550
1680
|
dependencies = [
|
1681
|
+
"indexmap",
|
1551
1682
|
"itoa",
|
1552
1683
|
"ryu",
|
1553
1684
|
"serde",
|
data/Cargo.toml
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
[workspace]
|
2
2
|
members = ["ext/polars"]
|
3
3
|
|
4
|
+
[patch.crates-io]
|
5
|
+
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
+
magnus-macros = { git = "https://github.com/matsadler/magnus" }
|
7
|
+
rb-sys-env = { git = "https://github.com/oxidize-rb/rb-sys" }
|
8
|
+
|
4
9
|
[profile.release]
|
5
10
|
strip = true
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.
|
3
|
+
version = "0.1.5"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -9,7 +9,9 @@ publish = false
|
|
9
9
|
crate-type = ["cdylib"]
|
10
10
|
|
11
11
|
[dependencies]
|
12
|
+
ahash = "0.8"
|
12
13
|
magnus = "0.4"
|
14
|
+
polars-core = "0.25.1"
|
13
15
|
serde_json = "1"
|
14
16
|
|
15
17
|
[dependencies.polars]
|
@@ -18,7 +20,10 @@ features = [
|
|
18
20
|
"abs",
|
19
21
|
"arange",
|
20
22
|
"arg_where",
|
23
|
+
"asof_join",
|
24
|
+
"avro",
|
21
25
|
"concat_str",
|
26
|
+
"cse",
|
22
27
|
"csv-file",
|
23
28
|
"cum_agg",
|
24
29
|
"cumulative_eval",
|
@@ -30,6 +35,7 @@ features = [
|
|
30
35
|
"dtype-full",
|
31
36
|
"dynamic_groupby",
|
32
37
|
"ewma",
|
38
|
+
"extract_jsonpath",
|
33
39
|
"fmt",
|
34
40
|
"horizontal_concat",
|
35
41
|
"interpolate",
|
@@ -49,6 +55,8 @@ features = [
|
|
49
55
|
"parquet",
|
50
56
|
"partition_by",
|
51
57
|
"pct_change",
|
58
|
+
"performant",
|
59
|
+
"pivot",
|
52
60
|
"product",
|
53
61
|
"propagate_nans",
|
54
62
|
"random",
|
@@ -57,10 +65,12 @@ features = [
|
|
57
65
|
"repeat_by",
|
58
66
|
"rolling_window",
|
59
67
|
"round_series",
|
68
|
+
"row_hash",
|
60
69
|
"search_sorted",
|
61
70
|
"semi_anti_join",
|
62
71
|
"serde-lazy",
|
63
72
|
"sign",
|
73
|
+
"string_encoding",
|
64
74
|
"string_justify",
|
65
75
|
"strings",
|
66
76
|
"timezones",
|
@@ -69,3 +79,9 @@ features = [
|
|
69
79
|
"trigonometry",
|
70
80
|
"unique_counts",
|
71
81
|
]
|
82
|
+
|
83
|
+
[target.'cfg(target_os = "linux")'.dependencies]
|
84
|
+
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
85
|
+
|
86
|
+
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
87
|
+
mimalloc = { version = "0.1", default-features = false }
|
@@ -0,0 +1,292 @@
|
|
1
|
+
use magnus::{class, RArray, TryConvert, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
|
+
|
5
|
+
use super::*;
|
6
|
+
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
7
|
+
|
8
|
+
pub fn apply_lambda_unknown<'a>(
|
9
|
+
df: &'a DataFrame,
|
10
|
+
lambda: Value,
|
11
|
+
inference_size: usize,
|
12
|
+
) -> RbResult<(Value, bool)> {
|
13
|
+
let columns = df.get_columns();
|
14
|
+
let mut null_count = 0;
|
15
|
+
|
16
|
+
for idx in 0..df.height() {
|
17
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
18
|
+
let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
19
|
+
let out: Value = lambda.funcall("call", arg)?;
|
20
|
+
|
21
|
+
if out.is_nil() {
|
22
|
+
null_count += 1;
|
23
|
+
continue;
|
24
|
+
} else if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
|
25
|
+
let first_value = out.try_convert::<bool>().ok();
|
26
|
+
return Ok((
|
27
|
+
RbSeries::new(
|
28
|
+
apply_lambda_with_bool_out_type(df, lambda, null_count, first_value)
|
29
|
+
.into_series(),
|
30
|
+
)
|
31
|
+
.into(),
|
32
|
+
false,
|
33
|
+
));
|
34
|
+
} else if out.is_kind_of(class::float()) {
|
35
|
+
let first_value = out.try_convert::<f64>().ok();
|
36
|
+
|
37
|
+
return Ok((
|
38
|
+
RbSeries::new(
|
39
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(
|
40
|
+
df,
|
41
|
+
lambda,
|
42
|
+
null_count,
|
43
|
+
first_value,
|
44
|
+
)
|
45
|
+
.into_series(),
|
46
|
+
)
|
47
|
+
.into(),
|
48
|
+
false,
|
49
|
+
));
|
50
|
+
} else if out.is_kind_of(class::integer()) {
|
51
|
+
let first_value = out.try_convert::<i64>().ok();
|
52
|
+
return Ok((
|
53
|
+
RbSeries::new(
|
54
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(
|
55
|
+
df,
|
56
|
+
lambda,
|
57
|
+
null_count,
|
58
|
+
first_value,
|
59
|
+
)
|
60
|
+
.into_series(),
|
61
|
+
)
|
62
|
+
.into(),
|
63
|
+
false,
|
64
|
+
));
|
65
|
+
// } else if out.is_kind_of(class::string()) {
|
66
|
+
// let first_value = out.try_convert::<String>().ok();
|
67
|
+
// return Ok((
|
68
|
+
// RbSeries::new(
|
69
|
+
// apply_lambda_with_utf8_out_type(df, lambda, null_count, first_value)
|
70
|
+
// .into_series(),
|
71
|
+
// )
|
72
|
+
// .into(),
|
73
|
+
// false,
|
74
|
+
// ));
|
75
|
+
} else if out.respond_to("_s", true)? {
|
76
|
+
let rb_rbseries: Value = out.funcall("_s", ()).unwrap();
|
77
|
+
let series = rb_rbseries
|
78
|
+
.try_convert::<&RbSeries>()
|
79
|
+
.unwrap()
|
80
|
+
.series
|
81
|
+
.borrow();
|
82
|
+
let dt = series.dtype();
|
83
|
+
return Ok((
|
84
|
+
RbSeries::new(
|
85
|
+
apply_lambda_with_list_out_type(df, lambda, null_count, Some(&series), dt)?
|
86
|
+
.into_series(),
|
87
|
+
)
|
88
|
+
.into(),
|
89
|
+
false,
|
90
|
+
));
|
91
|
+
} else if out.try_convert::<Wrap<Row<'a>>>().is_ok() {
|
92
|
+
let first_value = out.try_convert::<Wrap<Row<'a>>>().unwrap().0;
|
93
|
+
return Ok((
|
94
|
+
RbDataFrame::from(
|
95
|
+
apply_lambda_with_rows_output(
|
96
|
+
df,
|
97
|
+
lambda,
|
98
|
+
null_count,
|
99
|
+
first_value,
|
100
|
+
inference_size,
|
101
|
+
)
|
102
|
+
.map_err(RbPolarsErr::from)?,
|
103
|
+
)
|
104
|
+
.into(),
|
105
|
+
true,
|
106
|
+
));
|
107
|
+
} else if out.is_kind_of(class::array()) {
|
108
|
+
return Err(RbPolarsErr::other(
|
109
|
+
"A list output type is invalid. Do you mean to create polars List Series?\
|
110
|
+
Then return a Series object."
|
111
|
+
.into(),
|
112
|
+
));
|
113
|
+
} else {
|
114
|
+
return Err(RbPolarsErr::other("Could not determine output type".into()));
|
115
|
+
}
|
116
|
+
}
|
117
|
+
Err(RbPolarsErr::other("Could not determine output type".into()))
|
118
|
+
}
|
119
|
+
|
120
|
+
fn apply_iter<T>(
|
121
|
+
df: &DataFrame,
|
122
|
+
lambda: Value,
|
123
|
+
init_null_count: usize,
|
124
|
+
skip: usize,
|
125
|
+
) -> impl Iterator<Item = Option<T>> + '_
|
126
|
+
where
|
127
|
+
T: TryConvert,
|
128
|
+
{
|
129
|
+
let columns = df.get_columns();
|
130
|
+
((init_null_count + skip)..df.height()).map(move |idx| {
|
131
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
132
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
133
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
134
|
+
Ok(val) => val.try_convert::<T>().ok(),
|
135
|
+
Err(e) => panic!("ruby function failed {}", e),
|
136
|
+
}
|
137
|
+
})
|
138
|
+
}
|
139
|
+
|
140
|
+
/// Apply a lambda with a primitive output type
|
141
|
+
pub fn apply_lambda_with_primitive_out_type<D>(
|
142
|
+
df: &DataFrame,
|
143
|
+
lambda: Value,
|
144
|
+
init_null_count: usize,
|
145
|
+
first_value: Option<D::Native>,
|
146
|
+
) -> ChunkedArray<D>
|
147
|
+
where
|
148
|
+
D: RbArrowPrimitiveType,
|
149
|
+
D::Native: Into<Value> + TryConvert,
|
150
|
+
{
|
151
|
+
let skip = usize::from(first_value.is_some());
|
152
|
+
if init_null_count == df.height() {
|
153
|
+
ChunkedArray::full_null("apply", df.height())
|
154
|
+
} else {
|
155
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
156
|
+
iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
/// Apply a lambda with a boolean output type
|
161
|
+
pub fn apply_lambda_with_bool_out_type(
|
162
|
+
df: &DataFrame,
|
163
|
+
lambda: Value,
|
164
|
+
init_null_count: usize,
|
165
|
+
first_value: Option<bool>,
|
166
|
+
) -> ChunkedArray<BooleanType> {
|
167
|
+
let skip = usize::from(first_value.is_some());
|
168
|
+
if init_null_count == df.height() {
|
169
|
+
ChunkedArray::full_null("apply", df.height())
|
170
|
+
} else {
|
171
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
172
|
+
iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
176
|
+
/// Apply a lambda with utf8 output type
|
177
|
+
pub fn apply_lambda_with_utf8_out_type(
|
178
|
+
df: &DataFrame,
|
179
|
+
lambda: Value,
|
180
|
+
init_null_count: usize,
|
181
|
+
first_value: Option<&str>,
|
182
|
+
) -> Utf8Chunked {
|
183
|
+
let skip = usize::from(first_value.is_some());
|
184
|
+
if init_null_count == df.height() {
|
185
|
+
ChunkedArray::full_null("apply", df.height())
|
186
|
+
} else {
|
187
|
+
let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
|
188
|
+
iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
/// Apply a lambda with list output type
|
193
|
+
pub fn apply_lambda_with_list_out_type<'a>(
|
194
|
+
df: &'a DataFrame,
|
195
|
+
lambda: Value,
|
196
|
+
init_null_count: usize,
|
197
|
+
first_value: Option<&Series>,
|
198
|
+
dt: &DataType,
|
199
|
+
) -> RbResult<ListChunked> {
|
200
|
+
let columns = df.get_columns();
|
201
|
+
|
202
|
+
let skip = usize::from(first_value.is_some());
|
203
|
+
if init_null_count == df.height() {
|
204
|
+
Ok(ChunkedArray::full_null("apply", df.height()))
|
205
|
+
} else {
|
206
|
+
let iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
207
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
208
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
209
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
210
|
+
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
211
|
+
Ok(val) => val
|
212
|
+
.try_convert::<&RbSeries>()
|
213
|
+
.ok()
|
214
|
+
.map(|ps| ps.series.borrow().clone()),
|
215
|
+
Err(_) => {
|
216
|
+
if val.is_nil() {
|
217
|
+
None
|
218
|
+
} else {
|
219
|
+
panic!("should return a Series, got a {:?}", val)
|
220
|
+
}
|
221
|
+
}
|
222
|
+
},
|
223
|
+
Err(e) => panic!("ruby function failed {}", e),
|
224
|
+
}
|
225
|
+
});
|
226
|
+
iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
230
|
+
pub fn apply_lambda_with_rows_output<'a>(
|
231
|
+
df: &'a DataFrame,
|
232
|
+
lambda: Value,
|
233
|
+
init_null_count: usize,
|
234
|
+
first_value: Row<'a>,
|
235
|
+
inference_size: usize,
|
236
|
+
) -> PolarsResult<DataFrame> {
|
237
|
+
let columns = df.get_columns();
|
238
|
+
let width = first_value.0.len();
|
239
|
+
let null_row = Row::new(vec![AnyValue::Null; width]);
|
240
|
+
|
241
|
+
let mut row_buf = Row::default();
|
242
|
+
|
243
|
+
let skip = 1;
|
244
|
+
let mut row_iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
245
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
246
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
247
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
248
|
+
Ok(val) => {
|
249
|
+
match val.try_convert::<RArray>().ok() {
|
250
|
+
Some(tuple) => {
|
251
|
+
row_buf.0.clear();
|
252
|
+
for v in tuple.each() {
|
253
|
+
let v = v.unwrap().try_convert::<Wrap<AnyValue>>().unwrap().0;
|
254
|
+
row_buf.0.push(v);
|
255
|
+
}
|
256
|
+
let ptr = &row_buf as *const Row;
|
257
|
+
// Safety:
|
258
|
+
// we know that row constructor of polars dataframe does not keep a reference
|
259
|
+
// to the row. Before we mutate the row buf again, the reference is dropped.
|
260
|
+
// we only cannot prove it to the compiler.
|
261
|
+
// we still do this because it saves a Vec allocation in a hot loop.
|
262
|
+
unsafe { &*ptr }
|
263
|
+
}
|
264
|
+
None => &null_row,
|
265
|
+
}
|
266
|
+
}
|
267
|
+
Err(e) => panic!("ruby function failed {}", e),
|
268
|
+
}
|
269
|
+
});
|
270
|
+
|
271
|
+
// first rows for schema inference
|
272
|
+
let mut buf = Vec::with_capacity(inference_size);
|
273
|
+
buf.push(first_value);
|
274
|
+
buf.extend((&mut row_iter).take(inference_size).cloned());
|
275
|
+
let schema = rows_to_schema_first_non_null(&buf, Some(50));
|
276
|
+
|
277
|
+
if init_null_count > 0 {
|
278
|
+
// Safety: we know the iterators size
|
279
|
+
let iter = unsafe {
|
280
|
+
(0..init_null_count)
|
281
|
+
.map(|_| &null_row)
|
282
|
+
.chain(buf.iter())
|
283
|
+
.chain(row_iter)
|
284
|
+
.trust_my_length(df.height())
|
285
|
+
};
|
286
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
287
|
+
} else {
|
288
|
+
// Safety: we know the iterators size
|
289
|
+
let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
|
290
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
291
|
+
}
|
292
|
+
}
|