polars-df 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a08e866e51227716cd3cb4454835016a7d61e30e964fe76a8b99704dcb60a12
|
4
|
+
data.tar.gz: 1f30c3fdd47ebf52a311909aa26ba4b6d64e426622455854b9bbc660de1229b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1531fff4fc2fab8b2dc72709a69fb2890c215ae08e4223aa32262dbb4b0debb4b6f2fbab1e8138953871f5d02d462abfaba49cc7f22a66e25aa7d60f128a89bc
|
7
|
+
data.tar.gz: e1041d708e2f8046c14c565a65879fa4e5c6671cf526736a3f8418a82dfa70e17692a96d383e43f393d8761e5f29f717d63185ae1ed3f0793a6876be2d946fc0
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -8,6 +8,12 @@ version = "1.0.2"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
9
|
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
10
10
|
|
11
|
+
[[package]]
|
12
|
+
name = "adler32"
|
13
|
+
version = "1.2.0"
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
15
|
+
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
16
|
+
|
11
17
|
[[package]]
|
12
18
|
name = "ahash"
|
13
19
|
version = "0.7.6"
|
@@ -85,6 +91,7 @@ checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3"
|
|
85
91
|
dependencies = [
|
86
92
|
"ahash 0.7.6",
|
87
93
|
"arrow-format",
|
94
|
+
"avro-schema",
|
88
95
|
"base64",
|
89
96
|
"bytemuck",
|
90
97
|
"chrono",
|
@@ -147,6 +154,20 @@ version = "1.1.0"
|
|
147
154
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
148
155
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
149
156
|
|
157
|
+
[[package]]
|
158
|
+
name = "avro-schema"
|
159
|
+
version = "0.3.0"
|
160
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
161
|
+
checksum = "b5281855b39aba9684d2f47bf96983fbfd8f1725f12fabb0513a8ab879647bbd"
|
162
|
+
dependencies = [
|
163
|
+
"crc",
|
164
|
+
"fallible-streaming-iterator",
|
165
|
+
"libflate",
|
166
|
+
"serde",
|
167
|
+
"serde_json",
|
168
|
+
"snap",
|
169
|
+
]
|
170
|
+
|
150
171
|
[[package]]
|
151
172
|
name = "base64"
|
152
173
|
version = "0.13.1"
|
@@ -314,6 +335,21 @@ dependencies = [
|
|
314
335
|
"unicode-width",
|
315
336
|
]
|
316
337
|
|
338
|
+
[[package]]
|
339
|
+
name = "crc"
|
340
|
+
version = "2.1.0"
|
341
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
342
|
+
checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
|
343
|
+
dependencies = [
|
344
|
+
"crc-catalog",
|
345
|
+
]
|
346
|
+
|
347
|
+
[[package]]
|
348
|
+
name = "crc-catalog"
|
349
|
+
version = "1.1.1"
|
350
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
+
checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
|
352
|
+
|
317
353
|
[[package]]
|
318
354
|
name = "crc32fast"
|
319
355
|
version = "1.3.2"
|
@@ -483,6 +519,12 @@ version = "0.1.0"
|
|
483
519
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
484
520
|
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
|
485
521
|
|
522
|
+
[[package]]
|
523
|
+
name = "fs_extra"
|
524
|
+
version = "1.2.0"
|
525
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
526
|
+
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
527
|
+
|
486
528
|
[[package]]
|
487
529
|
name = "futures"
|
488
530
|
version = "0.3.25"
|
@@ -646,6 +688,12 @@ dependencies = [
|
|
646
688
|
"libc",
|
647
689
|
]
|
648
690
|
|
691
|
+
[[package]]
|
692
|
+
name = "hex"
|
693
|
+
version = "0.4.3"
|
694
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
695
|
+
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
696
|
+
|
649
697
|
[[package]]
|
650
698
|
name = "indexmap"
|
651
699
|
version = "1.8.0"
|
@@ -663,6 +711,27 @@ version = "1.0.4"
|
|
663
711
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
664
712
|
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
|
665
713
|
|
714
|
+
[[package]]
|
715
|
+
name = "jemalloc-sys"
|
716
|
+
version = "0.5.2+5.3.0-patched"
|
717
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
718
|
+
checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
|
719
|
+
dependencies = [
|
720
|
+
"cc",
|
721
|
+
"fs_extra",
|
722
|
+
"libc",
|
723
|
+
]
|
724
|
+
|
725
|
+
[[package]]
|
726
|
+
name = "jemallocator"
|
727
|
+
version = "0.5.0"
|
728
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
729
|
+
checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
|
730
|
+
dependencies = [
|
731
|
+
"jemalloc-sys",
|
732
|
+
"libc",
|
733
|
+
]
|
734
|
+
|
666
735
|
[[package]]
|
667
736
|
name = "jobserver"
|
668
737
|
version = "0.1.25"
|
@@ -683,13 +752,23 @@ dependencies = [
|
|
683
752
|
|
684
753
|
[[package]]
|
685
754
|
name = "json-deserializer"
|
686
|
-
version = "0.4.
|
755
|
+
version = "0.4.3"
|
687
756
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
688
|
-
checksum = "
|
757
|
+
checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
|
689
758
|
dependencies = [
|
690
759
|
"indexmap",
|
691
760
|
]
|
692
761
|
|
762
|
+
[[package]]
|
763
|
+
name = "jsonpath_lib"
|
764
|
+
version = "0.3.0"
|
765
|
+
source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
|
766
|
+
dependencies = [
|
767
|
+
"log",
|
768
|
+
"serde",
|
769
|
+
"serde_json",
|
770
|
+
]
|
771
|
+
|
693
772
|
[[package]]
|
694
773
|
name = "lazy_static"
|
695
774
|
version = "1.4.0"
|
@@ -781,6 +860,26 @@ version = "0.2.121"
|
|
781
860
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
782
861
|
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
783
862
|
|
863
|
+
[[package]]
|
864
|
+
name = "libflate"
|
865
|
+
version = "1.2.0"
|
866
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
867
|
+
checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
|
868
|
+
dependencies = [
|
869
|
+
"adler32",
|
870
|
+
"crc32fast",
|
871
|
+
"libflate_lz77",
|
872
|
+
]
|
873
|
+
|
874
|
+
[[package]]
|
875
|
+
name = "libflate_lz77"
|
876
|
+
version = "1.1.0"
|
877
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
878
|
+
checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
|
879
|
+
dependencies = [
|
880
|
+
"rle-decode-fast",
|
881
|
+
]
|
882
|
+
|
784
883
|
[[package]]
|
785
884
|
name = "libloading"
|
786
885
|
version = "0.7.4"
|
@@ -797,6 +896,16 @@ version = "0.2.6"
|
|
797
896
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
798
897
|
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
|
799
898
|
|
899
|
+
[[package]]
|
900
|
+
name = "libmimalloc-sys"
|
901
|
+
version = "0.1.28"
|
902
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
903
|
+
checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
|
904
|
+
dependencies = [
|
905
|
+
"cc",
|
906
|
+
"libc",
|
907
|
+
]
|
908
|
+
|
800
909
|
[[package]]
|
801
910
|
name = "lock_api"
|
802
911
|
version = "0.4.9"
|
@@ -850,8 +959,7 @@ dependencies = [
|
|
850
959
|
[[package]]
|
851
960
|
name = "magnus-macros"
|
852
961
|
version = "0.2.0"
|
853
|
-
source = "
|
854
|
-
checksum = "acc8ba6908cb0f67a4e75cb48fc81a1f0e6a6dd1501936e0c9e2c7c8f9f18e05"
|
962
|
+
source = "git+https://github.com/matsadler/magnus#ae792419bed70107d4c930e1f8193272750b9fd2"
|
855
963
|
dependencies = [
|
856
964
|
"proc-macro2",
|
857
965
|
"quote",
|
@@ -882,6 +990,15 @@ dependencies = [
|
|
882
990
|
"autocfg",
|
883
991
|
]
|
884
992
|
|
993
|
+
[[package]]
|
994
|
+
name = "mimalloc"
|
995
|
+
version = "0.1.32"
|
996
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
997
|
+
checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
|
998
|
+
dependencies = [
|
999
|
+
"libmimalloc-sys",
|
1000
|
+
]
|
1001
|
+
|
885
1002
|
[[package]]
|
886
1003
|
name = "minimal-lexical"
|
887
1004
|
version = "0.2.1"
|
@@ -1160,10 +1277,14 @@ dependencies = [
|
|
1160
1277
|
|
1161
1278
|
[[package]]
|
1162
1279
|
name = "polars"
|
1163
|
-
version = "0.1.
|
1280
|
+
version = "0.1.5"
|
1164
1281
|
dependencies = [
|
1282
|
+
"ahash 0.8.2",
|
1283
|
+
"jemallocator",
|
1165
1284
|
"magnus",
|
1285
|
+
"mimalloc",
|
1166
1286
|
"polars 0.25.1",
|
1287
|
+
"polars-core",
|
1167
1288
|
"serde_json",
|
1168
1289
|
]
|
1169
1290
|
|
@@ -1202,11 +1323,13 @@ dependencies = [
|
|
1202
1323
|
"ahash 0.8.2",
|
1203
1324
|
"anyhow",
|
1204
1325
|
"arrow2",
|
1326
|
+
"base64",
|
1205
1327
|
"bitflags",
|
1206
1328
|
"chrono",
|
1207
1329
|
"chrono-tz",
|
1208
1330
|
"comfy-table",
|
1209
1331
|
"hashbrown 0.12.3",
|
1332
|
+
"hex",
|
1210
1333
|
"indexmap",
|
1211
1334
|
"num",
|
1212
1335
|
"once_cell",
|
@@ -1277,9 +1400,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1277
1400
|
checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
|
1278
1401
|
dependencies = [
|
1279
1402
|
"arrow2",
|
1403
|
+
"jsonpath_lib",
|
1280
1404
|
"polars-arrow",
|
1281
1405
|
"polars-core",
|
1282
1406
|
"polars-utils",
|
1407
|
+
"serde_json",
|
1283
1408
|
]
|
1284
1409
|
|
1285
1410
|
[[package]]
|
@@ -1431,18 +1556,18 @@ dependencies = [
|
|
1431
1556
|
|
1432
1557
|
[[package]]
|
1433
1558
|
name = "rb-sys"
|
1434
|
-
version = "0.9.
|
1559
|
+
version = "0.9.48"
|
1435
1560
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1436
|
-
checksum = "
|
1561
|
+
checksum = "dfc6b8f3bf2d04b0180e243ceeb033b51ca267d839aa1c12fa25f262c17d0596"
|
1437
1562
|
dependencies = [
|
1438
1563
|
"rb-sys-build",
|
1439
1564
|
]
|
1440
1565
|
|
1441
1566
|
[[package]]
|
1442
1567
|
name = "rb-sys-build"
|
1443
|
-
version = "0.9.
|
1568
|
+
version = "0.9.48"
|
1444
1569
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1445
|
-
checksum = "
|
1570
|
+
checksum = "2cd591ebf22c45a44e51192fbeebba473aea0fe2a708b0b24665a13010c58b8d"
|
1446
1571
|
dependencies = [
|
1447
1572
|
"bindgen",
|
1448
1573
|
"regex",
|
@@ -1452,8 +1577,7 @@ dependencies = [
|
|
1452
1577
|
[[package]]
|
1453
1578
|
name = "rb-sys-env"
|
1454
1579
|
version = "0.1.1"
|
1455
|
-
source = "
|
1456
|
-
checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
|
1580
|
+
source = "git+https://github.com/oxidize-rb/rb-sys#93c4f97a244168b9ebc2c5682275e7281421f4b8"
|
1457
1581
|
|
1458
1582
|
[[package]]
|
1459
1583
|
name = "redox_syscall"
|
@@ -1492,6 +1616,12 @@ version = "0.6.28"
|
|
1492
1616
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1493
1617
|
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
|
1494
1618
|
|
1619
|
+
[[package]]
|
1620
|
+
name = "rle-decode-fast"
|
1621
|
+
version = "1.0.3"
|
1622
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1623
|
+
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
1624
|
+
|
1495
1625
|
[[package]]
|
1496
1626
|
name = "rustc-hash"
|
1497
1627
|
version = "1.1.0"
|
@@ -1548,6 +1678,7 @@ version = "1.0.88"
|
|
1548
1678
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1549
1679
|
checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
|
1550
1680
|
dependencies = [
|
1681
|
+
"indexmap",
|
1551
1682
|
"itoa",
|
1552
1683
|
"ryu",
|
1553
1684
|
"serde",
|
data/Cargo.toml
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
[workspace]
|
2
2
|
members = ["ext/polars"]
|
3
3
|
|
4
|
+
[patch.crates-io]
|
5
|
+
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
+
magnus-macros = { git = "https://github.com/matsadler/magnus" }
|
7
|
+
rb-sys-env = { git = "https://github.com/oxidize-rb/rb-sys" }
|
8
|
+
|
4
9
|
[profile.release]
|
5
10
|
strip = true
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.
|
3
|
+
version = "0.1.5"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -9,7 +9,9 @@ publish = false
|
|
9
9
|
crate-type = ["cdylib"]
|
10
10
|
|
11
11
|
[dependencies]
|
12
|
+
ahash = "0.8"
|
12
13
|
magnus = "0.4"
|
14
|
+
polars-core = "0.25.1"
|
13
15
|
serde_json = "1"
|
14
16
|
|
15
17
|
[dependencies.polars]
|
@@ -18,7 +20,10 @@ features = [
|
|
18
20
|
"abs",
|
19
21
|
"arange",
|
20
22
|
"arg_where",
|
23
|
+
"asof_join",
|
24
|
+
"avro",
|
21
25
|
"concat_str",
|
26
|
+
"cse",
|
22
27
|
"csv-file",
|
23
28
|
"cum_agg",
|
24
29
|
"cumulative_eval",
|
@@ -30,6 +35,7 @@ features = [
|
|
30
35
|
"dtype-full",
|
31
36
|
"dynamic_groupby",
|
32
37
|
"ewma",
|
38
|
+
"extract_jsonpath",
|
33
39
|
"fmt",
|
34
40
|
"horizontal_concat",
|
35
41
|
"interpolate",
|
@@ -49,6 +55,8 @@ features = [
|
|
49
55
|
"parquet",
|
50
56
|
"partition_by",
|
51
57
|
"pct_change",
|
58
|
+
"performant",
|
59
|
+
"pivot",
|
52
60
|
"product",
|
53
61
|
"propagate_nans",
|
54
62
|
"random",
|
@@ -57,10 +65,12 @@ features = [
|
|
57
65
|
"repeat_by",
|
58
66
|
"rolling_window",
|
59
67
|
"round_series",
|
68
|
+
"row_hash",
|
60
69
|
"search_sorted",
|
61
70
|
"semi_anti_join",
|
62
71
|
"serde-lazy",
|
63
72
|
"sign",
|
73
|
+
"string_encoding",
|
64
74
|
"string_justify",
|
65
75
|
"strings",
|
66
76
|
"timezones",
|
@@ -69,3 +79,9 @@ features = [
|
|
69
79
|
"trigonometry",
|
70
80
|
"unique_counts",
|
71
81
|
]
|
82
|
+
|
83
|
+
[target.'cfg(target_os = "linux")'.dependencies]
|
84
|
+
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
85
|
+
|
86
|
+
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
87
|
+
mimalloc = { version = "0.1", default-features = false }
|
@@ -0,0 +1,292 @@
|
|
1
|
+
use magnus::{class, RArray, TryConvert, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
|
+
|
5
|
+
use super::*;
|
6
|
+
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
7
|
+
|
8
|
+
pub fn apply_lambda_unknown<'a>(
|
9
|
+
df: &'a DataFrame,
|
10
|
+
lambda: Value,
|
11
|
+
inference_size: usize,
|
12
|
+
) -> RbResult<(Value, bool)> {
|
13
|
+
let columns = df.get_columns();
|
14
|
+
let mut null_count = 0;
|
15
|
+
|
16
|
+
for idx in 0..df.height() {
|
17
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
18
|
+
let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
19
|
+
let out: Value = lambda.funcall("call", arg)?;
|
20
|
+
|
21
|
+
if out.is_nil() {
|
22
|
+
null_count += 1;
|
23
|
+
continue;
|
24
|
+
} else if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
|
25
|
+
let first_value = out.try_convert::<bool>().ok();
|
26
|
+
return Ok((
|
27
|
+
RbSeries::new(
|
28
|
+
apply_lambda_with_bool_out_type(df, lambda, null_count, first_value)
|
29
|
+
.into_series(),
|
30
|
+
)
|
31
|
+
.into(),
|
32
|
+
false,
|
33
|
+
));
|
34
|
+
} else if out.is_kind_of(class::float()) {
|
35
|
+
let first_value = out.try_convert::<f64>().ok();
|
36
|
+
|
37
|
+
return Ok((
|
38
|
+
RbSeries::new(
|
39
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(
|
40
|
+
df,
|
41
|
+
lambda,
|
42
|
+
null_count,
|
43
|
+
first_value,
|
44
|
+
)
|
45
|
+
.into_series(),
|
46
|
+
)
|
47
|
+
.into(),
|
48
|
+
false,
|
49
|
+
));
|
50
|
+
} else if out.is_kind_of(class::integer()) {
|
51
|
+
let first_value = out.try_convert::<i64>().ok();
|
52
|
+
return Ok((
|
53
|
+
RbSeries::new(
|
54
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(
|
55
|
+
df,
|
56
|
+
lambda,
|
57
|
+
null_count,
|
58
|
+
first_value,
|
59
|
+
)
|
60
|
+
.into_series(),
|
61
|
+
)
|
62
|
+
.into(),
|
63
|
+
false,
|
64
|
+
));
|
65
|
+
// } else if out.is_kind_of(class::string()) {
|
66
|
+
// let first_value = out.try_convert::<String>().ok();
|
67
|
+
// return Ok((
|
68
|
+
// RbSeries::new(
|
69
|
+
// apply_lambda_with_utf8_out_type(df, lambda, null_count, first_value)
|
70
|
+
// .into_series(),
|
71
|
+
// )
|
72
|
+
// .into(),
|
73
|
+
// false,
|
74
|
+
// ));
|
75
|
+
} else if out.respond_to("_s", true)? {
|
76
|
+
let rb_rbseries: Value = out.funcall("_s", ()).unwrap();
|
77
|
+
let series = rb_rbseries
|
78
|
+
.try_convert::<&RbSeries>()
|
79
|
+
.unwrap()
|
80
|
+
.series
|
81
|
+
.borrow();
|
82
|
+
let dt = series.dtype();
|
83
|
+
return Ok((
|
84
|
+
RbSeries::new(
|
85
|
+
apply_lambda_with_list_out_type(df, lambda, null_count, Some(&series), dt)?
|
86
|
+
.into_series(),
|
87
|
+
)
|
88
|
+
.into(),
|
89
|
+
false,
|
90
|
+
));
|
91
|
+
} else if out.try_convert::<Wrap<Row<'a>>>().is_ok() {
|
92
|
+
let first_value = out.try_convert::<Wrap<Row<'a>>>().unwrap().0;
|
93
|
+
return Ok((
|
94
|
+
RbDataFrame::from(
|
95
|
+
apply_lambda_with_rows_output(
|
96
|
+
df,
|
97
|
+
lambda,
|
98
|
+
null_count,
|
99
|
+
first_value,
|
100
|
+
inference_size,
|
101
|
+
)
|
102
|
+
.map_err(RbPolarsErr::from)?,
|
103
|
+
)
|
104
|
+
.into(),
|
105
|
+
true,
|
106
|
+
));
|
107
|
+
} else if out.is_kind_of(class::array()) {
|
108
|
+
return Err(RbPolarsErr::other(
|
109
|
+
"A list output type is invalid. Do you mean to create polars List Series?\
|
110
|
+
Then return a Series object."
|
111
|
+
.into(),
|
112
|
+
));
|
113
|
+
} else {
|
114
|
+
return Err(RbPolarsErr::other("Could not determine output type".into()));
|
115
|
+
}
|
116
|
+
}
|
117
|
+
Err(RbPolarsErr::other("Could not determine output type".into()))
|
118
|
+
}
|
119
|
+
|
120
|
+
fn apply_iter<T>(
|
121
|
+
df: &DataFrame,
|
122
|
+
lambda: Value,
|
123
|
+
init_null_count: usize,
|
124
|
+
skip: usize,
|
125
|
+
) -> impl Iterator<Item = Option<T>> + '_
|
126
|
+
where
|
127
|
+
T: TryConvert,
|
128
|
+
{
|
129
|
+
let columns = df.get_columns();
|
130
|
+
((init_null_count + skip)..df.height()).map(move |idx| {
|
131
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
132
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
133
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
134
|
+
Ok(val) => val.try_convert::<T>().ok(),
|
135
|
+
Err(e) => panic!("ruby function failed {}", e),
|
136
|
+
}
|
137
|
+
})
|
138
|
+
}
|
139
|
+
|
140
|
+
/// Apply a lambda with a primitive output type
|
141
|
+
pub fn apply_lambda_with_primitive_out_type<D>(
|
142
|
+
df: &DataFrame,
|
143
|
+
lambda: Value,
|
144
|
+
init_null_count: usize,
|
145
|
+
first_value: Option<D::Native>,
|
146
|
+
) -> ChunkedArray<D>
|
147
|
+
where
|
148
|
+
D: RbArrowPrimitiveType,
|
149
|
+
D::Native: Into<Value> + TryConvert,
|
150
|
+
{
|
151
|
+
let skip = usize::from(first_value.is_some());
|
152
|
+
if init_null_count == df.height() {
|
153
|
+
ChunkedArray::full_null("apply", df.height())
|
154
|
+
} else {
|
155
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
156
|
+
iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
/// Apply a lambda with a boolean output type
|
161
|
+
pub fn apply_lambda_with_bool_out_type(
|
162
|
+
df: &DataFrame,
|
163
|
+
lambda: Value,
|
164
|
+
init_null_count: usize,
|
165
|
+
first_value: Option<bool>,
|
166
|
+
) -> ChunkedArray<BooleanType> {
|
167
|
+
let skip = usize::from(first_value.is_some());
|
168
|
+
if init_null_count == df.height() {
|
169
|
+
ChunkedArray::full_null("apply", df.height())
|
170
|
+
} else {
|
171
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
172
|
+
iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
176
|
+
/// Apply a lambda with utf8 output type
|
177
|
+
pub fn apply_lambda_with_utf8_out_type(
|
178
|
+
df: &DataFrame,
|
179
|
+
lambda: Value,
|
180
|
+
init_null_count: usize,
|
181
|
+
first_value: Option<&str>,
|
182
|
+
) -> Utf8Chunked {
|
183
|
+
let skip = usize::from(first_value.is_some());
|
184
|
+
if init_null_count == df.height() {
|
185
|
+
ChunkedArray::full_null("apply", df.height())
|
186
|
+
} else {
|
187
|
+
let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
|
188
|
+
iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
/// Apply a lambda with list output type
|
193
|
+
pub fn apply_lambda_with_list_out_type<'a>(
|
194
|
+
df: &'a DataFrame,
|
195
|
+
lambda: Value,
|
196
|
+
init_null_count: usize,
|
197
|
+
first_value: Option<&Series>,
|
198
|
+
dt: &DataType,
|
199
|
+
) -> RbResult<ListChunked> {
|
200
|
+
let columns = df.get_columns();
|
201
|
+
|
202
|
+
let skip = usize::from(first_value.is_some());
|
203
|
+
if init_null_count == df.height() {
|
204
|
+
Ok(ChunkedArray::full_null("apply", df.height()))
|
205
|
+
} else {
|
206
|
+
let iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
207
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
208
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
209
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
210
|
+
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
211
|
+
Ok(val) => val
|
212
|
+
.try_convert::<&RbSeries>()
|
213
|
+
.ok()
|
214
|
+
.map(|ps| ps.series.borrow().clone()),
|
215
|
+
Err(_) => {
|
216
|
+
if val.is_nil() {
|
217
|
+
None
|
218
|
+
} else {
|
219
|
+
panic!("should return a Series, got a {:?}", val)
|
220
|
+
}
|
221
|
+
}
|
222
|
+
},
|
223
|
+
Err(e) => panic!("ruby function failed {}", e),
|
224
|
+
}
|
225
|
+
});
|
226
|
+
iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
230
|
+
pub fn apply_lambda_with_rows_output<'a>(
|
231
|
+
df: &'a DataFrame,
|
232
|
+
lambda: Value,
|
233
|
+
init_null_count: usize,
|
234
|
+
first_value: Row<'a>,
|
235
|
+
inference_size: usize,
|
236
|
+
) -> PolarsResult<DataFrame> {
|
237
|
+
let columns = df.get_columns();
|
238
|
+
let width = first_value.0.len();
|
239
|
+
let null_row = Row::new(vec![AnyValue::Null; width]);
|
240
|
+
|
241
|
+
let mut row_buf = Row::default();
|
242
|
+
|
243
|
+
let skip = 1;
|
244
|
+
let mut row_iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
245
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
246
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
247
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
248
|
+
Ok(val) => {
|
249
|
+
match val.try_convert::<RArray>().ok() {
|
250
|
+
Some(tuple) => {
|
251
|
+
row_buf.0.clear();
|
252
|
+
for v in tuple.each() {
|
253
|
+
let v = v.unwrap().try_convert::<Wrap<AnyValue>>().unwrap().0;
|
254
|
+
row_buf.0.push(v);
|
255
|
+
}
|
256
|
+
let ptr = &row_buf as *const Row;
|
257
|
+
// Safety:
|
258
|
+
// we know that row constructor of polars dataframe does not keep a reference
|
259
|
+
// to the row. Before we mutate the row buf again, the reference is dropped.
|
260
|
+
// we only cannot prove it to the compiler.
|
261
|
+
// we still do this because it saves a Vec allocation in a hot loop.
|
262
|
+
unsafe { &*ptr }
|
263
|
+
}
|
264
|
+
None => &null_row,
|
265
|
+
}
|
266
|
+
}
|
267
|
+
Err(e) => panic!("ruby function failed {}", e),
|
268
|
+
}
|
269
|
+
});
|
270
|
+
|
271
|
+
// first rows for schema inference
|
272
|
+
let mut buf = Vec::with_capacity(inference_size);
|
273
|
+
buf.push(first_value);
|
274
|
+
buf.extend((&mut row_iter).take(inference_size).cloned());
|
275
|
+
let schema = rows_to_schema_first_non_null(&buf, Some(50));
|
276
|
+
|
277
|
+
if init_null_count > 0 {
|
278
|
+
// Safety: we know the iterators size
|
279
|
+
let iter = unsafe {
|
280
|
+
(0..init_null_count)
|
281
|
+
.map(|_| &null_row)
|
282
|
+
.chain(buf.iter())
|
283
|
+
.chain(row_iter)
|
284
|
+
.trust_my_length(df.height())
|
285
|
+
};
|
286
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
287
|
+
} else {
|
288
|
+
// Safety: we know the iterators size
|
289
|
+
let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
|
290
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
291
|
+
}
|
292
|
+
}
|