polars-df 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e6fb732e5dafe2fde285322554bd9159483cbbdf17d6e2bba9cba9a83563b47
4
- data.tar.gz: 1b4249d0c0100f136973c601b8404cb6d92abc632d5ed0476bd93bc5360a11dc
3
+ metadata.gz: 3a08e866e51227716cd3cb4454835016a7d61e30e964fe76a8b99704dcb60a12
4
+ data.tar.gz: 1f30c3fdd47ebf52a311909aa26ba4b6d64e426622455854b9bbc660de1229b3
5
5
  SHA512:
6
- metadata.gz: d9414d6f60c489e2b3b72885288822083ba8c04bac4053f4e34c1d53ee805d164f17fe4b8b3a8f4ff562550bcc657f374bea6e250b52985367f601ea50e3037f
7
- data.tar.gz: 9e3a7cfe105f03ec20e9c26aa38c1475074ccc1ea057a170a97b7068b41943d561d50af49bb1d1f74b7705809dc1375900f542ab93683ba627dea080274f6d91
6
+ metadata.gz: 1531fff4fc2fab8b2dc72709a69fb2890c215ae08e4223aa32262dbb4b0debb4b6f2fbab1e8138953871f5d02d462abfaba49cc7f22a66e25aa7d60f128a89bc
7
+ data.tar.gz: e1041d708e2f8046c14c565a65879fa4e5c6671cf526736a3f8418a82dfa70e17692a96d383e43f393d8761e5f29f717d63185ae1ed3f0793a6876be2d946fc0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.1.5 (2022-12-22)
2
+
3
+ - Added `read_avro` and `write_avro` methods
4
+ - Added more methods
5
+
6
+ ## 0.1.4 (2022-12-02)
7
+
8
+ - Added more methods
9
+ - Improved performance
10
+
1
11
  ## 0.1.3 (2022-11-27)
2
12
 
3
13
  - Added more methods
data/Cargo.lock CHANGED
@@ -8,6 +8,12 @@ version = "1.0.2"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
9
  checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
10
10
 
11
+ [[package]]
12
+ name = "adler32"
13
+ version = "1.2.0"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
16
+
11
17
  [[package]]
12
18
  name = "ahash"
13
19
  version = "0.7.6"
@@ -85,6 +91,7 @@ checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3"
85
91
  dependencies = [
86
92
  "ahash 0.7.6",
87
93
  "arrow-format",
94
+ "avro-schema",
88
95
  "base64",
89
96
  "bytemuck",
90
97
  "chrono",
@@ -147,6 +154,20 @@ version = "1.1.0"
147
154
  source = "registry+https://github.com/rust-lang/crates.io-index"
148
155
  checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
149
156
 
157
+ [[package]]
158
+ name = "avro-schema"
159
+ version = "0.3.0"
160
+ source = "registry+https://github.com/rust-lang/crates.io-index"
161
+ checksum = "b5281855b39aba9684d2f47bf96983fbfd8f1725f12fabb0513a8ab879647bbd"
162
+ dependencies = [
163
+ "crc",
164
+ "fallible-streaming-iterator",
165
+ "libflate",
166
+ "serde",
167
+ "serde_json",
168
+ "snap",
169
+ ]
170
+
150
171
  [[package]]
151
172
  name = "base64"
152
173
  version = "0.13.1"
@@ -314,6 +335,21 @@ dependencies = [
314
335
  "unicode-width",
315
336
  ]
316
337
 
338
+ [[package]]
339
+ name = "crc"
340
+ version = "2.1.0"
341
+ source = "registry+https://github.com/rust-lang/crates.io-index"
342
+ checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
343
+ dependencies = [
344
+ "crc-catalog",
345
+ ]
346
+
347
+ [[package]]
348
+ name = "crc-catalog"
349
+ version = "1.1.1"
350
+ source = "registry+https://github.com/rust-lang/crates.io-index"
351
+ checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
352
+
317
353
  [[package]]
318
354
  name = "crc32fast"
319
355
  version = "1.3.2"
@@ -483,6 +519,12 @@ version = "0.1.0"
483
519
  source = "registry+https://github.com/rust-lang/crates.io-index"
484
520
  checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
485
521
 
522
+ [[package]]
523
+ name = "fs_extra"
524
+ version = "1.2.0"
525
+ source = "registry+https://github.com/rust-lang/crates.io-index"
526
+ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
527
+
486
528
  [[package]]
487
529
  name = "futures"
488
530
  version = "0.3.25"
@@ -646,6 +688,12 @@ dependencies = [
646
688
  "libc",
647
689
  ]
648
690
 
691
+ [[package]]
692
+ name = "hex"
693
+ version = "0.4.3"
694
+ source = "registry+https://github.com/rust-lang/crates.io-index"
695
+ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
696
+
649
697
  [[package]]
650
698
  name = "indexmap"
651
699
  version = "1.8.0"
@@ -663,6 +711,27 @@ version = "1.0.4"
663
711
  source = "registry+https://github.com/rust-lang/crates.io-index"
664
712
  checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
665
713
 
714
+ [[package]]
715
+ name = "jemalloc-sys"
716
+ version = "0.5.2+5.3.0-patched"
717
+ source = "registry+https://github.com/rust-lang/crates.io-index"
718
+ checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
719
+ dependencies = [
720
+ "cc",
721
+ "fs_extra",
722
+ "libc",
723
+ ]
724
+
725
+ [[package]]
726
+ name = "jemallocator"
727
+ version = "0.5.0"
728
+ source = "registry+https://github.com/rust-lang/crates.io-index"
729
+ checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
730
+ dependencies = [
731
+ "jemalloc-sys",
732
+ "libc",
733
+ ]
734
+
666
735
  [[package]]
667
736
  name = "jobserver"
668
737
  version = "0.1.25"
@@ -683,13 +752,23 @@ dependencies = [
683
752
 
684
753
  [[package]]
685
754
  name = "json-deserializer"
686
- version = "0.4.2"
755
+ version = "0.4.3"
687
756
  source = "registry+https://github.com/rust-lang/crates.io-index"
688
- checksum = "d784d2d481d0bace3450572391d6076dd6d10c66c0ebc1a0be037b3b420664bd"
757
+ checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
689
758
  dependencies = [
690
759
  "indexmap",
691
760
  ]
692
761
 
762
+ [[package]]
763
+ name = "jsonpath_lib"
764
+ version = "0.3.0"
765
+ source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
766
+ dependencies = [
767
+ "log",
768
+ "serde",
769
+ "serde_json",
770
+ ]
771
+
693
772
  [[package]]
694
773
  name = "lazy_static"
695
774
  version = "1.4.0"
@@ -781,6 +860,26 @@ version = "0.2.121"
781
860
  source = "registry+https://github.com/rust-lang/crates.io-index"
782
861
  checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
783
862
 
863
+ [[package]]
864
+ name = "libflate"
865
+ version = "1.2.0"
866
+ source = "registry+https://github.com/rust-lang/crates.io-index"
867
+ checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
868
+ dependencies = [
869
+ "adler32",
870
+ "crc32fast",
871
+ "libflate_lz77",
872
+ ]
873
+
874
+ [[package]]
875
+ name = "libflate_lz77"
876
+ version = "1.1.0"
877
+ source = "registry+https://github.com/rust-lang/crates.io-index"
878
+ checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
879
+ dependencies = [
880
+ "rle-decode-fast",
881
+ ]
882
+
784
883
  [[package]]
785
884
  name = "libloading"
786
885
  version = "0.7.4"
@@ -797,6 +896,16 @@ version = "0.2.6"
797
896
  source = "registry+https://github.com/rust-lang/crates.io-index"
798
897
  checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
799
898
 
899
+ [[package]]
900
+ name = "libmimalloc-sys"
901
+ version = "0.1.28"
902
+ source = "registry+https://github.com/rust-lang/crates.io-index"
903
+ checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
904
+ dependencies = [
905
+ "cc",
906
+ "libc",
907
+ ]
908
+
800
909
  [[package]]
801
910
  name = "lock_api"
802
911
  version = "0.4.9"
@@ -850,8 +959,7 @@ dependencies = [
850
959
  [[package]]
851
960
  name = "magnus-macros"
852
961
  version = "0.2.0"
853
- source = "registry+https://github.com/rust-lang/crates.io-index"
854
- checksum = "acc8ba6908cb0f67a4e75cb48fc81a1f0e6a6dd1501936e0c9e2c7c8f9f18e05"
962
+ source = "git+https://github.com/matsadler/magnus#ae792419bed70107d4c930e1f8193272750b9fd2"
855
963
  dependencies = [
856
964
  "proc-macro2",
857
965
  "quote",
@@ -882,6 +990,15 @@ dependencies = [
882
990
  "autocfg",
883
991
  ]
884
992
 
993
+ [[package]]
994
+ name = "mimalloc"
995
+ version = "0.1.32"
996
+ source = "registry+https://github.com/rust-lang/crates.io-index"
997
+ checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
998
+ dependencies = [
999
+ "libmimalloc-sys",
1000
+ ]
1001
+
885
1002
  [[package]]
886
1003
  name = "minimal-lexical"
887
1004
  version = "0.2.1"
@@ -1160,10 +1277,14 @@ dependencies = [
1160
1277
 
1161
1278
  [[package]]
1162
1279
  name = "polars"
1163
- version = "0.1.3"
1280
+ version = "0.1.5"
1164
1281
  dependencies = [
1282
+ "ahash 0.8.2",
1283
+ "jemallocator",
1165
1284
  "magnus",
1285
+ "mimalloc",
1166
1286
  "polars 0.25.1",
1287
+ "polars-core",
1167
1288
  "serde_json",
1168
1289
  ]
1169
1290
 
@@ -1202,11 +1323,13 @@ dependencies = [
1202
1323
  "ahash 0.8.2",
1203
1324
  "anyhow",
1204
1325
  "arrow2",
1326
+ "base64",
1205
1327
  "bitflags",
1206
1328
  "chrono",
1207
1329
  "chrono-tz",
1208
1330
  "comfy-table",
1209
1331
  "hashbrown 0.12.3",
1332
+ "hex",
1210
1333
  "indexmap",
1211
1334
  "num",
1212
1335
  "once_cell",
@@ -1277,9 +1400,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
1277
1400
  checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
1278
1401
  dependencies = [
1279
1402
  "arrow2",
1403
+ "jsonpath_lib",
1280
1404
  "polars-arrow",
1281
1405
  "polars-core",
1282
1406
  "polars-utils",
1407
+ "serde_json",
1283
1408
  ]
1284
1409
 
1285
1410
  [[package]]
@@ -1431,18 +1556,18 @@ dependencies = [
1431
1556
 
1432
1557
  [[package]]
1433
1558
  name = "rb-sys"
1434
- version = "0.9.44"
1559
+ version = "0.9.48"
1435
1560
  source = "registry+https://github.com/rust-lang/crates.io-index"
1436
- checksum = "31f48777b8161ff5c077ad74ce486ebe963ca8a92257512bab473b405a80d69f"
1561
+ checksum = "dfc6b8f3bf2d04b0180e243ceeb033b51ca267d839aa1c12fa25f262c17d0596"
1437
1562
  dependencies = [
1438
1563
  "rb-sys-build",
1439
1564
  ]
1440
1565
 
1441
1566
  [[package]]
1442
1567
  name = "rb-sys-build"
1443
- version = "0.9.44"
1568
+ version = "0.9.48"
1444
1569
  source = "registry+https://github.com/rust-lang/crates.io-index"
1445
- checksum = "a46785122aff7077527b78c2518d739c45dc0fbc410a2b8361076ff4bbf993f9"
1570
+ checksum = "2cd591ebf22c45a44e51192fbeebba473aea0fe2a708b0b24665a13010c58b8d"
1446
1571
  dependencies = [
1447
1572
  "bindgen",
1448
1573
  "regex",
@@ -1452,8 +1577,7 @@ dependencies = [
1452
1577
  [[package]]
1453
1578
  name = "rb-sys-env"
1454
1579
  version = "0.1.1"
1455
- source = "registry+https://github.com/rust-lang/crates.io-index"
1456
- checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
1580
+ source = "git+https://github.com/oxidize-rb/rb-sys#93c4f97a244168b9ebc2c5682275e7281421f4b8"
1457
1581
 
1458
1582
  [[package]]
1459
1583
  name = "redox_syscall"
@@ -1492,6 +1616,12 @@ version = "0.6.28"
1492
1616
  source = "registry+https://github.com/rust-lang/crates.io-index"
1493
1617
  checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
1494
1618
 
1619
+ [[package]]
1620
+ name = "rle-decode-fast"
1621
+ version = "1.0.3"
1622
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1623
+ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
1624
+
1495
1625
  [[package]]
1496
1626
  name = "rustc-hash"
1497
1627
  version = "1.1.0"
@@ -1548,6 +1678,7 @@ version = "1.0.88"
1548
1678
  source = "registry+https://github.com/rust-lang/crates.io-index"
1549
1679
  checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
1550
1680
  dependencies = [
1681
+ "indexmap",
1551
1682
  "itoa",
1552
1683
  "ryu",
1553
1684
  "serde",
data/Cargo.toml CHANGED
@@ -1,5 +1,10 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
3
 
4
+ [patch.crates-io]
5
+ jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
+ magnus-macros = { git = "https://github.com/matsadler/magnus" }
7
+ rb-sys-env = { git = "https://github.com/oxidize-rb/rb-sys" }
8
+
4
9
  [profile.release]
5
10
  strip = true
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.1.3"
3
+ version = "0.1.5"
4
4
  authors = ["Andrew Kane <andrew@ankane.org>"]
5
5
  edition = "2021"
6
6
  publish = false
@@ -9,7 +9,9 @@ publish = false
9
9
  crate-type = ["cdylib"]
10
10
 
11
11
  [dependencies]
12
+ ahash = "0.8"
12
13
  magnus = "0.4"
14
+ polars-core = "0.25.1"
13
15
  serde_json = "1"
14
16
 
15
17
  [dependencies.polars]
@@ -18,7 +20,10 @@ features = [
18
20
  "abs",
19
21
  "arange",
20
22
  "arg_where",
23
+ "asof_join",
24
+ "avro",
21
25
  "concat_str",
26
+ "cse",
22
27
  "csv-file",
23
28
  "cum_agg",
24
29
  "cumulative_eval",
@@ -30,6 +35,7 @@ features = [
30
35
  "dtype-full",
31
36
  "dynamic_groupby",
32
37
  "ewma",
38
+ "extract_jsonpath",
33
39
  "fmt",
34
40
  "horizontal_concat",
35
41
  "interpolate",
@@ -49,6 +55,8 @@ features = [
49
55
  "parquet",
50
56
  "partition_by",
51
57
  "pct_change",
58
+ "performant",
59
+ "pivot",
52
60
  "product",
53
61
  "propagate_nans",
54
62
  "random",
@@ -57,10 +65,12 @@ features = [
57
65
  "repeat_by",
58
66
  "rolling_window",
59
67
  "round_series",
68
+ "row_hash",
60
69
  "search_sorted",
61
70
  "semi_anti_join",
62
71
  "serde-lazy",
63
72
  "sign",
73
+ "string_encoding",
64
74
  "string_justify",
65
75
  "strings",
66
76
  "timezones",
@@ -69,3 +79,9 @@ features = [
69
79
  "trigonometry",
70
80
  "unique_counts",
71
81
  ]
82
+
83
+ [target.'cfg(target_os = "linux")'.dependencies]
84
+ jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
85
+
86
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
87
+ mimalloc = { version = "0.1", default-features = false }
data/ext/polars/src/apply/dataframe.rs ADDED
@@ -0,0 +1,292 @@
1
+ use magnus::{class, RArray, TryConvert, Value};
2
+ use polars::prelude::*;
3
+ use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
4
+
5
+ use super::*;
6
+ use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
7
+
8
+ pub fn apply_lambda_unknown<'a>(
9
+ df: &'a DataFrame,
10
+ lambda: Value,
11
+ inference_size: usize,
12
+ ) -> RbResult<(Value, bool)> {
13
+ let columns = df.get_columns();
14
+ let mut null_count = 0;
15
+
16
+ for idx in 0..df.height() {
17
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
18
+ let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
19
+ let out: Value = lambda.funcall("call", arg)?;
20
+
21
+ if out.is_nil() {
22
+ null_count += 1;
23
+ continue;
24
+ } else if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
25
+ let first_value = out.try_convert::<bool>().ok();
26
+ return Ok((
27
+ RbSeries::new(
28
+ apply_lambda_with_bool_out_type(df, lambda, null_count, first_value)
29
+ .into_series(),
30
+ )
31
+ .into(),
32
+ false,
33
+ ));
34
+ } else if out.is_kind_of(class::float()) {
35
+ let first_value = out.try_convert::<f64>().ok();
36
+
37
+ return Ok((
38
+ RbSeries::new(
39
+ apply_lambda_with_primitive_out_type::<Float64Type>(
40
+ df,
41
+ lambda,
42
+ null_count,
43
+ first_value,
44
+ )
45
+ .into_series(),
46
+ )
47
+ .into(),
48
+ false,
49
+ ));
50
+ } else if out.is_kind_of(class::integer()) {
51
+ let first_value = out.try_convert::<i64>().ok();
52
+ return Ok((
53
+ RbSeries::new(
54
+ apply_lambda_with_primitive_out_type::<Int64Type>(
55
+ df,
56
+ lambda,
57
+ null_count,
58
+ first_value,
59
+ )
60
+ .into_series(),
61
+ )
62
+ .into(),
63
+ false,
64
+ ));
65
+ // } else if out.is_kind_of(class::string()) {
66
+ // let first_value = out.try_convert::<String>().ok();
67
+ // return Ok((
68
+ // RbSeries::new(
69
+ // apply_lambda_with_utf8_out_type(df, lambda, null_count, first_value)
70
+ // .into_series(),
71
+ // )
72
+ // .into(),
73
+ // false,
74
+ // ));
75
+ } else if out.respond_to("_s", true)? {
76
+ let rb_rbseries: Value = out.funcall("_s", ()).unwrap();
77
+ let series = rb_rbseries
78
+ .try_convert::<&RbSeries>()
79
+ .unwrap()
80
+ .series
81
+ .borrow();
82
+ let dt = series.dtype();
83
+ return Ok((
84
+ RbSeries::new(
85
+ apply_lambda_with_list_out_type(df, lambda, null_count, Some(&series), dt)?
86
+ .into_series(),
87
+ )
88
+ .into(),
89
+ false,
90
+ ));
91
+ } else if out.try_convert::<Wrap<Row<'a>>>().is_ok() {
92
+ let first_value = out.try_convert::<Wrap<Row<'a>>>().unwrap().0;
93
+ return Ok((
94
+ RbDataFrame::from(
95
+ apply_lambda_with_rows_output(
96
+ df,
97
+ lambda,
98
+ null_count,
99
+ first_value,
100
+ inference_size,
101
+ )
102
+ .map_err(RbPolarsErr::from)?,
103
+ )
104
+ .into(),
105
+ true,
106
+ ));
107
+ } else if out.is_kind_of(class::array()) {
108
+ return Err(RbPolarsErr::other(
109
+ "A list output type is invalid. Do you mean to create polars List Series?\
110
+ Then return a Series object."
111
+ .into(),
112
+ ));
113
+ } else {
114
+ return Err(RbPolarsErr::other("Could not determine output type".into()));
115
+ }
116
+ }
117
+ Err(RbPolarsErr::other("Could not determine output type".into()))
118
+ }
119
+
120
+ fn apply_iter<T>(
121
+ df: &DataFrame,
122
+ lambda: Value,
123
+ init_null_count: usize,
124
+ skip: usize,
125
+ ) -> impl Iterator<Item = Option<T>> + '_
126
+ where
127
+ T: TryConvert,
128
+ {
129
+ let columns = df.get_columns();
130
+ ((init_null_count + skip)..df.height()).map(move |idx| {
131
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
132
+ let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
133
+ match lambda.funcall::<_, _, Value>("call", tpl) {
134
+ Ok(val) => val.try_convert::<T>().ok(),
135
+ Err(e) => panic!("ruby function failed {}", e),
136
+ }
137
+ })
138
+ }
139
+
140
+ /// Apply a lambda with a primitive output type
141
+ pub fn apply_lambda_with_primitive_out_type<D>(
142
+ df: &DataFrame,
143
+ lambda: Value,
144
+ init_null_count: usize,
145
+ first_value: Option<D::Native>,
146
+ ) -> ChunkedArray<D>
147
+ where
148
+ D: RbArrowPrimitiveType,
149
+ D::Native: Into<Value> + TryConvert,
150
+ {
151
+ let skip = usize::from(first_value.is_some());
152
+ if init_null_count == df.height() {
153
+ ChunkedArray::full_null("apply", df.height())
154
+ } else {
155
+ let iter = apply_iter(df, lambda, init_null_count, skip);
156
+ iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
157
+ }
158
+ }
159
+
160
+ /// Apply a lambda with a boolean output type
161
+ pub fn apply_lambda_with_bool_out_type(
162
+ df: &DataFrame,
163
+ lambda: Value,
164
+ init_null_count: usize,
165
+ first_value: Option<bool>,
166
+ ) -> ChunkedArray<BooleanType> {
167
+ let skip = usize::from(first_value.is_some());
168
+ if init_null_count == df.height() {
169
+ ChunkedArray::full_null("apply", df.height())
170
+ } else {
171
+ let iter = apply_iter(df, lambda, init_null_count, skip);
172
+ iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
173
+ }
174
+ }
175
+
176
+ /// Apply a lambda with utf8 output type
177
+ pub fn apply_lambda_with_utf8_out_type(
178
+ df: &DataFrame,
179
+ lambda: Value,
180
+ init_null_count: usize,
181
+ first_value: Option<&str>,
182
+ ) -> Utf8Chunked {
183
+ let skip = usize::from(first_value.is_some());
184
+ if init_null_count == df.height() {
185
+ ChunkedArray::full_null("apply", df.height())
186
+ } else {
187
+ let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
188
+ iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
189
+ }
190
+ }
191
+
192
+ /// Apply a lambda with list output type
193
+ pub fn apply_lambda_with_list_out_type<'a>(
194
+ df: &'a DataFrame,
195
+ lambda: Value,
196
+ init_null_count: usize,
197
+ first_value: Option<&Series>,
198
+ dt: &DataType,
199
+ ) -> RbResult<ListChunked> {
200
+ let columns = df.get_columns();
201
+
202
+ let skip = usize::from(first_value.is_some());
203
+ if init_null_count == df.height() {
204
+ Ok(ChunkedArray::full_null("apply", df.height()))
205
+ } else {
206
+ let iter = ((init_null_count + skip)..df.height()).map(|idx| {
207
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
208
+ let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
209
+ match lambda.funcall::<_, _, Value>("call", tpl) {
210
+ Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
211
+ Ok(val) => val
212
+ .try_convert::<&RbSeries>()
213
+ .ok()
214
+ .map(|ps| ps.series.borrow().clone()),
215
+ Err(_) => {
216
+ if val.is_nil() {
217
+ None
218
+ } else {
219
+ panic!("should return a Series, got a {:?}", val)
220
+ }
221
+ }
222
+ },
223
+ Err(e) => panic!("ruby function failed {}", e),
224
+ }
225
+ });
226
+ iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
227
+ }
228
+ }
229
+
230
+ pub fn apply_lambda_with_rows_output<'a>(
231
+ df: &'a DataFrame,
232
+ lambda: Value,
233
+ init_null_count: usize,
234
+ first_value: Row<'a>,
235
+ inference_size: usize,
236
+ ) -> PolarsResult<DataFrame> {
237
+ let columns = df.get_columns();
238
+ let width = first_value.0.len();
239
+ let null_row = Row::new(vec![AnyValue::Null; width]);
240
+
241
+ let mut row_buf = Row::default();
242
+
243
+ let skip = 1;
244
+ let mut row_iter = ((init_null_count + skip)..df.height()).map(|idx| {
245
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
246
+ let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
247
+ match lambda.funcall::<_, _, Value>("call", tpl) {
248
+ Ok(val) => {
249
+ match val.try_convert::<RArray>().ok() {
250
+ Some(tuple) => {
251
+ row_buf.0.clear();
252
+ for v in tuple.each() {
253
+ let v = v.unwrap().try_convert::<Wrap<AnyValue>>().unwrap().0;
254
+ row_buf.0.push(v);
255
+ }
256
+ let ptr = &row_buf as *const Row;
257
+ // Safety:
258
+ // we know that row constructor of polars dataframe does not keep a reference
259
+ // to the row. Before we mutate the row buf again, the reference is dropped.
260
+ // we only cannot prove it to the compiler.
261
+ // we still do this because it saves a Vec allocation in a hot loop.
262
+ unsafe { &*ptr }
263
+ }
264
+ None => &null_row,
265
+ }
266
+ }
267
+ Err(e) => panic!("ruby function failed {}", e),
268
+ }
269
+ });
270
+
271
+ // first rows for schema inference
272
+ let mut buf = Vec::with_capacity(inference_size);
273
+ buf.push(first_value);
274
+ buf.extend((&mut row_iter).take(inference_size).cloned());
275
+ let schema = rows_to_schema_first_non_null(&buf, Some(50));
276
+
277
+ if init_null_count > 0 {
278
+ // Safety: we know the iterators size
279
+ let iter = unsafe {
280
+ (0..init_null_count)
281
+ .map(|_| &null_row)
282
+ .chain(buf.iter())
283
+ .chain(row_iter)
284
+ .trust_my_length(df.height())
285
+ };
286
+ DataFrame::from_rows_iter_and_schema(iter, &schema)
287
+ } else {
288
+ // Safety: we know the iterators size
289
+ let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
290
+ DataFrame::from_rows_iter_and_schema(iter, &schema)
291
+ }
292
+ }