polars-df 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e6fb732e5dafe2fde285322554bd9159483cbbdf17d6e2bba9cba9a83563b47
4
- data.tar.gz: 1b4249d0c0100f136973c601b8404cb6d92abc632d5ed0476bd93bc5360a11dc
3
+ metadata.gz: 3a08e866e51227716cd3cb4454835016a7d61e30e964fe76a8b99704dcb60a12
4
+ data.tar.gz: 1f30c3fdd47ebf52a311909aa26ba4b6d64e426622455854b9bbc660de1229b3
5
5
  SHA512:
6
- metadata.gz: d9414d6f60c489e2b3b72885288822083ba8c04bac4053f4e34c1d53ee805d164f17fe4b8b3a8f4ff562550bcc657f374bea6e250b52985367f601ea50e3037f
7
- data.tar.gz: 9e3a7cfe105f03ec20e9c26aa38c1475074ccc1ea057a170a97b7068b41943d561d50af49bb1d1f74b7705809dc1375900f542ab93683ba627dea080274f6d91
6
+ metadata.gz: 1531fff4fc2fab8b2dc72709a69fb2890c215ae08e4223aa32262dbb4b0debb4b6f2fbab1e8138953871f5d02d462abfaba49cc7f22a66e25aa7d60f128a89bc
7
+ data.tar.gz: e1041d708e2f8046c14c565a65879fa4e5c6671cf526736a3f8418a82dfa70e17692a96d383e43f393d8761e5f29f717d63185ae1ed3f0793a6876be2d946fc0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.1.5 (2022-12-22)
2
+
3
+ - Added `read_avro` and `write_avro` methods
4
+ - Added more methods
5
+
6
+ ## 0.1.4 (2022-12-02)
7
+
8
+ - Added more methods
9
+ - Improved performance
10
+
1
11
  ## 0.1.3 (2022-11-27)
2
12
 
3
13
  - Added more methods
data/Cargo.lock CHANGED
@@ -8,6 +8,12 @@ version = "1.0.2"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
9
  checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
10
10
 
11
+ [[package]]
12
+ name = "adler32"
13
+ version = "1.2.0"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
16
+
11
17
  [[package]]
12
18
  name = "ahash"
13
19
  version = "0.7.6"
@@ -85,6 +91,7 @@ checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3"
85
91
  dependencies = [
86
92
  "ahash 0.7.6",
87
93
  "arrow-format",
94
+ "avro-schema",
88
95
  "base64",
89
96
  "bytemuck",
90
97
  "chrono",
@@ -147,6 +154,20 @@ version = "1.1.0"
147
154
  source = "registry+https://github.com/rust-lang/crates.io-index"
148
155
  checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
149
156
 
157
+ [[package]]
158
+ name = "avro-schema"
159
+ version = "0.3.0"
160
+ source = "registry+https://github.com/rust-lang/crates.io-index"
161
+ checksum = "b5281855b39aba9684d2f47bf96983fbfd8f1725f12fabb0513a8ab879647bbd"
162
+ dependencies = [
163
+ "crc",
164
+ "fallible-streaming-iterator",
165
+ "libflate",
166
+ "serde",
167
+ "serde_json",
168
+ "snap",
169
+ ]
170
+
150
171
  [[package]]
151
172
  name = "base64"
152
173
  version = "0.13.1"
@@ -314,6 +335,21 @@ dependencies = [
314
335
  "unicode-width",
315
336
  ]
316
337
 
338
+ [[package]]
339
+ name = "crc"
340
+ version = "2.1.0"
341
+ source = "registry+https://github.com/rust-lang/crates.io-index"
342
+ checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
343
+ dependencies = [
344
+ "crc-catalog",
345
+ ]
346
+
347
+ [[package]]
348
+ name = "crc-catalog"
349
+ version = "1.1.1"
350
+ source = "registry+https://github.com/rust-lang/crates.io-index"
351
+ checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
352
+
317
353
  [[package]]
318
354
  name = "crc32fast"
319
355
  version = "1.3.2"
@@ -483,6 +519,12 @@ version = "0.1.0"
483
519
  source = "registry+https://github.com/rust-lang/crates.io-index"
484
520
  checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
485
521
 
522
+ [[package]]
523
+ name = "fs_extra"
524
+ version = "1.2.0"
525
+ source = "registry+https://github.com/rust-lang/crates.io-index"
526
+ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
527
+
486
528
  [[package]]
487
529
  name = "futures"
488
530
  version = "0.3.25"
@@ -646,6 +688,12 @@ dependencies = [
646
688
  "libc",
647
689
  ]
648
690
 
691
+ [[package]]
692
+ name = "hex"
693
+ version = "0.4.3"
694
+ source = "registry+https://github.com/rust-lang/crates.io-index"
695
+ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
696
+
649
697
  [[package]]
650
698
  name = "indexmap"
651
699
  version = "1.8.0"
@@ -663,6 +711,27 @@ version = "1.0.4"
663
711
  source = "registry+https://github.com/rust-lang/crates.io-index"
664
712
  checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
665
713
 
714
+ [[package]]
715
+ name = "jemalloc-sys"
716
+ version = "0.5.2+5.3.0-patched"
717
+ source = "registry+https://github.com/rust-lang/crates.io-index"
718
+ checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
719
+ dependencies = [
720
+ "cc",
721
+ "fs_extra",
722
+ "libc",
723
+ ]
724
+
725
+ [[package]]
726
+ name = "jemallocator"
727
+ version = "0.5.0"
728
+ source = "registry+https://github.com/rust-lang/crates.io-index"
729
+ checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
730
+ dependencies = [
731
+ "jemalloc-sys",
732
+ "libc",
733
+ ]
734
+
666
735
  [[package]]
667
736
  name = "jobserver"
668
737
  version = "0.1.25"
@@ -683,13 +752,23 @@ dependencies = [
683
752
 
684
753
  [[package]]
685
754
  name = "json-deserializer"
686
- version = "0.4.2"
755
+ version = "0.4.3"
687
756
  source = "registry+https://github.com/rust-lang/crates.io-index"
688
- checksum = "d784d2d481d0bace3450572391d6076dd6d10c66c0ebc1a0be037b3b420664bd"
757
+ checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
689
758
  dependencies = [
690
759
  "indexmap",
691
760
  ]
692
761
 
762
+ [[package]]
763
+ name = "jsonpath_lib"
764
+ version = "0.3.0"
765
+ source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
766
+ dependencies = [
767
+ "log",
768
+ "serde",
769
+ "serde_json",
770
+ ]
771
+
693
772
  [[package]]
694
773
  name = "lazy_static"
695
774
  version = "1.4.0"
@@ -781,6 +860,26 @@ version = "0.2.121"
781
860
  source = "registry+https://github.com/rust-lang/crates.io-index"
782
861
  checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
783
862
 
863
+ [[package]]
864
+ name = "libflate"
865
+ version = "1.2.0"
866
+ source = "registry+https://github.com/rust-lang/crates.io-index"
867
+ checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
868
+ dependencies = [
869
+ "adler32",
870
+ "crc32fast",
871
+ "libflate_lz77",
872
+ ]
873
+
874
+ [[package]]
875
+ name = "libflate_lz77"
876
+ version = "1.1.0"
877
+ source = "registry+https://github.com/rust-lang/crates.io-index"
878
+ checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
879
+ dependencies = [
880
+ "rle-decode-fast",
881
+ ]
882
+
784
883
  [[package]]
785
884
  name = "libloading"
786
885
  version = "0.7.4"
@@ -797,6 +896,16 @@ version = "0.2.6"
797
896
  source = "registry+https://github.com/rust-lang/crates.io-index"
798
897
  checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
799
898
 
899
+ [[package]]
900
+ name = "libmimalloc-sys"
901
+ version = "0.1.28"
902
+ source = "registry+https://github.com/rust-lang/crates.io-index"
903
+ checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
904
+ dependencies = [
905
+ "cc",
906
+ "libc",
907
+ ]
908
+
800
909
  [[package]]
801
910
  name = "lock_api"
802
911
  version = "0.4.9"
@@ -850,8 +959,7 @@ dependencies = [
850
959
  [[package]]
851
960
  name = "magnus-macros"
852
961
  version = "0.2.0"
853
- source = "registry+https://github.com/rust-lang/crates.io-index"
854
- checksum = "acc8ba6908cb0f67a4e75cb48fc81a1f0e6a6dd1501936e0c9e2c7c8f9f18e05"
962
+ source = "git+https://github.com/matsadler/magnus#ae792419bed70107d4c930e1f8193272750b9fd2"
855
963
  dependencies = [
856
964
  "proc-macro2",
857
965
  "quote",
@@ -882,6 +990,15 @@ dependencies = [
882
990
  "autocfg",
883
991
  ]
884
992
 
993
+ [[package]]
994
+ name = "mimalloc"
995
+ version = "0.1.32"
996
+ source = "registry+https://github.com/rust-lang/crates.io-index"
997
+ checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
998
+ dependencies = [
999
+ "libmimalloc-sys",
1000
+ ]
1001
+
885
1002
  [[package]]
886
1003
  name = "minimal-lexical"
887
1004
  version = "0.2.1"
@@ -1160,10 +1277,14 @@ dependencies = [
1160
1277
 
1161
1278
  [[package]]
1162
1279
  name = "polars"
1163
- version = "0.1.3"
1280
+ version = "0.1.5"
1164
1281
  dependencies = [
1282
+ "ahash 0.8.2",
1283
+ "jemallocator",
1165
1284
  "magnus",
1285
+ "mimalloc",
1166
1286
  "polars 0.25.1",
1287
+ "polars-core",
1167
1288
  "serde_json",
1168
1289
  ]
1169
1290
 
@@ -1202,11 +1323,13 @@ dependencies = [
1202
1323
  "ahash 0.8.2",
1203
1324
  "anyhow",
1204
1325
  "arrow2",
1326
+ "base64",
1205
1327
  "bitflags",
1206
1328
  "chrono",
1207
1329
  "chrono-tz",
1208
1330
  "comfy-table",
1209
1331
  "hashbrown 0.12.3",
1332
+ "hex",
1210
1333
  "indexmap",
1211
1334
  "num",
1212
1335
  "once_cell",
@@ -1277,9 +1400,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
1277
1400
  checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
1278
1401
  dependencies = [
1279
1402
  "arrow2",
1403
+ "jsonpath_lib",
1280
1404
  "polars-arrow",
1281
1405
  "polars-core",
1282
1406
  "polars-utils",
1407
+ "serde_json",
1283
1408
  ]
1284
1409
 
1285
1410
  [[package]]
@@ -1431,18 +1556,18 @@ dependencies = [
1431
1556
 
1432
1557
  [[package]]
1433
1558
  name = "rb-sys"
1434
- version = "0.9.44"
1559
+ version = "0.9.48"
1435
1560
  source = "registry+https://github.com/rust-lang/crates.io-index"
1436
- checksum = "31f48777b8161ff5c077ad74ce486ebe963ca8a92257512bab473b405a80d69f"
1561
+ checksum = "dfc6b8f3bf2d04b0180e243ceeb033b51ca267d839aa1c12fa25f262c17d0596"
1437
1562
  dependencies = [
1438
1563
  "rb-sys-build",
1439
1564
  ]
1440
1565
 
1441
1566
  [[package]]
1442
1567
  name = "rb-sys-build"
1443
- version = "0.9.44"
1568
+ version = "0.9.48"
1444
1569
  source = "registry+https://github.com/rust-lang/crates.io-index"
1445
- checksum = "a46785122aff7077527b78c2518d739c45dc0fbc410a2b8361076ff4bbf993f9"
1570
+ checksum = "2cd591ebf22c45a44e51192fbeebba473aea0fe2a708b0b24665a13010c58b8d"
1446
1571
  dependencies = [
1447
1572
  "bindgen",
1448
1573
  "regex",
@@ -1452,8 +1577,7 @@ dependencies = [
1452
1577
  [[package]]
1453
1578
  name = "rb-sys-env"
1454
1579
  version = "0.1.1"
1455
- source = "registry+https://github.com/rust-lang/crates.io-index"
1456
- checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
1580
+ source = "git+https://github.com/oxidize-rb/rb-sys#93c4f97a244168b9ebc2c5682275e7281421f4b8"
1457
1581
 
1458
1582
  [[package]]
1459
1583
  name = "redox_syscall"
@@ -1492,6 +1616,12 @@ version = "0.6.28"
1492
1616
  source = "registry+https://github.com/rust-lang/crates.io-index"
1493
1617
  checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
1494
1618
 
1619
+ [[package]]
1620
+ name = "rle-decode-fast"
1621
+ version = "1.0.3"
1622
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1623
+ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
1624
+
1495
1625
  [[package]]
1496
1626
  name = "rustc-hash"
1497
1627
  version = "1.1.0"
@@ -1548,6 +1678,7 @@ version = "1.0.88"
1548
1678
  source = "registry+https://github.com/rust-lang/crates.io-index"
1549
1679
  checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
1550
1680
  dependencies = [
1681
+ "indexmap",
1551
1682
  "itoa",
1552
1683
  "ryu",
1553
1684
  "serde",
data/Cargo.toml CHANGED
@@ -1,5 +1,10 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
3
 
4
+ [patch.crates-io]
5
+ jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
+ magnus-macros = { git = "https://github.com/matsadler/magnus" }
7
+ rb-sys-env = { git = "https://github.com/oxidize-rb/rb-sys" }
8
+
4
9
  [profile.release]
5
10
  strip = true
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.1.3"
3
+ version = "0.1.5"
4
4
  authors = ["Andrew Kane <andrew@ankane.org>"]
5
5
  edition = "2021"
6
6
  publish = false
@@ -9,7 +9,9 @@ publish = false
9
9
  crate-type = ["cdylib"]
10
10
 
11
11
  [dependencies]
12
+ ahash = "0.8"
12
13
  magnus = "0.4"
14
+ polars-core = "0.25.1"
13
15
  serde_json = "1"
14
16
 
15
17
  [dependencies.polars]
@@ -18,7 +20,10 @@ features = [
18
20
  "abs",
19
21
  "arange",
20
22
  "arg_where",
23
+ "asof_join",
24
+ "avro",
21
25
  "concat_str",
26
+ "cse",
22
27
  "csv-file",
23
28
  "cum_agg",
24
29
  "cumulative_eval",
@@ -30,6 +35,7 @@ features = [
30
35
  "dtype-full",
31
36
  "dynamic_groupby",
32
37
  "ewma",
38
+ "extract_jsonpath",
33
39
  "fmt",
34
40
  "horizontal_concat",
35
41
  "interpolate",
@@ -49,6 +55,8 @@ features = [
49
55
  "parquet",
50
56
  "partition_by",
51
57
  "pct_change",
58
+ "performant",
59
+ "pivot",
52
60
  "product",
53
61
  "propagate_nans",
54
62
  "random",
@@ -57,10 +65,12 @@ features = [
57
65
  "repeat_by",
58
66
  "rolling_window",
59
67
  "round_series",
68
+ "row_hash",
60
69
  "search_sorted",
61
70
  "semi_anti_join",
62
71
  "serde-lazy",
63
72
  "sign",
73
+ "string_encoding",
64
74
  "string_justify",
65
75
  "strings",
66
76
  "timezones",
@@ -69,3 +79,9 @@ features = [
69
79
  "trigonometry",
70
80
  "unique_counts",
71
81
  ]
82
+
83
+ [target.'cfg(target_os = "linux")'.dependencies]
84
+ jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
85
+
86
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
87
+ mimalloc = { version = "0.1", default-features = false }
@@ -0,0 +1,292 @@
1
+ use magnus::{class, RArray, TryConvert, Value};
2
+ use polars::prelude::*;
3
+ use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
4
+
5
+ use super::*;
6
+ use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
7
+
8
+ pub fn apply_lambda_unknown<'a>(
9
+ df: &'a DataFrame,
10
+ lambda: Value,
11
+ inference_size: usize,
12
+ ) -> RbResult<(Value, bool)> {
13
+ let columns = df.get_columns();
14
+ let mut null_count = 0;
15
+
16
+ for idx in 0..df.height() {
17
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
18
+ let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
19
+ let out: Value = lambda.funcall("call", arg)?;
20
+
21
+ if out.is_nil() {
22
+ null_count += 1;
23
+ continue;
24
+ } else if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
25
+ let first_value = out.try_convert::<bool>().ok();
26
+ return Ok((
27
+ RbSeries::new(
28
+ apply_lambda_with_bool_out_type(df, lambda, null_count, first_value)
29
+ .into_series(),
30
+ )
31
+ .into(),
32
+ false,
33
+ ));
34
+ } else if out.is_kind_of(class::float()) {
35
+ let first_value = out.try_convert::<f64>().ok();
36
+
37
+ return Ok((
38
+ RbSeries::new(
39
+ apply_lambda_with_primitive_out_type::<Float64Type>(
40
+ df,
41
+ lambda,
42
+ null_count,
43
+ first_value,
44
+ )
45
+ .into_series(),
46
+ )
47
+ .into(),
48
+ false,
49
+ ));
50
+ } else if out.is_kind_of(class::integer()) {
51
+ let first_value = out.try_convert::<i64>().ok();
52
+ return Ok((
53
+ RbSeries::new(
54
+ apply_lambda_with_primitive_out_type::<Int64Type>(
55
+ df,
56
+ lambda,
57
+ null_count,
58
+ first_value,
59
+ )
60
+ .into_series(),
61
+ )
62
+ .into(),
63
+ false,
64
+ ));
65
+ // } else if out.is_kind_of(class::string()) {
66
+ // let first_value = out.try_convert::<String>().ok();
67
+ // return Ok((
68
+ // RbSeries::new(
69
+ // apply_lambda_with_utf8_out_type(df, lambda, null_count, first_value)
70
+ // .into_series(),
71
+ // )
72
+ // .into(),
73
+ // false,
74
+ // ));
75
+ } else if out.respond_to("_s", true)? {
76
+ let rb_rbseries: Value = out.funcall("_s", ()).unwrap();
77
+ let series = rb_rbseries
78
+ .try_convert::<&RbSeries>()
79
+ .unwrap()
80
+ .series
81
+ .borrow();
82
+ let dt = series.dtype();
83
+ return Ok((
84
+ RbSeries::new(
85
+ apply_lambda_with_list_out_type(df, lambda, null_count, Some(&series), dt)?
86
+ .into_series(),
87
+ )
88
+ .into(),
89
+ false,
90
+ ));
91
+ } else if out.try_convert::<Wrap<Row<'a>>>().is_ok() {
92
+ let first_value = out.try_convert::<Wrap<Row<'a>>>().unwrap().0;
93
+ return Ok((
94
+ RbDataFrame::from(
95
+ apply_lambda_with_rows_output(
96
+ df,
97
+ lambda,
98
+ null_count,
99
+ first_value,
100
+ inference_size,
101
+ )
102
+ .map_err(RbPolarsErr::from)?,
103
+ )
104
+ .into(),
105
+ true,
106
+ ));
107
+ } else if out.is_kind_of(class::array()) {
108
+ return Err(RbPolarsErr::other(
109
+ "A list output type is invalid. Do you mean to create polars List Series?\
110
+ Then return a Series object."
111
+ .into(),
112
+ ));
113
+ } else {
114
+ return Err(RbPolarsErr::other("Could not determine output type".into()));
115
+ }
116
+ }
117
+ Err(RbPolarsErr::other("Could not determine output type".into()))
118
+ }
119
+
120
+ fn apply_iter<T>(
121
+ df: &DataFrame,
122
+ lambda: Value,
123
+ init_null_count: usize,
124
+ skip: usize,
125
+ ) -> impl Iterator<Item = Option<T>> + '_
126
+ where
127
+ T: TryConvert,
128
+ {
129
+ let columns = df.get_columns();
130
+ ((init_null_count + skip)..df.height()).map(move |idx| {
131
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
132
+ let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
133
+ match lambda.funcall::<_, _, Value>("call", tpl) {
134
+ Ok(val) => val.try_convert::<T>().ok(),
135
+ Err(e) => panic!("ruby function failed {}", e),
136
+ }
137
+ })
138
+ }
139
+
140
+ /// Apply a lambda with a primitive output type
141
+ pub fn apply_lambda_with_primitive_out_type<D>(
142
+ df: &DataFrame,
143
+ lambda: Value,
144
+ init_null_count: usize,
145
+ first_value: Option<D::Native>,
146
+ ) -> ChunkedArray<D>
147
+ where
148
+ D: RbArrowPrimitiveType,
149
+ D::Native: Into<Value> + TryConvert,
150
+ {
151
+ let skip = usize::from(first_value.is_some());
152
+ if init_null_count == df.height() {
153
+ ChunkedArray::full_null("apply", df.height())
154
+ } else {
155
+ let iter = apply_iter(df, lambda, init_null_count, skip);
156
+ iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
157
+ }
158
+ }
159
+
160
+ /// Apply a lambda with a boolean output type
161
+ pub fn apply_lambda_with_bool_out_type(
162
+ df: &DataFrame,
163
+ lambda: Value,
164
+ init_null_count: usize,
165
+ first_value: Option<bool>,
166
+ ) -> ChunkedArray<BooleanType> {
167
+ let skip = usize::from(first_value.is_some());
168
+ if init_null_count == df.height() {
169
+ ChunkedArray::full_null("apply", df.height())
170
+ } else {
171
+ let iter = apply_iter(df, lambda, init_null_count, skip);
172
+ iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
173
+ }
174
+ }
175
+
176
+ /// Apply a lambda with utf8 output type
177
+ pub fn apply_lambda_with_utf8_out_type(
178
+ df: &DataFrame,
179
+ lambda: Value,
180
+ init_null_count: usize,
181
+ first_value: Option<&str>,
182
+ ) -> Utf8Chunked {
183
+ let skip = usize::from(first_value.is_some());
184
+ if init_null_count == df.height() {
185
+ ChunkedArray::full_null("apply", df.height())
186
+ } else {
187
+ let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
188
+ iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
189
+ }
190
+ }
191
+
192
+ /// Apply a lambda with list output type
193
+ pub fn apply_lambda_with_list_out_type<'a>(
194
+ df: &'a DataFrame,
195
+ lambda: Value,
196
+ init_null_count: usize,
197
+ first_value: Option<&Series>,
198
+ dt: &DataType,
199
+ ) -> RbResult<ListChunked> {
200
+ let columns = df.get_columns();
201
+
202
+ let skip = usize::from(first_value.is_some());
203
+ if init_null_count == df.height() {
204
+ Ok(ChunkedArray::full_null("apply", df.height()))
205
+ } else {
206
+ let iter = ((init_null_count + skip)..df.height()).map(|idx| {
207
+ let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
208
+ let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
209
+ match lambda.funcall::<_, _, Value>("call", tpl) {
210
+ Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
211
+ Ok(val) => val
212
+ .try_convert::<&RbSeries>()
213
+ .ok()
214
+ .map(|ps| ps.series.borrow().clone()),
215
+ Err(_) => {
216
+ if val.is_nil() {
217
+ None
218
+ } else {
219
+ panic!("should return a Series, got a {:?}", val)
220
+ }
221
+ }
222
+ },
223
+ Err(e) => panic!("ruby function failed {}", e),
224
+ }
225
+ });
226
+ iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
227
+ }
228
+ }
229
+
230
/// Applies `lambda` row-wise to `df`, where the lambda returns an array per row,
/// and assembles the returned rows into a new `DataFrame`.
///
/// `init_null_count` is the number of leading rows already known to produce nil;
/// they are emitted as all-null rows. `first_value` is the already computed row
/// that follows them and also fixes the width of the null rows. Evaluation
/// resumes at row `init_null_count + 1`.
///
/// Up to `inference_size` further rows are evaluated eagerly and, together with
/// `first_value`, used to infer the output schema.
///
/// A lambda result that is not an array is replaced by an all-null row.
///
/// # Panics
///
/// Panics if calling the lambda raises, if iterating the returned array fails,
/// or if an element cannot be converted to an `AnyValue`.
pub fn apply_lambda_with_rows_output<'a>(
    df: &'a DataFrame,
    lambda: Value,
    init_null_count: usize,
    first_value: Row<'a>,
    inference_size: usize,
) -> PolarsResult<DataFrame> {
    let columns = df.get_columns();
    let width = first_value.0.len();
    // Stand-in row used whenever the lambda yields no usable array.
    let null_row = Row::new(vec![AnyValue::Null; width]);

    // Scratch row that is overwritten for every lambda call (see the SAFETY note below).
    let mut row_buf = Row::default();

    // `first_value` already covers one row, so start one past the leading nulls.
    let skip = 1;
    let mut row_iter = ((init_null_count + skip)..df.height()).map(|idx| {
        let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
        let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
        match lambda.funcall::<_, _, Value>("call", tpl) {
            Ok(val) => {
                match val.try_convert::<RArray>().ok() {
                    Some(tuple) => {
                        row_buf.0.clear();
                        for v in tuple.each() {
                            let v = v.unwrap().try_convert::<Wrap<AnyValue>>().unwrap().0;
                            row_buf.0.push(v);
                        }
                        let ptr = &row_buf as *const Row;
                        // SAFETY (as argued by the original author):
                        // the row constructor of the polars data frame does not keep a
                        // reference to the row, so the reference handed out here is dropped
                        // before `row_buf` is mutated again on the next iteration. This
                        // cannot be proven to the compiler, hence the raw pointer.
                        // It is done to save a Vec allocation in a hot loop.
                        // NOTE(review): every consumer of this iterator must use or clone
                        // the yielded row before advancing — confirm when changing callers.
                        unsafe { &*ptr }
                    }
                    None => &null_row,
                }
            }
            Err(e) => panic!("ruby function failed {}", e),
        }
    });

    // First rows for schema inference: `first_value` plus up to `inference_size`
    // freshly evaluated rows, cloned immediately because they alias `row_buf`.
    let mut buf = Vec::with_capacity(inference_size);
    buf.push(first_value);
    buf.extend((&mut row_iter).take(inference_size).cloned());
    let schema = rows_to_schema_first_non_null(&buf, Some(50));

    if init_null_count > 0 {
        // SAFETY: the chained iterator yields `init_null_count` null rows, the buffered
        // rows, and the rows still pending in `row_iter`, which together cover every
        // row of `df`, so its length is `df.height()`.
        let iter = unsafe {
            (0..init_null_count)
                .map(|_| &null_row)
                .chain(buf.iter())
                .chain(row_iter)
                .trust_my_length(df.height())
        };
        DataFrame::from_rows_iter_and_schema(iter, &schema)
    } else {
        // SAFETY: with no leading nulls, the buffered rows plus the rows still pending
        // in `row_iter` cover every row of `df`, so the length is `df.height()`.
        let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
        DataFrame::from_rows_iter_and_schema(iter, &schema)
    }
}