polars-df 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a08e866e51227716cd3cb4454835016a7d61e30e964fe76a8b99704dcb60a12
|
4
|
+
data.tar.gz: 1f30c3fdd47ebf52a311909aa26ba4b6d64e426622455854b9bbc660de1229b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1531fff4fc2fab8b2dc72709a69fb2890c215ae08e4223aa32262dbb4b0debb4b6f2fbab1e8138953871f5d02d462abfaba49cc7f22a66e25aa7d60f128a89bc
|
7
|
+
data.tar.gz: e1041d708e2f8046c14c565a65879fa4e5c6671cf526736a3f8418a82dfa70e17692a96d383e43f393d8761e5f29f717d63185ae1ed3f0793a6876be2d946fc0
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -8,6 +8,12 @@ version = "1.0.2"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
9
|
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
10
10
|
|
11
|
+
[[package]]
|
12
|
+
name = "adler32"
|
13
|
+
version = "1.2.0"
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
15
|
+
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
16
|
+
|
11
17
|
[[package]]
|
12
18
|
name = "ahash"
|
13
19
|
version = "0.7.6"
|
@@ -85,6 +91,7 @@ checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3"
|
|
85
91
|
dependencies = [
|
86
92
|
"ahash 0.7.6",
|
87
93
|
"arrow-format",
|
94
|
+
"avro-schema",
|
88
95
|
"base64",
|
89
96
|
"bytemuck",
|
90
97
|
"chrono",
|
@@ -147,6 +154,20 @@ version = "1.1.0"
|
|
147
154
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
148
155
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
149
156
|
|
157
|
+
[[package]]
|
158
|
+
name = "avro-schema"
|
159
|
+
version = "0.3.0"
|
160
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
161
|
+
checksum = "b5281855b39aba9684d2f47bf96983fbfd8f1725f12fabb0513a8ab879647bbd"
|
162
|
+
dependencies = [
|
163
|
+
"crc",
|
164
|
+
"fallible-streaming-iterator",
|
165
|
+
"libflate",
|
166
|
+
"serde",
|
167
|
+
"serde_json",
|
168
|
+
"snap",
|
169
|
+
]
|
170
|
+
|
150
171
|
[[package]]
|
151
172
|
name = "base64"
|
152
173
|
version = "0.13.1"
|
@@ -314,6 +335,21 @@ dependencies = [
|
|
314
335
|
"unicode-width",
|
315
336
|
]
|
316
337
|
|
338
|
+
[[package]]
|
339
|
+
name = "crc"
|
340
|
+
version = "2.1.0"
|
341
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
342
|
+
checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
|
343
|
+
dependencies = [
|
344
|
+
"crc-catalog",
|
345
|
+
]
|
346
|
+
|
347
|
+
[[package]]
|
348
|
+
name = "crc-catalog"
|
349
|
+
version = "1.1.1"
|
350
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
+
checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
|
352
|
+
|
317
353
|
[[package]]
|
318
354
|
name = "crc32fast"
|
319
355
|
version = "1.3.2"
|
@@ -824,6 +860,26 @@ version = "0.2.121"
|
|
824
860
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
825
861
|
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
826
862
|
|
863
|
+
[[package]]
|
864
|
+
name = "libflate"
|
865
|
+
version = "1.2.0"
|
866
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
867
|
+
checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
|
868
|
+
dependencies = [
|
869
|
+
"adler32",
|
870
|
+
"crc32fast",
|
871
|
+
"libflate_lz77",
|
872
|
+
]
|
873
|
+
|
874
|
+
[[package]]
|
875
|
+
name = "libflate_lz77"
|
876
|
+
version = "1.1.0"
|
877
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
878
|
+
checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
|
879
|
+
dependencies = [
|
880
|
+
"rle-decode-fast",
|
881
|
+
]
|
882
|
+
|
827
883
|
[[package]]
|
828
884
|
name = "libloading"
|
829
885
|
version = "0.7.4"
|
@@ -903,8 +959,7 @@ dependencies = [
|
|
903
959
|
[[package]]
|
904
960
|
name = "magnus-macros"
|
905
961
|
version = "0.2.0"
|
906
|
-
source = "
|
907
|
-
checksum = "acc8ba6908cb0f67a4e75cb48fc81a1f0e6a6dd1501936e0c9e2c7c8f9f18e05"
|
962
|
+
source = "git+https://github.com/matsadler/magnus#ae792419bed70107d4c930e1f8193272750b9fd2"
|
908
963
|
dependencies = [
|
909
964
|
"proc-macro2",
|
910
965
|
"quote",
|
@@ -1222,8 +1277,9 @@ dependencies = [
|
|
1222
1277
|
|
1223
1278
|
[[package]]
|
1224
1279
|
name = "polars"
|
1225
|
-
version = "0.1.4"
|
1280
|
+
version = "0.1.5"
|
1226
1281
|
dependencies = [
|
1282
|
+
"ahash 0.8.2",
|
1227
1283
|
"jemallocator",
|
1228
1284
|
"magnus",
|
1229
1285
|
"mimalloc",
|
@@ -1500,18 +1556,18 @@ dependencies = [
|
|
1500
1556
|
|
1501
1557
|
[[package]]
|
1502
1558
|
name = "rb-sys"
|
1503
|
-
version = "0.9.
|
1559
|
+
version = "0.9.48"
|
1504
1560
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1505
|
-
checksum = "
|
1561
|
+
checksum = "dfc6b8f3bf2d04b0180e243ceeb033b51ca267d839aa1c12fa25f262c17d0596"
|
1506
1562
|
dependencies = [
|
1507
1563
|
"rb-sys-build",
|
1508
1564
|
]
|
1509
1565
|
|
1510
1566
|
[[package]]
|
1511
1567
|
name = "rb-sys-build"
|
1512
|
-
version = "0.9.
|
1568
|
+
version = "0.9.48"
|
1513
1569
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1514
|
-
checksum = "
|
1570
|
+
checksum = "2cd591ebf22c45a44e51192fbeebba473aea0fe2a708b0b24665a13010c58b8d"
|
1515
1571
|
dependencies = [
|
1516
1572
|
"bindgen",
|
1517
1573
|
"regex",
|
@@ -1521,8 +1577,7 @@ dependencies = [
|
|
1521
1577
|
[[package]]
|
1522
1578
|
name = "rb-sys-env"
|
1523
1579
|
version = "0.1.1"
|
1524
|
-
source = "
|
1525
|
-
checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
|
1580
|
+
source = "git+https://github.com/oxidize-rb/rb-sys#93c4f97a244168b9ebc2c5682275e7281421f4b8"
|
1526
1581
|
|
1527
1582
|
[[package]]
|
1528
1583
|
name = "redox_syscall"
|
@@ -1561,6 +1616,12 @@ version = "0.6.28"
|
|
1561
1616
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1562
1617
|
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
|
1563
1618
|
|
1619
|
+
[[package]]
|
1620
|
+
name = "rle-decode-fast"
|
1621
|
+
version = "1.0.3"
|
1622
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1623
|
+
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
1624
|
+
|
1564
1625
|
[[package]]
|
1565
1626
|
name = "rustc-hash"
|
1566
1627
|
version = "1.1.0"
|
data/Cargo.toml
CHANGED
@@ -3,6 +3,8 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
+
magnus-macros = { git = "https://github.com/matsadler/magnus" }
|
7
|
+
rb-sys-env = { git = "https://github.com/oxidize-rb/rb-sys" }
|
6
8
|
|
7
9
|
[profile.release]
|
8
10
|
strip = true
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.4"
|
3
|
+
version = "0.1.5"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -9,6 +9,7 @@ publish = false
|
|
9
9
|
crate-type = ["cdylib"]
|
10
10
|
|
11
11
|
[dependencies]
|
12
|
+
ahash = "0.8"
|
12
13
|
magnus = "0.4"
|
13
14
|
polars-core = "0.25.1"
|
14
15
|
serde_json = "1"
|
@@ -19,6 +20,8 @@ features = [
|
|
19
20
|
"abs",
|
20
21
|
"arange",
|
21
22
|
"arg_where",
|
23
|
+
"asof_join",
|
24
|
+
"avro",
|
22
25
|
"concat_str",
|
23
26
|
"cse",
|
24
27
|
"csv-file",
|
@@ -53,6 +56,7 @@ features = [
|
|
53
56
|
"partition_by",
|
54
57
|
"pct_change",
|
55
58
|
"performant",
|
59
|
+
"pivot",
|
56
60
|
"product",
|
57
61
|
"propagate_nans",
|
58
62
|
"random",
|
@@ -61,6 +65,7 @@ features = [
|
|
61
65
|
"repeat_by",
|
62
66
|
"rolling_window",
|
63
67
|
"round_series",
|
68
|
+
"row_hash",
|
64
69
|
"search_sorted",
|
65
70
|
"semi_anti_join",
|
66
71
|
"serde-lazy",
|
@@ -0,0 +1,292 @@
|
|
1
|
+
use magnus::{class, RArray, TryConvert, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
|
+
|
5
|
+
use super::*;
|
6
|
+
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
7
|
+
|
8
|
+
pub fn apply_lambda_unknown<'a>(
|
9
|
+
df: &'a DataFrame,
|
10
|
+
lambda: Value,
|
11
|
+
inference_size: usize,
|
12
|
+
) -> RbResult<(Value, bool)> {
|
13
|
+
let columns = df.get_columns();
|
14
|
+
let mut null_count = 0;
|
15
|
+
|
16
|
+
for idx in 0..df.height() {
|
17
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
18
|
+
let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
19
|
+
let out: Value = lambda.funcall("call", arg)?;
|
20
|
+
|
21
|
+
if out.is_nil() {
|
22
|
+
null_count += 1;
|
23
|
+
continue;
|
24
|
+
} else if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
|
25
|
+
let first_value = out.try_convert::<bool>().ok();
|
26
|
+
return Ok((
|
27
|
+
RbSeries::new(
|
28
|
+
apply_lambda_with_bool_out_type(df, lambda, null_count, first_value)
|
29
|
+
.into_series(),
|
30
|
+
)
|
31
|
+
.into(),
|
32
|
+
false,
|
33
|
+
));
|
34
|
+
} else if out.is_kind_of(class::float()) {
|
35
|
+
let first_value = out.try_convert::<f64>().ok();
|
36
|
+
|
37
|
+
return Ok((
|
38
|
+
RbSeries::new(
|
39
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(
|
40
|
+
df,
|
41
|
+
lambda,
|
42
|
+
null_count,
|
43
|
+
first_value,
|
44
|
+
)
|
45
|
+
.into_series(),
|
46
|
+
)
|
47
|
+
.into(),
|
48
|
+
false,
|
49
|
+
));
|
50
|
+
} else if out.is_kind_of(class::integer()) {
|
51
|
+
let first_value = out.try_convert::<i64>().ok();
|
52
|
+
return Ok((
|
53
|
+
RbSeries::new(
|
54
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(
|
55
|
+
df,
|
56
|
+
lambda,
|
57
|
+
null_count,
|
58
|
+
first_value,
|
59
|
+
)
|
60
|
+
.into_series(),
|
61
|
+
)
|
62
|
+
.into(),
|
63
|
+
false,
|
64
|
+
));
|
65
|
+
// } else if out.is_kind_of(class::string()) {
|
66
|
+
// let first_value = out.try_convert::<String>().ok();
|
67
|
+
// return Ok((
|
68
|
+
// RbSeries::new(
|
69
|
+
// apply_lambda_with_utf8_out_type(df, lambda, null_count, first_value)
|
70
|
+
// .into_series(),
|
71
|
+
// )
|
72
|
+
// .into(),
|
73
|
+
// false,
|
74
|
+
// ));
|
75
|
+
} else if out.respond_to("_s", true)? {
|
76
|
+
let rb_rbseries: Value = out.funcall("_s", ()).unwrap();
|
77
|
+
let series = rb_rbseries
|
78
|
+
.try_convert::<&RbSeries>()
|
79
|
+
.unwrap()
|
80
|
+
.series
|
81
|
+
.borrow();
|
82
|
+
let dt = series.dtype();
|
83
|
+
return Ok((
|
84
|
+
RbSeries::new(
|
85
|
+
apply_lambda_with_list_out_type(df, lambda, null_count, Some(&series), dt)?
|
86
|
+
.into_series(),
|
87
|
+
)
|
88
|
+
.into(),
|
89
|
+
false,
|
90
|
+
));
|
91
|
+
} else if out.try_convert::<Wrap<Row<'a>>>().is_ok() {
|
92
|
+
let first_value = out.try_convert::<Wrap<Row<'a>>>().unwrap().0;
|
93
|
+
return Ok((
|
94
|
+
RbDataFrame::from(
|
95
|
+
apply_lambda_with_rows_output(
|
96
|
+
df,
|
97
|
+
lambda,
|
98
|
+
null_count,
|
99
|
+
first_value,
|
100
|
+
inference_size,
|
101
|
+
)
|
102
|
+
.map_err(RbPolarsErr::from)?,
|
103
|
+
)
|
104
|
+
.into(),
|
105
|
+
true,
|
106
|
+
));
|
107
|
+
} else if out.is_kind_of(class::array()) {
|
108
|
+
return Err(RbPolarsErr::other(
|
109
|
+
"A list output type is invalid. Do you mean to create polars List Series?\
|
110
|
+
Then return a Series object."
|
111
|
+
.into(),
|
112
|
+
));
|
113
|
+
} else {
|
114
|
+
return Err(RbPolarsErr::other("Could not determine output type".into()));
|
115
|
+
}
|
116
|
+
}
|
117
|
+
Err(RbPolarsErr::other("Could not determine output type".into()))
|
118
|
+
}
|
119
|
+
|
120
|
+
fn apply_iter<T>(
|
121
|
+
df: &DataFrame,
|
122
|
+
lambda: Value,
|
123
|
+
init_null_count: usize,
|
124
|
+
skip: usize,
|
125
|
+
) -> impl Iterator<Item = Option<T>> + '_
|
126
|
+
where
|
127
|
+
T: TryConvert,
|
128
|
+
{
|
129
|
+
let columns = df.get_columns();
|
130
|
+
((init_null_count + skip)..df.height()).map(move |idx| {
|
131
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
132
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
133
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
134
|
+
Ok(val) => val.try_convert::<T>().ok(),
|
135
|
+
Err(e) => panic!("ruby function failed {}", e),
|
136
|
+
}
|
137
|
+
})
|
138
|
+
}
|
139
|
+
|
140
|
+
/// Apply a lambda with a primitive output type
|
141
|
+
pub fn apply_lambda_with_primitive_out_type<D>(
|
142
|
+
df: &DataFrame,
|
143
|
+
lambda: Value,
|
144
|
+
init_null_count: usize,
|
145
|
+
first_value: Option<D::Native>,
|
146
|
+
) -> ChunkedArray<D>
|
147
|
+
where
|
148
|
+
D: RbArrowPrimitiveType,
|
149
|
+
D::Native: Into<Value> + TryConvert,
|
150
|
+
{
|
151
|
+
let skip = usize::from(first_value.is_some());
|
152
|
+
if init_null_count == df.height() {
|
153
|
+
ChunkedArray::full_null("apply", df.height())
|
154
|
+
} else {
|
155
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
156
|
+
iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
/// Apply a lambda with a boolean output type
|
161
|
+
pub fn apply_lambda_with_bool_out_type(
|
162
|
+
df: &DataFrame,
|
163
|
+
lambda: Value,
|
164
|
+
init_null_count: usize,
|
165
|
+
first_value: Option<bool>,
|
166
|
+
) -> ChunkedArray<BooleanType> {
|
167
|
+
let skip = usize::from(first_value.is_some());
|
168
|
+
if init_null_count == df.height() {
|
169
|
+
ChunkedArray::full_null("apply", df.height())
|
170
|
+
} else {
|
171
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
172
|
+
iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
176
|
+
/// Apply a lambda with utf8 output type
|
177
|
+
pub fn apply_lambda_with_utf8_out_type(
|
178
|
+
df: &DataFrame,
|
179
|
+
lambda: Value,
|
180
|
+
init_null_count: usize,
|
181
|
+
first_value: Option<&str>,
|
182
|
+
) -> Utf8Chunked {
|
183
|
+
let skip = usize::from(first_value.is_some());
|
184
|
+
if init_null_count == df.height() {
|
185
|
+
ChunkedArray::full_null("apply", df.height())
|
186
|
+
} else {
|
187
|
+
let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
|
188
|
+
iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
/// Apply a lambda with list output type
|
193
|
+
pub fn apply_lambda_with_list_out_type<'a>(
|
194
|
+
df: &'a DataFrame,
|
195
|
+
lambda: Value,
|
196
|
+
init_null_count: usize,
|
197
|
+
first_value: Option<&Series>,
|
198
|
+
dt: &DataType,
|
199
|
+
) -> RbResult<ListChunked> {
|
200
|
+
let columns = df.get_columns();
|
201
|
+
|
202
|
+
let skip = usize::from(first_value.is_some());
|
203
|
+
if init_null_count == df.height() {
|
204
|
+
Ok(ChunkedArray::full_null("apply", df.height()))
|
205
|
+
} else {
|
206
|
+
let iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
207
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
208
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
209
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
210
|
+
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
211
|
+
Ok(val) => val
|
212
|
+
.try_convert::<&RbSeries>()
|
213
|
+
.ok()
|
214
|
+
.map(|ps| ps.series.borrow().clone()),
|
215
|
+
Err(_) => {
|
216
|
+
if val.is_nil() {
|
217
|
+
None
|
218
|
+
} else {
|
219
|
+
panic!("should return a Series, got a {:?}", val)
|
220
|
+
}
|
221
|
+
}
|
222
|
+
},
|
223
|
+
Err(e) => panic!("ruby function failed {}", e),
|
224
|
+
}
|
225
|
+
});
|
226
|
+
iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
230
|
+
pub fn apply_lambda_with_rows_output<'a>(
|
231
|
+
df: &'a DataFrame,
|
232
|
+
lambda: Value,
|
233
|
+
init_null_count: usize,
|
234
|
+
first_value: Row<'a>,
|
235
|
+
inference_size: usize,
|
236
|
+
) -> PolarsResult<DataFrame> {
|
237
|
+
let columns = df.get_columns();
|
238
|
+
let width = first_value.0.len();
|
239
|
+
let null_row = Row::new(vec![AnyValue::Null; width]);
|
240
|
+
|
241
|
+
let mut row_buf = Row::default();
|
242
|
+
|
243
|
+
let skip = 1;
|
244
|
+
let mut row_iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
245
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
246
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
247
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
248
|
+
Ok(val) => {
|
249
|
+
match val.try_convert::<RArray>().ok() {
|
250
|
+
Some(tuple) => {
|
251
|
+
row_buf.0.clear();
|
252
|
+
for v in tuple.each() {
|
253
|
+
let v = v.unwrap().try_convert::<Wrap<AnyValue>>().unwrap().0;
|
254
|
+
row_buf.0.push(v);
|
255
|
+
}
|
256
|
+
let ptr = &row_buf as *const Row;
|
257
|
+
// Safety:
|
258
|
+
// we know that row constructor of polars dataframe does not keep a reference
|
259
|
+
// to the row. Before we mutate the row buf again, the reference is dropped.
|
260
|
+
// we only cannot prove it to the compiler.
|
261
|
+
// we still do this because it saves a Vec allocation in a hot loop.
|
262
|
+
unsafe { &*ptr }
|
263
|
+
}
|
264
|
+
None => &null_row,
|
265
|
+
}
|
266
|
+
}
|
267
|
+
Err(e) => panic!("ruby function failed {}", e),
|
268
|
+
}
|
269
|
+
});
|
270
|
+
|
271
|
+
// first rows for schema inference
|
272
|
+
let mut buf = Vec::with_capacity(inference_size);
|
273
|
+
buf.push(first_value);
|
274
|
+
buf.extend((&mut row_iter).take(inference_size).cloned());
|
275
|
+
let schema = rows_to_schema_first_non_null(&buf, Some(50));
|
276
|
+
|
277
|
+
if init_null_count > 0 {
|
278
|
+
// Safety: we know the iterators size
|
279
|
+
let iter = unsafe {
|
280
|
+
(0..init_null_count)
|
281
|
+
.map(|_| &null_row)
|
282
|
+
.chain(buf.iter())
|
283
|
+
.chain(row_iter)
|
284
|
+
.trust_my_length(df.height())
|
285
|
+
};
|
286
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
287
|
+
} else {
|
288
|
+
// Safety: we know the iterators size
|
289
|
+
let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
|
290
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
291
|
+
}
|
292
|
+
}
|