polars-df 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a08e866e51227716cd3cb4454835016a7d61e30e964fe76a8b99704dcb60a12
|
4
|
+
data.tar.gz: 1f30c3fdd47ebf52a311909aa26ba4b6d64e426622455854b9bbc660de1229b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1531fff4fc2fab8b2dc72709a69fb2890c215ae08e4223aa32262dbb4b0debb4b6f2fbab1e8138953871f5d02d462abfaba49cc7f22a66e25aa7d60f128a89bc
|
7
|
+
data.tar.gz: e1041d708e2f8046c14c565a65879fa4e5c6671cf526736a3f8418a82dfa70e17692a96d383e43f393d8761e5f29f717d63185ae1ed3f0793a6876be2d946fc0
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -8,6 +8,12 @@ version = "1.0.2"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
9
|
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
10
10
|
|
11
|
+
[[package]]
|
12
|
+
name = "adler32"
|
13
|
+
version = "1.2.0"
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
15
|
+
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
16
|
+
|
11
17
|
[[package]]
|
12
18
|
name = "ahash"
|
13
19
|
version = "0.7.6"
|
@@ -85,6 +91,7 @@ checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3"
|
|
85
91
|
dependencies = [
|
86
92
|
"ahash 0.7.6",
|
87
93
|
"arrow-format",
|
94
|
+
"avro-schema",
|
88
95
|
"base64",
|
89
96
|
"bytemuck",
|
90
97
|
"chrono",
|
@@ -147,6 +154,20 @@ version = "1.1.0"
|
|
147
154
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
148
155
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
149
156
|
|
157
|
+
[[package]]
|
158
|
+
name = "avro-schema"
|
159
|
+
version = "0.3.0"
|
160
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
161
|
+
checksum = "b5281855b39aba9684d2f47bf96983fbfd8f1725f12fabb0513a8ab879647bbd"
|
162
|
+
dependencies = [
|
163
|
+
"crc",
|
164
|
+
"fallible-streaming-iterator",
|
165
|
+
"libflate",
|
166
|
+
"serde",
|
167
|
+
"serde_json",
|
168
|
+
"snap",
|
169
|
+
]
|
170
|
+
|
150
171
|
[[package]]
|
151
172
|
name = "base64"
|
152
173
|
version = "0.13.1"
|
@@ -314,6 +335,21 @@ dependencies = [
|
|
314
335
|
"unicode-width",
|
315
336
|
]
|
316
337
|
|
338
|
+
[[package]]
|
339
|
+
name = "crc"
|
340
|
+
version = "2.1.0"
|
341
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
342
|
+
checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
|
343
|
+
dependencies = [
|
344
|
+
"crc-catalog",
|
345
|
+
]
|
346
|
+
|
347
|
+
[[package]]
|
348
|
+
name = "crc-catalog"
|
349
|
+
version = "1.1.1"
|
350
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
+
checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
|
352
|
+
|
317
353
|
[[package]]
|
318
354
|
name = "crc32fast"
|
319
355
|
version = "1.3.2"
|
@@ -824,6 +860,26 @@ version = "0.2.121"
|
|
824
860
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
825
861
|
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
826
862
|
|
863
|
+
[[package]]
|
864
|
+
name = "libflate"
|
865
|
+
version = "1.2.0"
|
866
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
867
|
+
checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
|
868
|
+
dependencies = [
|
869
|
+
"adler32",
|
870
|
+
"crc32fast",
|
871
|
+
"libflate_lz77",
|
872
|
+
]
|
873
|
+
|
874
|
+
[[package]]
|
875
|
+
name = "libflate_lz77"
|
876
|
+
version = "1.1.0"
|
877
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
878
|
+
checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
|
879
|
+
dependencies = [
|
880
|
+
"rle-decode-fast",
|
881
|
+
]
|
882
|
+
|
827
883
|
[[package]]
|
828
884
|
name = "libloading"
|
829
885
|
version = "0.7.4"
|
@@ -903,8 +959,7 @@ dependencies = [
|
|
903
959
|
[[package]]
|
904
960
|
name = "magnus-macros"
|
905
961
|
version = "0.2.0"
|
906
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
907
|
-
checksum = "acc8ba6908cb0f67a4e75cb48fc81a1f0e6a6dd1501936e0c9e2c7c8f9f18e05"
|
962
|
+
source = "git+https://github.com/matsadler/magnus#ae792419bed70107d4c930e1f8193272750b9fd2"
|
908
963
|
dependencies = [
|
909
964
|
"proc-macro2",
|
910
965
|
"quote",
|
@@ -1222,8 +1277,9 @@ dependencies = [
|
|
1222
1277
|
|
1223
1278
|
[[package]]
|
1224
1279
|
name = "polars"
|
1225
|
-
version = "0.1.4"
|
1280
|
+
version = "0.1.5"
|
1226
1281
|
dependencies = [
|
1282
|
+
"ahash 0.8.2",
|
1227
1283
|
"jemallocator",
|
1228
1284
|
"magnus",
|
1229
1285
|
"mimalloc",
|
@@ -1500,18 +1556,18 @@ dependencies = [
|
|
1500
1556
|
|
1501
1557
|
[[package]]
|
1502
1558
|
name = "rb-sys"
|
1503
|
-
version = "0.9.
|
1559
|
+
version = "0.9.48"
|
1504
1560
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1505
|
-
checksum = "
|
1561
|
+
checksum = "dfc6b8f3bf2d04b0180e243ceeb033b51ca267d839aa1c12fa25f262c17d0596"
|
1506
1562
|
dependencies = [
|
1507
1563
|
"rb-sys-build",
|
1508
1564
|
]
|
1509
1565
|
|
1510
1566
|
[[package]]
|
1511
1567
|
name = "rb-sys-build"
|
1512
|
-
version = "0.9.
|
1568
|
+
version = "0.9.48"
|
1513
1569
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1514
|
-
checksum = "
|
1570
|
+
checksum = "2cd591ebf22c45a44e51192fbeebba473aea0fe2a708b0b24665a13010c58b8d"
|
1515
1571
|
dependencies = [
|
1516
1572
|
"bindgen",
|
1517
1573
|
"regex",
|
@@ -1521,8 +1577,7 @@ dependencies = [
|
|
1521
1577
|
[[package]]
|
1522
1578
|
name = "rb-sys-env"
|
1523
1579
|
version = "0.1.1"
|
1524
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1525
|
-
checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
|
1580
|
+
source = "git+https://github.com/oxidize-rb/rb-sys#93c4f97a244168b9ebc2c5682275e7281421f4b8"
|
1526
1581
|
|
1527
1582
|
[[package]]
|
1528
1583
|
name = "redox_syscall"
|
@@ -1561,6 +1616,12 @@ version = "0.6.28"
|
|
1561
1616
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1562
1617
|
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
|
1563
1618
|
|
1619
|
+
[[package]]
|
1620
|
+
name = "rle-decode-fast"
|
1621
|
+
version = "1.0.3"
|
1622
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1623
|
+
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
1624
|
+
|
1564
1625
|
[[package]]
|
1565
1626
|
name = "rustc-hash"
|
1566
1627
|
version = "1.1.0"
|
data/Cargo.toml
CHANGED
@@ -3,6 +3,8 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
+
magnus-macros = { git = "https://github.com/matsadler/magnus" }
|
7
|
+
rb-sys-env = { git = "https://github.com/oxidize-rb/rb-sys" }
|
6
8
|
|
7
9
|
[profile.release]
|
8
10
|
strip = true
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.4"
|
3
|
+
version = "0.1.5"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -9,6 +9,7 @@ publish = false
|
|
9
9
|
crate-type = ["cdylib"]
|
10
10
|
|
11
11
|
[dependencies]
|
12
|
+
ahash = "0.8"
|
12
13
|
magnus = "0.4"
|
13
14
|
polars-core = "0.25.1"
|
14
15
|
serde_json = "1"
|
@@ -19,6 +20,8 @@ features = [
|
|
19
20
|
"abs",
|
20
21
|
"arange",
|
21
22
|
"arg_where",
|
23
|
+
"asof_join",
|
24
|
+
"avro",
|
22
25
|
"concat_str",
|
23
26
|
"cse",
|
24
27
|
"csv-file",
|
@@ -53,6 +56,7 @@ features = [
|
|
53
56
|
"partition_by",
|
54
57
|
"pct_change",
|
55
58
|
"performant",
|
59
|
+
"pivot",
|
56
60
|
"product",
|
57
61
|
"propagate_nans",
|
58
62
|
"random",
|
@@ -61,6 +65,7 @@ features = [
|
|
61
65
|
"repeat_by",
|
62
66
|
"rolling_window",
|
63
67
|
"round_series",
|
68
|
+
"row_hash",
|
64
69
|
"search_sorted",
|
65
70
|
"semi_anti_join",
|
66
71
|
"serde-lazy",
|
@@ -0,0 +1,292 @@
|
|
1
|
+
use magnus::{class, RArray, TryConvert, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
|
+
|
5
|
+
use super::*;
|
6
|
+
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
7
|
+
|
8
|
+
pub fn apply_lambda_unknown<'a>(
|
9
|
+
df: &'a DataFrame,
|
10
|
+
lambda: Value,
|
11
|
+
inference_size: usize,
|
12
|
+
) -> RbResult<(Value, bool)> {
|
13
|
+
let columns = df.get_columns();
|
14
|
+
let mut null_count = 0;
|
15
|
+
|
16
|
+
for idx in 0..df.height() {
|
17
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
18
|
+
let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
19
|
+
let out: Value = lambda.funcall("call", arg)?;
|
20
|
+
|
21
|
+
if out.is_nil() {
|
22
|
+
null_count += 1;
|
23
|
+
continue;
|
24
|
+
} else if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
|
25
|
+
let first_value = out.try_convert::<bool>().ok();
|
26
|
+
return Ok((
|
27
|
+
RbSeries::new(
|
28
|
+
apply_lambda_with_bool_out_type(df, lambda, null_count, first_value)
|
29
|
+
.into_series(),
|
30
|
+
)
|
31
|
+
.into(),
|
32
|
+
false,
|
33
|
+
));
|
34
|
+
} else if out.is_kind_of(class::float()) {
|
35
|
+
let first_value = out.try_convert::<f64>().ok();
|
36
|
+
|
37
|
+
return Ok((
|
38
|
+
RbSeries::new(
|
39
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(
|
40
|
+
df,
|
41
|
+
lambda,
|
42
|
+
null_count,
|
43
|
+
first_value,
|
44
|
+
)
|
45
|
+
.into_series(),
|
46
|
+
)
|
47
|
+
.into(),
|
48
|
+
false,
|
49
|
+
));
|
50
|
+
} else if out.is_kind_of(class::integer()) {
|
51
|
+
let first_value = out.try_convert::<i64>().ok();
|
52
|
+
return Ok((
|
53
|
+
RbSeries::new(
|
54
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(
|
55
|
+
df,
|
56
|
+
lambda,
|
57
|
+
null_count,
|
58
|
+
first_value,
|
59
|
+
)
|
60
|
+
.into_series(),
|
61
|
+
)
|
62
|
+
.into(),
|
63
|
+
false,
|
64
|
+
));
|
65
|
+
// } else if out.is_kind_of(class::string()) {
|
66
|
+
// let first_value = out.try_convert::<String>().ok();
|
67
|
+
// return Ok((
|
68
|
+
// RbSeries::new(
|
69
|
+
// apply_lambda_with_utf8_out_type(df, lambda, null_count, first_value)
|
70
|
+
// .into_series(),
|
71
|
+
// )
|
72
|
+
// .into(),
|
73
|
+
// false,
|
74
|
+
// ));
|
75
|
+
} else if out.respond_to("_s", true)? {
|
76
|
+
let rb_rbseries: Value = out.funcall("_s", ()).unwrap();
|
77
|
+
let series = rb_rbseries
|
78
|
+
.try_convert::<&RbSeries>()
|
79
|
+
.unwrap()
|
80
|
+
.series
|
81
|
+
.borrow();
|
82
|
+
let dt = series.dtype();
|
83
|
+
return Ok((
|
84
|
+
RbSeries::new(
|
85
|
+
apply_lambda_with_list_out_type(df, lambda, null_count, Some(&series), dt)?
|
86
|
+
.into_series(),
|
87
|
+
)
|
88
|
+
.into(),
|
89
|
+
false,
|
90
|
+
));
|
91
|
+
} else if out.try_convert::<Wrap<Row<'a>>>().is_ok() {
|
92
|
+
let first_value = out.try_convert::<Wrap<Row<'a>>>().unwrap().0;
|
93
|
+
return Ok((
|
94
|
+
RbDataFrame::from(
|
95
|
+
apply_lambda_with_rows_output(
|
96
|
+
df,
|
97
|
+
lambda,
|
98
|
+
null_count,
|
99
|
+
first_value,
|
100
|
+
inference_size,
|
101
|
+
)
|
102
|
+
.map_err(RbPolarsErr::from)?,
|
103
|
+
)
|
104
|
+
.into(),
|
105
|
+
true,
|
106
|
+
));
|
107
|
+
} else if out.is_kind_of(class::array()) {
|
108
|
+
return Err(RbPolarsErr::other(
|
109
|
+
"A list output type is invalid. Do you mean to create polars List Series?\
|
110
|
+
Then return a Series object."
|
111
|
+
.into(),
|
112
|
+
));
|
113
|
+
} else {
|
114
|
+
return Err(RbPolarsErr::other("Could not determine output type".into()));
|
115
|
+
}
|
116
|
+
}
|
117
|
+
Err(RbPolarsErr::other("Could not determine output type".into()))
|
118
|
+
}
|
119
|
+
|
120
|
+
fn apply_iter<T>(
|
121
|
+
df: &DataFrame,
|
122
|
+
lambda: Value,
|
123
|
+
init_null_count: usize,
|
124
|
+
skip: usize,
|
125
|
+
) -> impl Iterator<Item = Option<T>> + '_
|
126
|
+
where
|
127
|
+
T: TryConvert,
|
128
|
+
{
|
129
|
+
let columns = df.get_columns();
|
130
|
+
((init_null_count + skip)..df.height()).map(move |idx| {
|
131
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
132
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
133
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
134
|
+
Ok(val) => val.try_convert::<T>().ok(),
|
135
|
+
Err(e) => panic!("ruby function failed {}", e),
|
136
|
+
}
|
137
|
+
})
|
138
|
+
}
|
139
|
+
|
140
|
+
/// Apply a lambda with a primitive output type
|
141
|
+
pub fn apply_lambda_with_primitive_out_type<D>(
|
142
|
+
df: &DataFrame,
|
143
|
+
lambda: Value,
|
144
|
+
init_null_count: usize,
|
145
|
+
first_value: Option<D::Native>,
|
146
|
+
) -> ChunkedArray<D>
|
147
|
+
where
|
148
|
+
D: RbArrowPrimitiveType,
|
149
|
+
D::Native: Into<Value> + TryConvert,
|
150
|
+
{
|
151
|
+
let skip = usize::from(first_value.is_some());
|
152
|
+
if init_null_count == df.height() {
|
153
|
+
ChunkedArray::full_null("apply", df.height())
|
154
|
+
} else {
|
155
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
156
|
+
iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
/// Apply a lambda with a boolean output type
|
161
|
+
pub fn apply_lambda_with_bool_out_type(
|
162
|
+
df: &DataFrame,
|
163
|
+
lambda: Value,
|
164
|
+
init_null_count: usize,
|
165
|
+
first_value: Option<bool>,
|
166
|
+
) -> ChunkedArray<BooleanType> {
|
167
|
+
let skip = usize::from(first_value.is_some());
|
168
|
+
if init_null_count == df.height() {
|
169
|
+
ChunkedArray::full_null("apply", df.height())
|
170
|
+
} else {
|
171
|
+
let iter = apply_iter(df, lambda, init_null_count, skip);
|
172
|
+
iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
176
|
+
/// Apply a lambda with utf8 output type
|
177
|
+
pub fn apply_lambda_with_utf8_out_type(
|
178
|
+
df: &DataFrame,
|
179
|
+
lambda: Value,
|
180
|
+
init_null_count: usize,
|
181
|
+
first_value: Option<&str>,
|
182
|
+
) -> Utf8Chunked {
|
183
|
+
let skip = usize::from(first_value.is_some());
|
184
|
+
if init_null_count == df.height() {
|
185
|
+
ChunkedArray::full_null("apply", df.height())
|
186
|
+
} else {
|
187
|
+
let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
|
188
|
+
iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
/// Apply a lambda with list output type
|
193
|
+
pub fn apply_lambda_with_list_out_type<'a>(
|
194
|
+
df: &'a DataFrame,
|
195
|
+
lambda: Value,
|
196
|
+
init_null_count: usize,
|
197
|
+
first_value: Option<&Series>,
|
198
|
+
dt: &DataType,
|
199
|
+
) -> RbResult<ListChunked> {
|
200
|
+
let columns = df.get_columns();
|
201
|
+
|
202
|
+
let skip = usize::from(first_value.is_some());
|
203
|
+
if init_null_count == df.height() {
|
204
|
+
Ok(ChunkedArray::full_null("apply", df.height()))
|
205
|
+
} else {
|
206
|
+
let iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
207
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
208
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
209
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
210
|
+
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
211
|
+
Ok(val) => val
|
212
|
+
.try_convert::<&RbSeries>()
|
213
|
+
.ok()
|
214
|
+
.map(|ps| ps.series.borrow().clone()),
|
215
|
+
Err(_) => {
|
216
|
+
if val.is_nil() {
|
217
|
+
None
|
218
|
+
} else {
|
219
|
+
panic!("should return a Series, got a {:?}", val)
|
220
|
+
}
|
221
|
+
}
|
222
|
+
},
|
223
|
+
Err(e) => panic!("ruby function failed {}", e),
|
224
|
+
}
|
225
|
+
});
|
226
|
+
iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
230
|
+
pub fn apply_lambda_with_rows_output<'a>(
|
231
|
+
df: &'a DataFrame,
|
232
|
+
lambda: Value,
|
233
|
+
init_null_count: usize,
|
234
|
+
first_value: Row<'a>,
|
235
|
+
inference_size: usize,
|
236
|
+
) -> PolarsResult<DataFrame> {
|
237
|
+
let columns = df.get_columns();
|
238
|
+
let width = first_value.0.len();
|
239
|
+
let null_row = Row::new(vec![AnyValue::Null; width]);
|
240
|
+
|
241
|
+
let mut row_buf = Row::default();
|
242
|
+
|
243
|
+
let skip = 1;
|
244
|
+
let mut row_iter = ((init_null_count + skip)..df.height()).map(|idx| {
|
245
|
+
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
|
246
|
+
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
247
|
+
match lambda.funcall::<_, _, Value>("call", tpl) {
|
248
|
+
Ok(val) => {
|
249
|
+
match val.try_convert::<RArray>().ok() {
|
250
|
+
Some(tuple) => {
|
251
|
+
row_buf.0.clear();
|
252
|
+
for v in tuple.each() {
|
253
|
+
let v = v.unwrap().try_convert::<Wrap<AnyValue>>().unwrap().0;
|
254
|
+
row_buf.0.push(v);
|
255
|
+
}
|
256
|
+
let ptr = &row_buf as *const Row;
|
257
|
+
// Safety:
|
258
|
+
// we know that row constructor of polars dataframe does not keep a reference
|
259
|
+
// to the row. Before we mutate the row buf again, the reference is dropped.
|
260
|
+
// we only cannot prove it to the compiler.
|
261
|
+
// we still do this because it saves a Vec allocation in a hot loop.
|
262
|
+
unsafe { &*ptr }
|
263
|
+
}
|
264
|
+
None => &null_row,
|
265
|
+
}
|
266
|
+
}
|
267
|
+
Err(e) => panic!("ruby function failed {}", e),
|
268
|
+
}
|
269
|
+
});
|
270
|
+
|
271
|
+
// first rows for schema inference
|
272
|
+
let mut buf = Vec::with_capacity(inference_size);
|
273
|
+
buf.push(first_value);
|
274
|
+
buf.extend((&mut row_iter).take(inference_size).cloned());
|
275
|
+
let schema = rows_to_schema_first_non_null(&buf, Some(50));
|
276
|
+
|
277
|
+
if init_null_count > 0 {
|
278
|
+
// Safety: we know the iterators size
|
279
|
+
let iter = unsafe {
|
280
|
+
(0..init_null_count)
|
281
|
+
.map(|_| &null_row)
|
282
|
+
.chain(buf.iter())
|
283
|
+
.chain(row_iter)
|
284
|
+
.trust_my_length(df.height())
|
285
|
+
};
|
286
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
287
|
+
} else {
|
288
|
+
// Safety: we know the iterators size
|
289
|
+
let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
|
290
|
+
DataFrame::from_rows_iter_and_schema(iter, &schema)
|
291
|
+
}
|
292
|
+
}
|