polars-df 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -17,6 +17,44 @@ macro_rules! set_unwrapped_or_0 {
|
|
17
17
|
};
|
18
18
|
}
|
19
19
|
|
20
|
+
pub fn rolling_corr(
|
21
|
+
x: &RbExpr,
|
22
|
+
y: &RbExpr,
|
23
|
+
window_size: IdxSize,
|
24
|
+
min_periods: IdxSize,
|
25
|
+
ddof: u8,
|
26
|
+
) -> RbExpr {
|
27
|
+
dsl::rolling_corr(
|
28
|
+
x.inner.clone(),
|
29
|
+
y.inner.clone(),
|
30
|
+
RollingCovOptions {
|
31
|
+
min_periods,
|
32
|
+
window_size,
|
33
|
+
ddof,
|
34
|
+
},
|
35
|
+
)
|
36
|
+
.into()
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn rolling_cov(
|
40
|
+
x: &RbExpr,
|
41
|
+
y: &RbExpr,
|
42
|
+
window_size: IdxSize,
|
43
|
+
min_periods: IdxSize,
|
44
|
+
ddof: u8,
|
45
|
+
) -> RbExpr {
|
46
|
+
dsl::rolling_cov(
|
47
|
+
x.inner.clone(),
|
48
|
+
y.inner.clone(),
|
49
|
+
RollingCovOptions {
|
50
|
+
min_periods,
|
51
|
+
window_size,
|
52
|
+
ddof,
|
53
|
+
},
|
54
|
+
)
|
55
|
+
.into()
|
56
|
+
}
|
57
|
+
|
20
58
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
21
59
|
let by = rb_exprs_to_exprs(by)?;
|
22
60
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -83,6 +121,47 @@ pub fn concat_lf(
|
|
83
121
|
Ok(lf.into())
|
84
122
|
}
|
85
123
|
|
124
|
+
pub fn concat_list(s: RArray) -> RbResult<RbExpr> {
|
125
|
+
let s = rb_exprs_to_exprs(s)?;
|
126
|
+
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
127
|
+
Ok(expr.into())
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn concat_str(s: RArray, separator: String, ignore_nulls: bool) -> RbResult<RbExpr> {
|
131
|
+
let s = rb_exprs_to_exprs(s)?;
|
132
|
+
Ok(dsl::concat_str(s, &separator, ignore_nulls).into())
|
133
|
+
}
|
134
|
+
|
135
|
+
pub fn len() -> RbExpr {
|
136
|
+
dsl::len().into()
|
137
|
+
}
|
138
|
+
|
139
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
140
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
141
|
+
}
|
142
|
+
|
143
|
+
pub fn arctan2(y: &RbExpr, x: &RbExpr) -> RbExpr {
|
144
|
+
y.inner.clone().arctan2(x.inner.clone()).into()
|
145
|
+
}
|
146
|
+
|
147
|
+
pub fn arctan2d(y: &RbExpr, x: &RbExpr) -> RbExpr {
|
148
|
+
y.inner.clone().arctan2(x.inner.clone()).degrees().into()
|
149
|
+
}
|
150
|
+
|
151
|
+
pub fn cum_fold(
|
152
|
+
acc: &RbExpr,
|
153
|
+
lambda: Value,
|
154
|
+
exprs: RArray,
|
155
|
+
include_init: bool,
|
156
|
+
) -> RbResult<RbExpr> {
|
157
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
158
|
+
let lambda = Opaque::from(lambda);
|
159
|
+
|
160
|
+
let func =
|
161
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
162
|
+
Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
163
|
+
}
|
164
|
+
|
86
165
|
pub fn concat_lf_diagonal(
|
87
166
|
lfs: RArray,
|
88
167
|
rechunk: bool,
|
@@ -110,6 +189,19 @@ pub fn concat_lf_diagonal(
|
|
110
189
|
Ok(lf.into())
|
111
190
|
}
|
112
191
|
|
192
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
193
|
+
dsl::dtype_cols(dtypes).into()
|
194
|
+
}
|
195
|
+
|
196
|
+
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
197
|
+
let dtypes = dtypes
|
198
|
+
.each()
|
199
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
200
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
201
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
202
|
+
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
203
|
+
}
|
204
|
+
|
113
205
|
#[allow(clippy::too_many_arguments)]
|
114
206
|
pub fn duration(
|
115
207
|
weeks: Option<&RbExpr>,
|
@@ -146,38 +238,21 @@ pub fn duration(
|
|
146
238
|
dsl::duration(args).into()
|
147
239
|
}
|
148
240
|
|
149
|
-
pub fn count() -> RbExpr {
|
150
|
-
dsl::count().into()
|
151
|
-
}
|
152
|
-
|
153
241
|
pub fn first() -> RbExpr {
|
154
242
|
dsl::first().into()
|
155
243
|
}
|
156
244
|
|
157
|
-
pub fn last() -> RbExpr {
|
158
|
-
dsl::last().into()
|
159
|
-
}
|
160
|
-
|
161
|
-
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
162
|
-
dsl::dtype_cols(dtypes).into()
|
163
|
-
}
|
164
|
-
|
165
245
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
166
246
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
247
|
let lambda = Opaque::from(lambda);
|
168
248
|
|
169
249
|
let func =
|
170
250
|
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
171
|
-
Ok(
|
251
|
+
Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
172
252
|
}
|
173
253
|
|
174
|
-
pub fn
|
175
|
-
|
176
|
-
let lambda = Opaque::from(lambda);
|
177
|
-
|
178
|
-
let func =
|
179
|
-
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
-
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
254
|
+
pub fn last() -> RbExpr {
|
255
|
+
dsl::last().into()
|
181
256
|
}
|
182
257
|
|
183
258
|
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
@@ -219,6 +294,10 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
219
294
|
}
|
220
295
|
}
|
221
296
|
|
297
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
298
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
299
|
+
}
|
300
|
+
|
222
301
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
223
302
|
let mut value = value.inner.clone();
|
224
303
|
let n = n.inner.clone();
|
@@ -228,7 +307,7 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
228
307
|
}
|
229
308
|
|
230
309
|
if let Expr::Literal(lv) = &value {
|
231
|
-
let av = lv.
|
310
|
+
let av = lv.to_any_value().unwrap();
|
232
311
|
// Integer inputs that fit in Int32 are parsed as such
|
233
312
|
if let DataType::Int64 = av.dtype() {
|
234
313
|
let int_value = av.try_extract::<i64>().unwrap();
|
@@ -240,35 +319,11 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
240
319
|
Ok(dsl::repeat(value, n).into())
|
241
320
|
}
|
242
321
|
|
243
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
244
|
-
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
245
|
-
}
|
246
|
-
|
247
322
|
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
248
|
-
|
249
|
-
.into()
|
250
|
-
}
|
251
|
-
|
252
|
-
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
254
|
-
}
|
255
|
-
|
256
|
-
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
257
|
-
let s = rb_exprs_to_exprs(s)?;
|
258
|
-
Ok(dsl::concat_str(s, &sep).into())
|
323
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
|
259
324
|
}
|
260
325
|
|
261
|
-
pub fn
|
262
|
-
let
|
263
|
-
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
326
|
+
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
327
|
+
let expr = polars::sql::sql_expr(&sql).map_err(RbPolarsErr::from)?;
|
264
328
|
Ok(expr.into())
|
265
329
|
}
|
266
|
-
|
267
|
-
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
268
|
-
let dtypes = dtypes
|
269
|
-
.each()
|
270
|
-
.map(|v| Wrap::<DataType>::try_convert(v?))
|
271
|
-
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
272
|
-
let dtypes = vec_extract_wrapped(dtypes);
|
273
|
-
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
274
|
-
}
|
@@ -7,7 +7,7 @@ use polars_core::POOL;
|
|
7
7
|
use crate::conversion::Wrap;
|
8
8
|
use crate::{RbResult, RbValueError};
|
9
9
|
|
10
|
-
pub fn
|
10
|
+
pub fn get_index_type() -> Value {
|
11
11
|
Wrap(IDX_DTYPE).into_value()
|
12
12
|
}
|
13
13
|
|
@@ -36,3 +36,47 @@ pub fn get_float_fmt() -> RbResult<String> {
|
|
36
36
|
};
|
37
37
|
Ok(strfmt.to_string())
|
38
38
|
}
|
39
|
+
|
40
|
+
pub fn set_float_precision(precision: Option<usize>) -> RbResult<()> {
|
41
|
+
use polars_core::fmt::set_float_precision;
|
42
|
+
set_float_precision(precision);
|
43
|
+
Ok(())
|
44
|
+
}
|
45
|
+
|
46
|
+
pub fn get_float_precision() -> RbResult<Option<usize>> {
|
47
|
+
use polars_core::fmt::get_float_precision;
|
48
|
+
Ok(get_float_precision())
|
49
|
+
}
|
50
|
+
|
51
|
+
pub fn set_thousands_separator(sep: Option<char>) -> RbResult<()> {
|
52
|
+
use polars_core::fmt::set_thousands_separator;
|
53
|
+
set_thousands_separator(sep);
|
54
|
+
Ok(())
|
55
|
+
}
|
56
|
+
|
57
|
+
pub fn get_thousands_separator() -> RbResult<Option<String>> {
|
58
|
+
use polars_core::fmt::get_thousands_separator;
|
59
|
+
Ok(Some(get_thousands_separator()))
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn set_decimal_separator(sep: Option<char>) -> RbResult<()> {
|
63
|
+
use polars_core::fmt::set_decimal_separator;
|
64
|
+
set_decimal_separator(sep);
|
65
|
+
Ok(())
|
66
|
+
}
|
67
|
+
|
68
|
+
pub fn get_decimal_separator() -> RbResult<Option<char>> {
|
69
|
+
use polars_core::fmt::get_decimal_separator;
|
70
|
+
Ok(Some(get_decimal_separator()))
|
71
|
+
}
|
72
|
+
|
73
|
+
pub fn set_trim_decimal_zeros(trim: Option<bool>) -> RbResult<()> {
|
74
|
+
use polars_core::fmt::set_trim_decimal_zeros;
|
75
|
+
set_trim_decimal_zeros(trim);
|
76
|
+
Ok(())
|
77
|
+
}
|
78
|
+
|
79
|
+
pub fn get_trim_decimal_zeros() -> RbResult<Option<bool>> {
|
80
|
+
use polars_core::fmt::get_trim_decimal_zeros;
|
81
|
+
Ok(Some(get_trim_decimal_zeros()))
|
82
|
+
}
|
@@ -1,3 +1,7 @@
|
|
1
|
+
use crate::RbResult;
|
2
|
+
use magnus::{RArray, Ruby, Value};
|
3
|
+
use polars_core::StringCacheHolder;
|
4
|
+
|
1
5
|
pub fn enable_string_cache() {
|
2
6
|
polars_core::enable_string_cache()
|
3
7
|
}
|
@@ -9,3 +13,13 @@ pub fn disable_string_cache() {
|
|
9
13
|
pub fn using_string_cache() -> bool {
|
10
14
|
polars_core::using_string_cache()
|
11
15
|
}
|
16
|
+
|
17
|
+
#[magnus::wrap(class = "Polars::RbStringCacheHolder")]
|
18
|
+
pub struct RbStringCacheHolder {}
|
19
|
+
|
20
|
+
impl RbStringCacheHolder {
|
21
|
+
pub fn hold() -> RbResult<Value> {
|
22
|
+
let _hold = StringCacheHolder::hold();
|
23
|
+
Ruby::get().unwrap().yield_splat(RArray::new())
|
24
|
+
}
|
25
|
+
}
|
@@ -1,9 +1,10 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
|
-
use polars::io::
|
2
|
+
use polars::io::RowIndex;
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
|
+
use std::num::NonZeroUsize;
|
7
8
|
use std::path::PathBuf;
|
8
9
|
|
9
10
|
use crate::conversion::*;
|
@@ -55,13 +56,14 @@ impl RbLazyFrame {
|
|
55
56
|
pub fn new_from_ndjson(
|
56
57
|
path: String,
|
57
58
|
infer_schema_length: Option<usize>,
|
58
|
-
batch_size: Option<
|
59
|
+
batch_size: Option<Wrap<NonZeroUsize>>,
|
59
60
|
n_rows: Option<usize>,
|
60
61
|
low_memory: bool,
|
61
62
|
rechunk: bool,
|
62
|
-
|
63
|
+
row_index: Option<(String, IdxSize)>,
|
63
64
|
) -> RbResult<Self> {
|
64
|
-
let
|
65
|
+
let batch_size = batch_size.map(|v| v.0);
|
66
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
65
67
|
|
66
68
|
let lf = LazyJsonLineReader::new(path)
|
67
69
|
.with_infer_schema_length(infer_schema_length)
|
@@ -69,7 +71,7 @@ impl RbLazyFrame {
|
|
69
71
|
.with_n_rows(n_rows)
|
70
72
|
.low_memory(low_memory)
|
71
73
|
.with_rechunk(rechunk)
|
72
|
-
.
|
74
|
+
.with_row_index(row_index)
|
73
75
|
.finish()
|
74
76
|
.map_err(RbPolarsErr::from)?;
|
75
77
|
Ok(lf.into())
|
@@ -95,7 +97,7 @@ impl RbLazyFrame {
|
|
95
97
|
let rechunk = bool::try_convert(arguments[14])?;
|
96
98
|
let skip_rows_after_header = usize::try_convert(arguments[15])?;
|
97
99
|
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
|
98
|
-
let
|
100
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
99
101
|
let try_parse_dates = bool::try_convert(arguments[18])?;
|
100
102
|
let eol_char = String::try_convert(arguments[19])?;
|
101
103
|
// end arguments
|
@@ -105,7 +107,7 @@ impl RbLazyFrame {
|
|
105
107
|
let separator = separator.as_bytes()[0];
|
106
108
|
let eol_char = eol_char.as_bytes()[0];
|
107
109
|
|
108
|
-
let
|
110
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
109
111
|
|
110
112
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
111
113
|
overwrite_dtype
|
@@ -129,7 +131,7 @@ impl RbLazyFrame {
|
|
129
131
|
.with_rechunk(rechunk)
|
130
132
|
.with_skip_rows_after_header(skip_rows_after_header)
|
131
133
|
.with_encoding(encoding.0)
|
132
|
-
.
|
134
|
+
.with_row_index(row_index)
|
133
135
|
.with_try_parse_dates(try_parse_dates)
|
134
136
|
.with_null_values(null_values);
|
135
137
|
|
@@ -147,18 +149,18 @@ impl RbLazyFrame {
|
|
147
149
|
cache: bool,
|
148
150
|
parallel: Wrap<ParallelStrategy>,
|
149
151
|
rechunk: bool,
|
150
|
-
|
152
|
+
row_index: Option<(String, IdxSize)>,
|
151
153
|
low_memory: bool,
|
152
154
|
use_statistics: bool,
|
153
155
|
hive_partitioning: bool,
|
154
156
|
) -> RbResult<Self> {
|
155
|
-
let
|
157
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
156
158
|
let args = ScanArgsParquet {
|
157
159
|
n_rows,
|
158
160
|
cache,
|
159
161
|
parallel: parallel.0,
|
160
162
|
rechunk,
|
161
|
-
|
163
|
+
row_index,
|
162
164
|
low_memory,
|
163
165
|
// TODO support cloud options
|
164
166
|
cloud_options: None,
|
@@ -174,15 +176,15 @@ impl RbLazyFrame {
|
|
174
176
|
n_rows: Option<usize>,
|
175
177
|
cache: bool,
|
176
178
|
rechunk: bool,
|
177
|
-
|
179
|
+
row_index: Option<(String, IdxSize)>,
|
178
180
|
memory_map: bool,
|
179
181
|
) -> RbResult<Self> {
|
180
|
-
let
|
182
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
181
183
|
let args = ScanArgsIpc {
|
182
184
|
n_rows,
|
183
185
|
cache,
|
184
186
|
rechunk,
|
185
|
-
|
187
|
+
row_index,
|
186
188
|
memmap: memory_map,
|
187
189
|
};
|
188
190
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
@@ -216,20 +218,24 @@ impl RbLazyFrame {
|
|
216
218
|
projection_pushdown: bool,
|
217
219
|
simplify_expr: bool,
|
218
220
|
slice_pushdown: bool,
|
219
|
-
|
221
|
+
comm_subplan_elim: bool,
|
222
|
+
comm_subexpr_elim: bool,
|
220
223
|
allow_streaming: bool,
|
221
224
|
_eager: bool,
|
222
225
|
) -> RbLazyFrame {
|
223
226
|
let ldf = self.ldf.clone();
|
224
|
-
let ldf = ldf
|
227
|
+
let mut ldf = ldf
|
225
228
|
.with_type_coercion(type_coercion)
|
226
229
|
.with_predicate_pushdown(predicate_pushdown)
|
227
230
|
.with_simplify_expr(simplify_expr)
|
228
231
|
.with_slice_pushdown(slice_pushdown)
|
229
|
-
.with_comm_subplan_elim(cse)
|
230
232
|
.with_streaming(allow_streaming)
|
231
233
|
._with_eager(_eager)
|
232
234
|
.with_projection_pushdown(projection_pushdown);
|
235
|
+
|
236
|
+
ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
|
237
|
+
ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
|
238
|
+
|
233
239
|
ldf.into()
|
234
240
|
}
|
235
241
|
|
@@ -304,6 +310,75 @@ impl RbLazyFrame {
|
|
304
310
|
Ok(())
|
305
311
|
}
|
306
312
|
|
313
|
+
pub fn sink_ipc(
|
314
|
+
&self,
|
315
|
+
path: PathBuf,
|
316
|
+
compression: Option<Wrap<IpcCompression>>,
|
317
|
+
maintain_order: bool,
|
318
|
+
) -> RbResult<()> {
|
319
|
+
let options = IpcWriterOptions {
|
320
|
+
compression: compression.map(|c| c.0),
|
321
|
+
maintain_order,
|
322
|
+
};
|
323
|
+
|
324
|
+
let ldf = self.ldf.clone();
|
325
|
+
ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
|
326
|
+
Ok(())
|
327
|
+
}
|
328
|
+
|
329
|
+
pub fn sink_csv(
|
330
|
+
&self,
|
331
|
+
path: PathBuf,
|
332
|
+
include_bom: bool,
|
333
|
+
include_header: bool,
|
334
|
+
separator: u8,
|
335
|
+
line_terminator: String,
|
336
|
+
quote_char: u8,
|
337
|
+
batch_size: Wrap<NonZeroUsize>,
|
338
|
+
datetime_format: Option<String>,
|
339
|
+
date_format: Option<String>,
|
340
|
+
time_format: Option<String>,
|
341
|
+
float_precision: Option<usize>,
|
342
|
+
null_value: Option<String>,
|
343
|
+
quote_style: Option<Wrap<QuoteStyle>>,
|
344
|
+
maintain_order: bool,
|
345
|
+
) -> RbResult<()> {
|
346
|
+
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
347
|
+
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
348
|
+
|
349
|
+
let serialize_options = SerializeOptions {
|
350
|
+
date_format,
|
351
|
+
time_format,
|
352
|
+
datetime_format,
|
353
|
+
float_precision,
|
354
|
+
separator,
|
355
|
+
quote_char,
|
356
|
+
null: null_value,
|
357
|
+
line_terminator,
|
358
|
+
quote_style,
|
359
|
+
};
|
360
|
+
|
361
|
+
let options = CsvWriterOptions {
|
362
|
+
include_bom,
|
363
|
+
include_header,
|
364
|
+
maintain_order,
|
365
|
+
batch_size: batch_size.0,
|
366
|
+
serialize_options,
|
367
|
+
};
|
368
|
+
|
369
|
+
let ldf = self.ldf.clone();
|
370
|
+
ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
|
371
|
+
Ok(())
|
372
|
+
}
|
373
|
+
|
374
|
+
pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
|
375
|
+
let options = JsonWriterOptions { maintain_order };
|
376
|
+
|
377
|
+
let ldf = self.ldf.clone();
|
378
|
+
ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
|
379
|
+
Ok(())
|
380
|
+
}
|
381
|
+
|
307
382
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
308
383
|
let ldf = self.ldf.clone();
|
309
384
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
@@ -321,6 +396,12 @@ impl RbLazyFrame {
|
|
321
396
|
Ok(ldf.select(exprs).into())
|
322
397
|
}
|
323
398
|
|
399
|
+
pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
|
400
|
+
let ldf = self.ldf.clone();
|
401
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
402
|
+
Ok(ldf.select_seq(exprs).into())
|
403
|
+
}
|
404
|
+
|
324
405
|
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
325
406
|
let ldf = self.ldf.clone();
|
326
407
|
let by = rb_exprs_to_exprs(by)?;
|
@@ -334,7 +415,7 @@ impl RbLazyFrame {
|
|
334
415
|
})
|
335
416
|
}
|
336
417
|
|
337
|
-
pub fn
|
418
|
+
pub fn rolling(
|
338
419
|
&self,
|
339
420
|
index_column: &RbExpr,
|
340
421
|
period: String,
|
@@ -459,6 +540,7 @@ impl RbLazyFrame {
|
|
459
540
|
right_on: RArray,
|
460
541
|
allow_parallel: bool,
|
461
542
|
force_parallel: bool,
|
543
|
+
join_nulls: bool,
|
462
544
|
how: Wrap<JoinType>,
|
463
545
|
suffix: String,
|
464
546
|
) -> RbResult<Self> {
|
@@ -474,17 +556,28 @@ impl RbLazyFrame {
|
|
474
556
|
.right_on(right_on)
|
475
557
|
.allow_parallel(allow_parallel)
|
476
558
|
.force_parallel(force_parallel)
|
559
|
+
.join_nulls(join_nulls)
|
477
560
|
.how(how.0)
|
478
561
|
.suffix(suffix)
|
479
562
|
.finish()
|
480
563
|
.into())
|
481
564
|
}
|
482
565
|
|
566
|
+
pub fn with_column(&self, expr: &RbExpr) -> Self {
|
567
|
+
let ldf = self.ldf.clone();
|
568
|
+
ldf.with_column(expr.inner.clone()).into()
|
569
|
+
}
|
570
|
+
|
483
571
|
pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
|
484
572
|
let ldf = self.ldf.clone();
|
485
573
|
Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
|
486
574
|
}
|
487
575
|
|
576
|
+
pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
|
577
|
+
let ldf = self.ldf.clone();
|
578
|
+
Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
|
579
|
+
}
|
580
|
+
|
488
581
|
pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
|
489
582
|
let ldf = self.ldf.clone();
|
490
583
|
ldf.rename(existing, new).into()
|
@@ -569,6 +662,11 @@ impl RbLazyFrame {
|
|
569
662
|
Ok(ldf.explode(column).into())
|
570
663
|
}
|
571
664
|
|
665
|
+
pub fn null_count(&self) -> Self {
|
666
|
+
let ldf = self.ldf.clone();
|
667
|
+
ldf.null_count().into()
|
668
|
+
}
|
669
|
+
|
572
670
|
pub fn unique(
|
573
671
|
&self,
|
574
672
|
maintain_order: bool,
|
@@ -619,14 +717,18 @@ impl RbLazyFrame {
|
|
619
717
|
ldf.melt(args).into()
|
620
718
|
}
|
621
719
|
|
622
|
-
pub fn
|
720
|
+
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
623
721
|
let ldf = self.ldf.clone();
|
624
|
-
ldf.
|
722
|
+
ldf.with_row_index(&name, offset).into()
|
625
723
|
}
|
626
724
|
|
627
|
-
pub fn
|
725
|
+
pub fn drop(&self, cols: Vec<String>) -> Self {
|
628
726
|
let ldf = self.ldf.clone();
|
629
|
-
ldf.
|
727
|
+
ldf.drop(cols).into()
|
728
|
+
}
|
729
|
+
|
730
|
+
pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
|
731
|
+
self.ldf.clone().cast_all(dtype.0, strict).into()
|
630
732
|
}
|
631
733
|
|
632
734
|
pub fn clone(&self) -> Self {
|
@@ -668,4 +770,18 @@ impl RbLazyFrame {
|
|
668
770
|
pub fn width(&self) -> RbResult<usize> {
|
669
771
|
Ok(self.get_schema()?.len())
|
670
772
|
}
|
773
|
+
|
774
|
+
pub fn count(&self) -> Self {
|
775
|
+
let ldf = self.ldf.clone();
|
776
|
+
ldf.count().into()
|
777
|
+
}
|
778
|
+
|
779
|
+
pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
|
780
|
+
let out = self
|
781
|
+
.ldf
|
782
|
+
.clone()
|
783
|
+
.merge_sorted(other.ldf.clone(), &key)
|
784
|
+
.map_err(RbPolarsErr::from)?;
|
785
|
+
Ok(out.into())
|
786
|
+
}
|
671
787
|
}
|