polars-df 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -17,6 +17,44 @@ macro_rules! set_unwrapped_or_0 {
|
|
17
17
|
};
|
18
18
|
}
|
19
19
|
|
20
|
+
pub fn rolling_corr(
|
21
|
+
x: &RbExpr,
|
22
|
+
y: &RbExpr,
|
23
|
+
window_size: IdxSize,
|
24
|
+
min_periods: IdxSize,
|
25
|
+
ddof: u8,
|
26
|
+
) -> RbExpr {
|
27
|
+
dsl::rolling_corr(
|
28
|
+
x.inner.clone(),
|
29
|
+
y.inner.clone(),
|
30
|
+
RollingCovOptions {
|
31
|
+
min_periods,
|
32
|
+
window_size,
|
33
|
+
ddof,
|
34
|
+
},
|
35
|
+
)
|
36
|
+
.into()
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn rolling_cov(
|
40
|
+
x: &RbExpr,
|
41
|
+
y: &RbExpr,
|
42
|
+
window_size: IdxSize,
|
43
|
+
min_periods: IdxSize,
|
44
|
+
ddof: u8,
|
45
|
+
) -> RbExpr {
|
46
|
+
dsl::rolling_cov(
|
47
|
+
x.inner.clone(),
|
48
|
+
y.inner.clone(),
|
49
|
+
RollingCovOptions {
|
50
|
+
min_periods,
|
51
|
+
window_size,
|
52
|
+
ddof,
|
53
|
+
},
|
54
|
+
)
|
55
|
+
.into()
|
56
|
+
}
|
57
|
+
|
20
58
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
21
59
|
let by = rb_exprs_to_exprs(by)?;
|
22
60
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -83,6 +121,47 @@ pub fn concat_lf(
|
|
83
121
|
Ok(lf.into())
|
84
122
|
}
|
85
123
|
|
124
|
+
pub fn concat_list(s: RArray) -> RbResult<RbExpr> {
|
125
|
+
let s = rb_exprs_to_exprs(s)?;
|
126
|
+
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
127
|
+
Ok(expr.into())
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn concat_str(s: RArray, separator: String, ignore_nulls: bool) -> RbResult<RbExpr> {
|
131
|
+
let s = rb_exprs_to_exprs(s)?;
|
132
|
+
Ok(dsl::concat_str(s, &separator, ignore_nulls).into())
|
133
|
+
}
|
134
|
+
|
135
|
+
pub fn len() -> RbExpr {
|
136
|
+
dsl::len().into()
|
137
|
+
}
|
138
|
+
|
139
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
140
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
141
|
+
}
|
142
|
+
|
143
|
+
pub fn arctan2(y: &RbExpr, x: &RbExpr) -> RbExpr {
|
144
|
+
y.inner.clone().arctan2(x.inner.clone()).into()
|
145
|
+
}
|
146
|
+
|
147
|
+
pub fn arctan2d(y: &RbExpr, x: &RbExpr) -> RbExpr {
|
148
|
+
y.inner.clone().arctan2(x.inner.clone()).degrees().into()
|
149
|
+
}
|
150
|
+
|
151
|
+
pub fn cum_fold(
|
152
|
+
acc: &RbExpr,
|
153
|
+
lambda: Value,
|
154
|
+
exprs: RArray,
|
155
|
+
include_init: bool,
|
156
|
+
) -> RbResult<RbExpr> {
|
157
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
158
|
+
let lambda = Opaque::from(lambda);
|
159
|
+
|
160
|
+
let func =
|
161
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
162
|
+
Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
163
|
+
}
|
164
|
+
|
86
165
|
pub fn concat_lf_diagonal(
|
87
166
|
lfs: RArray,
|
88
167
|
rechunk: bool,
|
@@ -110,6 +189,19 @@ pub fn concat_lf_diagonal(
|
|
110
189
|
Ok(lf.into())
|
111
190
|
}
|
112
191
|
|
192
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
193
|
+
dsl::dtype_cols(dtypes).into()
|
194
|
+
}
|
195
|
+
|
196
|
+
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
197
|
+
let dtypes = dtypes
|
198
|
+
.each()
|
199
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
200
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
201
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
202
|
+
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
203
|
+
}
|
204
|
+
|
113
205
|
#[allow(clippy::too_many_arguments)]
|
114
206
|
pub fn duration(
|
115
207
|
weeks: Option<&RbExpr>,
|
@@ -146,38 +238,21 @@ pub fn duration(
|
|
146
238
|
dsl::duration(args).into()
|
147
239
|
}
|
148
240
|
|
149
|
-
pub fn count() -> RbExpr {
|
150
|
-
dsl::count().into()
|
151
|
-
}
|
152
|
-
|
153
241
|
pub fn first() -> RbExpr {
|
154
242
|
dsl::first().into()
|
155
243
|
}
|
156
244
|
|
157
|
-
pub fn last() -> RbExpr {
|
158
|
-
dsl::last().into()
|
159
|
-
}
|
160
|
-
|
161
|
-
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
162
|
-
dsl::dtype_cols(dtypes).into()
|
163
|
-
}
|
164
|
-
|
165
245
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
166
246
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
247
|
let lambda = Opaque::from(lambda);
|
168
248
|
|
169
249
|
let func =
|
170
250
|
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
171
|
-
Ok(
|
251
|
+
Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
172
252
|
}
|
173
253
|
|
174
|
-
pub fn
|
175
|
-
|
176
|
-
let lambda = Opaque::from(lambda);
|
177
|
-
|
178
|
-
let func =
|
179
|
-
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
-
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
254
|
+
pub fn last() -> RbExpr {
|
255
|
+
dsl::last().into()
|
181
256
|
}
|
182
257
|
|
183
258
|
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
@@ -219,6 +294,10 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
219
294
|
}
|
220
295
|
}
|
221
296
|
|
297
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
298
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
299
|
+
}
|
300
|
+
|
222
301
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
223
302
|
let mut value = value.inner.clone();
|
224
303
|
let n = n.inner.clone();
|
@@ -228,7 +307,7 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
228
307
|
}
|
229
308
|
|
230
309
|
if let Expr::Literal(lv) = &value {
|
231
|
-
let av = lv.
|
310
|
+
let av = lv.to_any_value().unwrap();
|
232
311
|
// Integer inputs that fit in Int32 are parsed as such
|
233
312
|
if let DataType::Int64 = av.dtype() {
|
234
313
|
let int_value = av.try_extract::<i64>().unwrap();
|
@@ -240,35 +319,11 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
240
319
|
Ok(dsl::repeat(value, n).into())
|
241
320
|
}
|
242
321
|
|
243
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
244
|
-
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
245
|
-
}
|
246
|
-
|
247
322
|
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
248
|
-
|
249
|
-
.into()
|
250
|
-
}
|
251
|
-
|
252
|
-
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
254
|
-
}
|
255
|
-
|
256
|
-
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
257
|
-
let s = rb_exprs_to_exprs(s)?;
|
258
|
-
Ok(dsl::concat_str(s, &sep).into())
|
323
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
|
259
324
|
}
|
260
325
|
|
261
|
-
pub fn
|
262
|
-
let
|
263
|
-
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
326
|
+
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
327
|
+
let expr = polars::sql::sql_expr(&sql).map_err(RbPolarsErr::from)?;
|
264
328
|
Ok(expr.into())
|
265
329
|
}
|
266
|
-
|
267
|
-
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
268
|
-
let dtypes = dtypes
|
269
|
-
.each()
|
270
|
-
.map(|v| Wrap::<DataType>::try_convert(v?))
|
271
|
-
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
272
|
-
let dtypes = vec_extract_wrapped(dtypes);
|
273
|
-
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
274
|
-
}
|
@@ -7,7 +7,7 @@ use polars_core::POOL;
|
|
7
7
|
use crate::conversion::Wrap;
|
8
8
|
use crate::{RbResult, RbValueError};
|
9
9
|
|
10
|
-
pub fn
|
10
|
+
pub fn get_index_type() -> Value {
|
11
11
|
Wrap(IDX_DTYPE).into_value()
|
12
12
|
}
|
13
13
|
|
@@ -36,3 +36,47 @@ pub fn get_float_fmt() -> RbResult<String> {
|
|
36
36
|
};
|
37
37
|
Ok(strfmt.to_string())
|
38
38
|
}
|
39
|
+
|
40
|
+
pub fn set_float_precision(precision: Option<usize>) -> RbResult<()> {
|
41
|
+
use polars_core::fmt::set_float_precision;
|
42
|
+
set_float_precision(precision);
|
43
|
+
Ok(())
|
44
|
+
}
|
45
|
+
|
46
|
+
pub fn get_float_precision() -> RbResult<Option<usize>> {
|
47
|
+
use polars_core::fmt::get_float_precision;
|
48
|
+
Ok(get_float_precision())
|
49
|
+
}
|
50
|
+
|
51
|
+
pub fn set_thousands_separator(sep: Option<char>) -> RbResult<()> {
|
52
|
+
use polars_core::fmt::set_thousands_separator;
|
53
|
+
set_thousands_separator(sep);
|
54
|
+
Ok(())
|
55
|
+
}
|
56
|
+
|
57
|
+
pub fn get_thousands_separator() -> RbResult<Option<String>> {
|
58
|
+
use polars_core::fmt::get_thousands_separator;
|
59
|
+
Ok(Some(get_thousands_separator()))
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn set_decimal_separator(sep: Option<char>) -> RbResult<()> {
|
63
|
+
use polars_core::fmt::set_decimal_separator;
|
64
|
+
set_decimal_separator(sep);
|
65
|
+
Ok(())
|
66
|
+
}
|
67
|
+
|
68
|
+
pub fn get_decimal_separator() -> RbResult<Option<char>> {
|
69
|
+
use polars_core::fmt::get_decimal_separator;
|
70
|
+
Ok(Some(get_decimal_separator()))
|
71
|
+
}
|
72
|
+
|
73
|
+
pub fn set_trim_decimal_zeros(trim: Option<bool>) -> RbResult<()> {
|
74
|
+
use polars_core::fmt::set_trim_decimal_zeros;
|
75
|
+
set_trim_decimal_zeros(trim);
|
76
|
+
Ok(())
|
77
|
+
}
|
78
|
+
|
79
|
+
pub fn get_trim_decimal_zeros() -> RbResult<Option<bool>> {
|
80
|
+
use polars_core::fmt::get_trim_decimal_zeros;
|
81
|
+
Ok(Some(get_trim_decimal_zeros()))
|
82
|
+
}
|
@@ -1,3 +1,7 @@
|
|
1
|
+
use crate::RbResult;
|
2
|
+
use magnus::{RArray, Ruby, Value};
|
3
|
+
use polars_core::StringCacheHolder;
|
4
|
+
|
1
5
|
pub fn enable_string_cache() {
|
2
6
|
polars_core::enable_string_cache()
|
3
7
|
}
|
@@ -9,3 +13,13 @@ pub fn disable_string_cache() {
|
|
9
13
|
pub fn using_string_cache() -> bool {
|
10
14
|
polars_core::using_string_cache()
|
11
15
|
}
|
16
|
+
|
17
|
+
#[magnus::wrap(class = "Polars::RbStringCacheHolder")]
|
18
|
+
pub struct RbStringCacheHolder {}
|
19
|
+
|
20
|
+
impl RbStringCacheHolder {
|
21
|
+
pub fn hold() -> RbResult<Value> {
|
22
|
+
let _hold = StringCacheHolder::hold();
|
23
|
+
Ruby::get().unwrap().yield_splat(RArray::new())
|
24
|
+
}
|
25
|
+
}
|
@@ -1,9 +1,10 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
|
-
use polars::io::
|
2
|
+
use polars::io::RowIndex;
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
|
+
use std::num::NonZeroUsize;
|
7
8
|
use std::path::PathBuf;
|
8
9
|
|
9
10
|
use crate::conversion::*;
|
@@ -55,13 +56,14 @@ impl RbLazyFrame {
|
|
55
56
|
pub fn new_from_ndjson(
|
56
57
|
path: String,
|
57
58
|
infer_schema_length: Option<usize>,
|
58
|
-
batch_size: Option<
|
59
|
+
batch_size: Option<Wrap<NonZeroUsize>>,
|
59
60
|
n_rows: Option<usize>,
|
60
61
|
low_memory: bool,
|
61
62
|
rechunk: bool,
|
62
|
-
|
63
|
+
row_index: Option<(String, IdxSize)>,
|
63
64
|
) -> RbResult<Self> {
|
64
|
-
let
|
65
|
+
let batch_size = batch_size.map(|v| v.0);
|
66
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
65
67
|
|
66
68
|
let lf = LazyJsonLineReader::new(path)
|
67
69
|
.with_infer_schema_length(infer_schema_length)
|
@@ -69,7 +71,7 @@ impl RbLazyFrame {
|
|
69
71
|
.with_n_rows(n_rows)
|
70
72
|
.low_memory(low_memory)
|
71
73
|
.with_rechunk(rechunk)
|
72
|
-
.
|
74
|
+
.with_row_index(row_index)
|
73
75
|
.finish()
|
74
76
|
.map_err(RbPolarsErr::from)?;
|
75
77
|
Ok(lf.into())
|
@@ -95,7 +97,7 @@ impl RbLazyFrame {
|
|
95
97
|
let rechunk = bool::try_convert(arguments[14])?;
|
96
98
|
let skip_rows_after_header = usize::try_convert(arguments[15])?;
|
97
99
|
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
|
98
|
-
let
|
100
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
99
101
|
let try_parse_dates = bool::try_convert(arguments[18])?;
|
100
102
|
let eol_char = String::try_convert(arguments[19])?;
|
101
103
|
// end arguments
|
@@ -105,7 +107,7 @@ impl RbLazyFrame {
|
|
105
107
|
let separator = separator.as_bytes()[0];
|
106
108
|
let eol_char = eol_char.as_bytes()[0];
|
107
109
|
|
108
|
-
let
|
110
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
109
111
|
|
110
112
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
111
113
|
overwrite_dtype
|
@@ -129,7 +131,7 @@ impl RbLazyFrame {
|
|
129
131
|
.with_rechunk(rechunk)
|
130
132
|
.with_skip_rows_after_header(skip_rows_after_header)
|
131
133
|
.with_encoding(encoding.0)
|
132
|
-
.
|
134
|
+
.with_row_index(row_index)
|
133
135
|
.with_try_parse_dates(try_parse_dates)
|
134
136
|
.with_null_values(null_values);
|
135
137
|
|
@@ -147,18 +149,18 @@ impl RbLazyFrame {
|
|
147
149
|
cache: bool,
|
148
150
|
parallel: Wrap<ParallelStrategy>,
|
149
151
|
rechunk: bool,
|
150
|
-
|
152
|
+
row_index: Option<(String, IdxSize)>,
|
151
153
|
low_memory: bool,
|
152
154
|
use_statistics: bool,
|
153
155
|
hive_partitioning: bool,
|
154
156
|
) -> RbResult<Self> {
|
155
|
-
let
|
157
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
156
158
|
let args = ScanArgsParquet {
|
157
159
|
n_rows,
|
158
160
|
cache,
|
159
161
|
parallel: parallel.0,
|
160
162
|
rechunk,
|
161
|
-
|
163
|
+
row_index,
|
162
164
|
low_memory,
|
163
165
|
// TODO support cloud options
|
164
166
|
cloud_options: None,
|
@@ -174,15 +176,15 @@ impl RbLazyFrame {
|
|
174
176
|
n_rows: Option<usize>,
|
175
177
|
cache: bool,
|
176
178
|
rechunk: bool,
|
177
|
-
|
179
|
+
row_index: Option<(String, IdxSize)>,
|
178
180
|
memory_map: bool,
|
179
181
|
) -> RbResult<Self> {
|
180
|
-
let
|
182
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
181
183
|
let args = ScanArgsIpc {
|
182
184
|
n_rows,
|
183
185
|
cache,
|
184
186
|
rechunk,
|
185
|
-
|
187
|
+
row_index,
|
186
188
|
memmap: memory_map,
|
187
189
|
};
|
188
190
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
@@ -216,20 +218,24 @@ impl RbLazyFrame {
|
|
216
218
|
projection_pushdown: bool,
|
217
219
|
simplify_expr: bool,
|
218
220
|
slice_pushdown: bool,
|
219
|
-
|
221
|
+
comm_subplan_elim: bool,
|
222
|
+
comm_subexpr_elim: bool,
|
220
223
|
allow_streaming: bool,
|
221
224
|
_eager: bool,
|
222
225
|
) -> RbLazyFrame {
|
223
226
|
let ldf = self.ldf.clone();
|
224
|
-
let ldf = ldf
|
227
|
+
let mut ldf = ldf
|
225
228
|
.with_type_coercion(type_coercion)
|
226
229
|
.with_predicate_pushdown(predicate_pushdown)
|
227
230
|
.with_simplify_expr(simplify_expr)
|
228
231
|
.with_slice_pushdown(slice_pushdown)
|
229
|
-
.with_comm_subplan_elim(cse)
|
230
232
|
.with_streaming(allow_streaming)
|
231
233
|
._with_eager(_eager)
|
232
234
|
.with_projection_pushdown(projection_pushdown);
|
235
|
+
|
236
|
+
ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
|
237
|
+
ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
|
238
|
+
|
233
239
|
ldf.into()
|
234
240
|
}
|
235
241
|
|
@@ -304,6 +310,75 @@ impl RbLazyFrame {
|
|
304
310
|
Ok(())
|
305
311
|
}
|
306
312
|
|
313
|
+
pub fn sink_ipc(
|
314
|
+
&self,
|
315
|
+
path: PathBuf,
|
316
|
+
compression: Option<Wrap<IpcCompression>>,
|
317
|
+
maintain_order: bool,
|
318
|
+
) -> RbResult<()> {
|
319
|
+
let options = IpcWriterOptions {
|
320
|
+
compression: compression.map(|c| c.0),
|
321
|
+
maintain_order,
|
322
|
+
};
|
323
|
+
|
324
|
+
let ldf = self.ldf.clone();
|
325
|
+
ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
|
326
|
+
Ok(())
|
327
|
+
}
|
328
|
+
|
329
|
+
pub fn sink_csv(
|
330
|
+
&self,
|
331
|
+
path: PathBuf,
|
332
|
+
include_bom: bool,
|
333
|
+
include_header: bool,
|
334
|
+
separator: u8,
|
335
|
+
line_terminator: String,
|
336
|
+
quote_char: u8,
|
337
|
+
batch_size: Wrap<NonZeroUsize>,
|
338
|
+
datetime_format: Option<String>,
|
339
|
+
date_format: Option<String>,
|
340
|
+
time_format: Option<String>,
|
341
|
+
float_precision: Option<usize>,
|
342
|
+
null_value: Option<String>,
|
343
|
+
quote_style: Option<Wrap<QuoteStyle>>,
|
344
|
+
maintain_order: bool,
|
345
|
+
) -> RbResult<()> {
|
346
|
+
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
347
|
+
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
348
|
+
|
349
|
+
let serialize_options = SerializeOptions {
|
350
|
+
date_format,
|
351
|
+
time_format,
|
352
|
+
datetime_format,
|
353
|
+
float_precision,
|
354
|
+
separator,
|
355
|
+
quote_char,
|
356
|
+
null: null_value,
|
357
|
+
line_terminator,
|
358
|
+
quote_style,
|
359
|
+
};
|
360
|
+
|
361
|
+
let options = CsvWriterOptions {
|
362
|
+
include_bom,
|
363
|
+
include_header,
|
364
|
+
maintain_order,
|
365
|
+
batch_size: batch_size.0,
|
366
|
+
serialize_options,
|
367
|
+
};
|
368
|
+
|
369
|
+
let ldf = self.ldf.clone();
|
370
|
+
ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
|
371
|
+
Ok(())
|
372
|
+
}
|
373
|
+
|
374
|
+
pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
|
375
|
+
let options = JsonWriterOptions { maintain_order };
|
376
|
+
|
377
|
+
let ldf = self.ldf.clone();
|
378
|
+
ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
|
379
|
+
Ok(())
|
380
|
+
}
|
381
|
+
|
307
382
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
308
383
|
let ldf = self.ldf.clone();
|
309
384
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
@@ -321,6 +396,12 @@ impl RbLazyFrame {
|
|
321
396
|
Ok(ldf.select(exprs).into())
|
322
397
|
}
|
323
398
|
|
399
|
+
pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
|
400
|
+
let ldf = self.ldf.clone();
|
401
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
402
|
+
Ok(ldf.select_seq(exprs).into())
|
403
|
+
}
|
404
|
+
|
324
405
|
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
325
406
|
let ldf = self.ldf.clone();
|
326
407
|
let by = rb_exprs_to_exprs(by)?;
|
@@ -334,7 +415,7 @@ impl RbLazyFrame {
|
|
334
415
|
})
|
335
416
|
}
|
336
417
|
|
337
|
-
pub fn
|
418
|
+
pub fn rolling(
|
338
419
|
&self,
|
339
420
|
index_column: &RbExpr,
|
340
421
|
period: String,
|
@@ -459,6 +540,7 @@ impl RbLazyFrame {
|
|
459
540
|
right_on: RArray,
|
460
541
|
allow_parallel: bool,
|
461
542
|
force_parallel: bool,
|
543
|
+
join_nulls: bool,
|
462
544
|
how: Wrap<JoinType>,
|
463
545
|
suffix: String,
|
464
546
|
) -> RbResult<Self> {
|
@@ -474,17 +556,28 @@ impl RbLazyFrame {
|
|
474
556
|
.right_on(right_on)
|
475
557
|
.allow_parallel(allow_parallel)
|
476
558
|
.force_parallel(force_parallel)
|
559
|
+
.join_nulls(join_nulls)
|
477
560
|
.how(how.0)
|
478
561
|
.suffix(suffix)
|
479
562
|
.finish()
|
480
563
|
.into())
|
481
564
|
}
|
482
565
|
|
566
|
+
pub fn with_column(&self, expr: &RbExpr) -> Self {
|
567
|
+
let ldf = self.ldf.clone();
|
568
|
+
ldf.with_column(expr.inner.clone()).into()
|
569
|
+
}
|
570
|
+
|
483
571
|
pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
|
484
572
|
let ldf = self.ldf.clone();
|
485
573
|
Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
|
486
574
|
}
|
487
575
|
|
576
|
+
pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
|
577
|
+
let ldf = self.ldf.clone();
|
578
|
+
Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
|
579
|
+
}
|
580
|
+
|
488
581
|
pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
|
489
582
|
let ldf = self.ldf.clone();
|
490
583
|
ldf.rename(existing, new).into()
|
@@ -569,6 +662,11 @@ impl RbLazyFrame {
|
|
569
662
|
Ok(ldf.explode(column).into())
|
570
663
|
}
|
571
664
|
|
665
|
+
pub fn null_count(&self) -> Self {
|
666
|
+
let ldf = self.ldf.clone();
|
667
|
+
ldf.null_count().into()
|
668
|
+
}
|
669
|
+
|
572
670
|
pub fn unique(
|
573
671
|
&self,
|
574
672
|
maintain_order: bool,
|
@@ -619,14 +717,18 @@ impl RbLazyFrame {
|
|
619
717
|
ldf.melt(args).into()
|
620
718
|
}
|
621
719
|
|
622
|
-
pub fn
|
720
|
+
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
623
721
|
let ldf = self.ldf.clone();
|
624
|
-
ldf.
|
722
|
+
ldf.with_row_index(&name, offset).into()
|
625
723
|
}
|
626
724
|
|
627
|
-
pub fn
|
725
|
+
pub fn drop(&self, cols: Vec<String>) -> Self {
|
628
726
|
let ldf = self.ldf.clone();
|
629
|
-
ldf.
|
727
|
+
ldf.drop(cols).into()
|
728
|
+
}
|
729
|
+
|
730
|
+
pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
|
731
|
+
self.ldf.clone().cast_all(dtype.0, strict).into()
|
630
732
|
}
|
631
733
|
|
632
734
|
pub fn clone(&self) -> Self {
|
@@ -668,4 +770,18 @@ impl RbLazyFrame {
|
|
668
770
|
pub fn width(&self) -> RbResult<usize> {
|
669
771
|
Ok(self.get_schema()?.len())
|
670
772
|
}
|
773
|
+
|
774
|
+
pub fn count(&self) -> Self {
|
775
|
+
let ldf = self.ldf.clone();
|
776
|
+
ldf.count().into()
|
777
|
+
}
|
778
|
+
|
779
|
+
pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
|
780
|
+
let out = self
|
781
|
+
.ldf
|
782
|
+
.clone()
|
783
|
+
.merge_sorted(other.ldf.clone(), &key)
|
784
|
+
.map_err(RbPolarsErr::from)?;
|
785
|
+
Ok(out.into())
|
786
|
+
}
|
671
787
|
}
|