polars-df 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +17 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +121 -93
- data/ext/polars/src/dataframe.rs +116 -71
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +68 -284
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +85 -58
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +47 -42
- data/ext/polars/src/lib.rs +156 -72
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +3 -3
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/construction.rs +4 -4
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +7 -27
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +18 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,10 +1,12 @@
|
|
1
1
|
use magnus::encoding::{self, EncodingCapable};
|
2
|
-
use magnus::{
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
|
4
|
+
};
|
3
5
|
use polars::lazy::dsl;
|
4
6
|
use polars::prelude::*;
|
5
7
|
|
6
|
-
use crate::apply::lazy::binary_lambda;
|
7
8
|
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
9
|
+
use crate::map::lazy::binary_lambda;
|
8
10
|
use crate::prelude::vec_extract_wrapped;
|
9
11
|
use crate::rb_exprs_to_exprs;
|
10
12
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
@@ -15,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
|
|
15
17
|
};
|
16
18
|
}
|
17
19
|
|
18
|
-
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
19
|
-
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
20
|
-
}
|
21
|
-
|
22
20
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
23
21
|
let by = rb_exprs_to_exprs(by)?;
|
24
22
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -30,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
30
28
|
|
31
29
|
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
32
30
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
Ok(dsl::as_struct(
|
31
|
+
Ok(dsl::as_struct(exprs).into())
|
34
32
|
}
|
35
33
|
|
36
34
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
@@ -45,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
|
|
45
43
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
46
44
|
let lfs = lfs
|
47
45
|
.each()
|
48
|
-
.map(|v|
|
46
|
+
.map(|v| <&RbLazyFrame>::try_convert(v?))
|
49
47
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
50
48
|
|
51
49
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
@@ -85,36 +83,65 @@ pub fn concat_lf(
|
|
85
83
|
Ok(lf.into())
|
86
84
|
}
|
87
85
|
|
86
|
+
pub fn concat_lf_diagonal(
|
87
|
+
lfs: RArray,
|
88
|
+
rechunk: bool,
|
89
|
+
parallel: bool,
|
90
|
+
to_supertypes: bool,
|
91
|
+
) -> RbResult<RbLazyFrame> {
|
92
|
+
let iter = lfs.each();
|
93
|
+
|
94
|
+
let lfs = iter
|
95
|
+
.map(|item| {
|
96
|
+
let item = item?;
|
97
|
+
get_lf(item)
|
98
|
+
})
|
99
|
+
.collect::<RbResult<Vec<_>>>()?;
|
100
|
+
|
101
|
+
let lf = dsl::functions::concat_lf_diagonal(
|
102
|
+
lfs,
|
103
|
+
UnionArgs {
|
104
|
+
rechunk,
|
105
|
+
parallel,
|
106
|
+
to_supertypes,
|
107
|
+
},
|
108
|
+
)
|
109
|
+
.map_err(RbPolarsErr::from)?;
|
110
|
+
Ok(lf.into())
|
111
|
+
}
|
112
|
+
|
88
113
|
#[allow(clippy::too_many_arguments)]
|
89
114
|
pub fn duration(
|
115
|
+
weeks: Option<&RbExpr>,
|
90
116
|
days: Option<&RbExpr>,
|
117
|
+
hours: Option<&RbExpr>,
|
118
|
+
minutes: Option<&RbExpr>,
|
91
119
|
seconds: Option<&RbExpr>,
|
92
|
-
nanoseconds: Option<&RbExpr>,
|
93
|
-
microseconds: Option<&RbExpr>,
|
94
120
|
milliseconds: Option<&RbExpr>,
|
95
|
-
|
96
|
-
|
97
|
-
|
121
|
+
microseconds: Option<&RbExpr>,
|
122
|
+
nanoseconds: Option<&RbExpr>,
|
123
|
+
time_unit: Wrap<TimeUnit>,
|
98
124
|
) -> RbExpr {
|
99
125
|
set_unwrapped_or_0!(
|
126
|
+
weeks,
|
100
127
|
days,
|
128
|
+
hours,
|
129
|
+
minutes,
|
101
130
|
seconds,
|
102
|
-
nanoseconds,
|
103
|
-
microseconds,
|
104
131
|
milliseconds,
|
105
|
-
|
106
|
-
|
107
|
-
weeks,
|
132
|
+
microseconds,
|
133
|
+
nanoseconds,
|
108
134
|
);
|
109
135
|
let args = DurationArgs {
|
136
|
+
weeks,
|
110
137
|
days,
|
138
|
+
hours,
|
139
|
+
minutes,
|
111
140
|
seconds,
|
112
|
-
nanoseconds,
|
113
|
-
microseconds,
|
114
141
|
milliseconds,
|
115
|
-
|
116
|
-
|
117
|
-
|
142
|
+
microseconds,
|
143
|
+
nanoseconds,
|
144
|
+
time_unit: time_unit.0,
|
118
145
|
};
|
119
146
|
dsl::duration(args).into()
|
120
147
|
}
|
@@ -137,23 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
|
137
164
|
|
138
165
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
139
166
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
|
+
let lambda = Opaque::from(lambda);
|
140
168
|
|
141
|
-
let func =
|
169
|
+
let func =
|
170
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
142
171
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
143
172
|
}
|
144
173
|
|
145
174
|
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
146
175
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
176
|
+
let lambda = Opaque::from(lambda);
|
147
177
|
|
148
|
-
let func =
|
149
|
-
|
178
|
+
let func =
|
179
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
+
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
150
181
|
}
|
151
182
|
|
152
183
|
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
153
184
|
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
154
|
-
Ok(dsl::lit(
|
185
|
+
Ok(dsl::lit(bool::try_convert(value)?).into())
|
155
186
|
} else if let Some(v) = Integer::from_value(value) {
|
156
|
-
match v.
|
187
|
+
match v.to_i64() {
|
157
188
|
Ok(val) => {
|
158
189
|
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
159
190
|
Ok(dsl::lit(val as i32).into())
|
@@ -162,19 +193,19 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
162
193
|
}
|
163
194
|
}
|
164
195
|
_ => {
|
165
|
-
let val =
|
196
|
+
let val = v.to_u64()?;
|
166
197
|
Ok(dsl::lit(val).into())
|
167
198
|
}
|
168
199
|
}
|
169
200
|
} else if let Some(v) = Float::from_value(value) {
|
170
|
-
Ok(dsl::lit(v.
|
201
|
+
Ok(dsl::lit(v.to_f64()).into())
|
171
202
|
} else if let Some(v) = RString::from_value(value) {
|
172
203
|
if v.enc_get() == encoding::Index::utf8() {
|
173
|
-
Ok(dsl::lit(v.
|
204
|
+
Ok(dsl::lit(v.to_string()?).into())
|
174
205
|
} else {
|
175
206
|
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
176
207
|
}
|
177
|
-
} else if let Ok(series) =
|
208
|
+
} else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
|
178
209
|
Ok(dsl::lit(series.series.borrow().clone()).into())
|
179
210
|
} else if value.is_nil() {
|
180
211
|
Ok(dsl::lit(Null {}).into())
|
@@ -218,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
218
249
|
.into()
|
219
250
|
}
|
220
251
|
|
221
|
-
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
222
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
252
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
223
254
|
}
|
224
255
|
|
225
256
|
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
@@ -236,14 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
|
236
267
|
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
237
268
|
let dtypes = dtypes
|
238
269
|
.each()
|
239
|
-
.map(|v|
|
270
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
240
271
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
241
272
|
let dtypes = vec_extract_wrapped(dtypes);
|
242
273
|
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
243
274
|
}
|
244
|
-
|
245
|
-
// TODO rename to sum_horizontal
|
246
|
-
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
247
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
248
|
-
Ok(polars::lazy::dsl::sum_horizontal(exprs).into())
|
249
|
-
}
|
@@ -1,8 +1,38 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
|
+
use polars_core;
|
3
|
+
use polars_core::fmt::FloatFmt;
|
2
4
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
+
use polars_core::POOL;
|
3
6
|
|
4
7
|
use crate::conversion::Wrap;
|
8
|
+
use crate::{RbResult, RbValueError};
|
5
9
|
|
6
10
|
pub fn get_idx_type() -> Value {
|
7
11
|
Wrap(IDX_DTYPE).into_value()
|
8
12
|
}
|
13
|
+
|
14
|
+
pub fn threadpool_size() -> usize {
|
15
|
+
POOL.current_num_threads()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
19
|
+
let fmt = match fmt.as_str() {
|
20
|
+
"full" => FloatFmt::Full,
|
21
|
+
"mixed" => FloatFmt::Mixed,
|
22
|
+
e => {
|
23
|
+
return Err(RbValueError::new_err(format!(
|
24
|
+
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
+
)))
|
26
|
+
}
|
27
|
+
};
|
28
|
+
polars_core::fmt::set_float_fmt(fmt);
|
29
|
+
Ok(())
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn get_float_fmt() -> RbResult<String> {
|
33
|
+
let strfmt = match polars_core::fmt::get_float_fmt() {
|
34
|
+
FloatFmt::Full => "full",
|
35
|
+
FloatFmt::Mixed => "mixed",
|
36
|
+
};
|
37
|
+
Ok(strfmt.to_string())
|
38
|
+
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::prelude::*;
|
6
|
+
use crate::RbExpr;
|
7
|
+
|
8
|
+
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let dtype = dtype.0;
|
10
|
+
|
11
|
+
let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
|
12
|
+
|
13
|
+
if dtype != DataType::Int64 {
|
14
|
+
result = result.cast(dtype)
|
15
|
+
}
|
16
|
+
|
17
|
+
result.into()
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
21
|
+
let dtype = dtype.0;
|
22
|
+
|
23
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
|
24
|
+
|
25
|
+
if dtype != DataType::Int64 {
|
26
|
+
result = result.cast(DataType::List(Box::new(dtype)))
|
27
|
+
}
|
28
|
+
|
29
|
+
result.into()
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn date_range(
|
33
|
+
start: &RbExpr,
|
34
|
+
end: &RbExpr,
|
35
|
+
every: String,
|
36
|
+
closed: Wrap<ClosedWindow>,
|
37
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
38
|
+
time_zone: Option<TimeZone>,
|
39
|
+
) -> RbExpr {
|
40
|
+
let start = start.inner.clone();
|
41
|
+
let end = end.inner.clone();
|
42
|
+
let every = Duration::parse(&every);
|
43
|
+
let closed = closed.0;
|
44
|
+
let time_unit = time_unit.map(|x| x.0);
|
45
|
+
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
46
|
+
}
|
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
|
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
18
|
#[derive(Clone)]
|
19
|
-
pub struct
|
20
|
-
pub inner: dsl::
|
19
|
+
pub struct RbThen {
|
20
|
+
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
impl From<dsl::
|
24
|
-
fn from(inner: dsl::
|
25
|
-
|
23
|
+
impl From<dsl::Then> for RbThen {
|
24
|
+
fn from(inner: dsl::Then) -> Self {
|
25
|
+
RbThen { inner }
|
26
26
|
}
|
27
27
|
}
|
28
28
|
|
29
29
|
impl RbWhen {
|
30
|
-
pub fn then(&self, expr: &RbExpr) ->
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbThen {
|
31
31
|
self.inner.clone().then(expr.inner.clone()).into()
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
35
|
-
impl
|
35
|
+
impl RbThen {
|
36
36
|
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
37
|
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
38
|
}
|
data/ext/polars/src/lazyframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue, RArray, RHash, Value};
|
1
|
+
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
2
|
use polars::io::RowCount;
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
@@ -78,32 +78,32 @@ impl RbLazyFrame {
|
|
78
78
|
pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
|
79
79
|
// start arguments
|
80
80
|
// this pattern is needed for more than 16
|
81
|
-
let path
|
82
|
-
let
|
83
|
-
let has_header
|
84
|
-
let ignore_errors
|
85
|
-
let skip_rows
|
86
|
-
let n_rows
|
87
|
-
let cache
|
88
|
-
let overwrite_dtype
|
89
|
-
let low_memory
|
90
|
-
let comment_char
|
91
|
-
let quote_char
|
92
|
-
let null_values
|
93
|
-
let infer_schema_length
|
94
|
-
let with_schema_modify
|
95
|
-
let rechunk
|
96
|
-
let skip_rows_after_header
|
97
|
-
let encoding
|
98
|
-
let row_count
|
99
|
-
let try_parse_dates
|
100
|
-
let eol_char
|
81
|
+
let path = String::try_convert(arguments[0])?;
|
82
|
+
let separator = String::try_convert(arguments[1])?;
|
83
|
+
let has_header = bool::try_convert(arguments[2])?;
|
84
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
85
|
+
let skip_rows = usize::try_convert(arguments[4])?;
|
86
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
87
|
+
let cache = bool::try_convert(arguments[6])?;
|
88
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
|
89
|
+
let low_memory = bool::try_convert(arguments[8])?;
|
90
|
+
let comment_char = Option::<String>::try_convert(arguments[9])?;
|
91
|
+
let quote_char = Option::<String>::try_convert(arguments[10])?;
|
92
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
|
93
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
|
94
|
+
let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
|
95
|
+
let rechunk = bool::try_convert(arguments[14])?;
|
96
|
+
let skip_rows_after_header = usize::try_convert(arguments[15])?;
|
97
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
|
98
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
99
|
+
let try_parse_dates = bool::try_convert(arguments[18])?;
|
100
|
+
let eol_char = String::try_convert(arguments[19])?;
|
101
101
|
// end arguments
|
102
102
|
|
103
103
|
let null_values = null_values.map(|w| w.0);
|
104
104
|
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
105
105
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
106
|
-
let
|
106
|
+
let separator = separator.as_bytes()[0];
|
107
107
|
let eol_char = eol_char.as_bytes()[0];
|
108
108
|
|
109
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -116,7 +116,7 @@ impl RbLazyFrame {
|
|
116
116
|
});
|
117
117
|
let r = LazyCsvReader::new(path)
|
118
118
|
.with_infer_schema_length(infer_schema_length)
|
119
|
-
.
|
119
|
+
.with_separator(separator)
|
120
120
|
.has_header(has_header)
|
121
121
|
.with_ignore_errors(ignore_errors)
|
122
122
|
.with_skip_rows(skip_rows)
|
@@ -151,6 +151,7 @@ impl RbLazyFrame {
|
|
151
151
|
row_count: Option<(String, IdxSize)>,
|
152
152
|
low_memory: bool,
|
153
153
|
use_statistics: bool,
|
154
|
+
hive_partitioning: bool,
|
154
155
|
) -> RbResult<Self> {
|
155
156
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
156
157
|
let args = ScanArgsParquet {
|
@@ -163,6 +164,7 @@ impl RbLazyFrame {
|
|
163
164
|
// TODO support cloud options
|
164
165
|
cloud_options: None,
|
165
166
|
use_statistics,
|
167
|
+
hive_partitioning,
|
166
168
|
};
|
167
169
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
168
170
|
Ok(lf.into())
|
@@ -217,6 +219,7 @@ impl RbLazyFrame {
|
|
217
219
|
slice_pushdown: bool,
|
218
220
|
cse: bool,
|
219
221
|
allow_streaming: bool,
|
222
|
+
_eager: bool,
|
220
223
|
) -> RbLazyFrame {
|
221
224
|
let ldf = self.ldf.clone();
|
222
225
|
let ldf = ldf
|
@@ -224,8 +227,9 @@ impl RbLazyFrame {
|
|
224
227
|
.with_predicate_pushdown(predicate_pushdown)
|
225
228
|
.with_simplify_expr(simplify_expr)
|
226
229
|
.with_slice_pushdown(slice_pushdown)
|
227
|
-
.
|
230
|
+
.with_comm_subplan_elim(cse)
|
228
231
|
.with_streaming(allow_streaming)
|
232
|
+
._with_eager(_eager)
|
229
233
|
.with_projection_pushdown(projection_pushdown);
|
230
234
|
ldf.into()
|
231
235
|
}
|
@@ -318,20 +322,20 @@ impl RbLazyFrame {
|
|
318
322
|
Ok(ldf.select(exprs).into())
|
319
323
|
}
|
320
324
|
|
321
|
-
pub fn
|
325
|
+
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
322
326
|
let ldf = self.ldf.clone();
|
323
327
|
let by = rb_exprs_to_exprs(by)?;
|
324
328
|
let lazy_gb = if maintain_order {
|
325
|
-
ldf.
|
329
|
+
ldf.group_by_stable(by)
|
326
330
|
} else {
|
327
|
-
ldf.
|
331
|
+
ldf.group_by(by)
|
328
332
|
};
|
329
333
|
Ok(RbLazyGroupBy {
|
330
334
|
lgb: RefCell::new(Some(lazy_gb)),
|
331
335
|
})
|
332
336
|
}
|
333
337
|
|
334
|
-
pub fn
|
338
|
+
pub fn group_by_rolling(
|
335
339
|
&self,
|
336
340
|
index_column: &RbExpr,
|
337
341
|
period: String,
|
@@ -343,7 +347,7 @@ impl RbLazyFrame {
|
|
343
347
|
let closed_window = closed.0;
|
344
348
|
let ldf = self.ldf.clone();
|
345
349
|
let by = rb_exprs_to_exprs(by)?;
|
346
|
-
let lazy_gb = ldf.
|
350
|
+
let lazy_gb = ldf.group_by_rolling(
|
347
351
|
index_column.inner.clone(),
|
348
352
|
by,
|
349
353
|
RollingGroupOptions {
|
@@ -361,32 +365,34 @@ impl RbLazyFrame {
|
|
361
365
|
}
|
362
366
|
|
363
367
|
#[allow(clippy::too_many_arguments)]
|
364
|
-
pub fn
|
368
|
+
pub fn group_by_dynamic(
|
365
369
|
&self,
|
366
370
|
index_column: &RbExpr,
|
367
371
|
every: String,
|
368
372
|
period: String,
|
369
373
|
offset: String,
|
370
|
-
|
374
|
+
label: Wrap<Label>,
|
371
375
|
include_boundaries: bool,
|
372
376
|
closed: Wrap<ClosedWindow>,
|
373
377
|
by: RArray,
|
374
378
|
start_by: Wrap<StartBy>,
|
379
|
+
check_sorted: bool,
|
375
380
|
) -> RbResult<RbLazyGroupBy> {
|
376
381
|
let closed_window = closed.0;
|
377
382
|
let by = rb_exprs_to_exprs(by)?;
|
378
383
|
let ldf = self.ldf.clone();
|
379
|
-
let lazy_gb = ldf.
|
384
|
+
let lazy_gb = ldf.group_by_dynamic(
|
380
385
|
index_column.inner.clone(),
|
381
386
|
by,
|
382
387
|
DynamicGroupOptions {
|
383
388
|
every: Duration::parse(&every),
|
384
389
|
period: Duration::parse(&period),
|
385
390
|
offset: Duration::parse(&offset),
|
386
|
-
|
391
|
+
label: label.0,
|
387
392
|
include_boundaries,
|
388
393
|
closed_window,
|
389
394
|
start_by: start_by.0,
|
395
|
+
check_sorted,
|
390
396
|
..Default::default()
|
391
397
|
},
|
392
398
|
);
|
@@ -399,7 +405,7 @@ impl RbLazyFrame {
|
|
399
405
|
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
400
406
|
let contexts = contexts
|
401
407
|
.each()
|
402
|
-
.map(|v| v.unwrap()
|
408
|
+
.map(|v| TryConvert::try_convert(v.unwrap()))
|
403
409
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
404
410
|
let contexts = contexts
|
405
411
|
.into_iter()
|
@@ -490,14 +496,13 @@ impl RbLazyFrame {
|
|
490
496
|
ldf.reverse().into()
|
491
497
|
}
|
492
498
|
|
493
|
-
pub fn shift(&self,
|
494
|
-
let
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
|
499
|
+
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
500
|
+
let lf = self.ldf.clone();
|
501
|
+
let out = match fill_value {
|
502
|
+
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
503
|
+
None => lf.shift(n.inner.clone()),
|
504
|
+
};
|
505
|
+
out.into()
|
501
506
|
}
|
502
507
|
|
503
508
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|