polars-df 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +17 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +121 -93
- data/ext/polars/src/dataframe.rs +116 -71
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +68 -284
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +85 -58
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +47 -42
- data/ext/polars/src/lib.rs +156 -72
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +3 -3
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/construction.rs +4 -4
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +7 -27
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +18 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,10 +1,12 @@
|
|
1
1
|
use magnus::encoding::{self, EncodingCapable};
|
2
|
-
use magnus::{
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
|
4
|
+
};
|
3
5
|
use polars::lazy::dsl;
|
4
6
|
use polars::prelude::*;
|
5
7
|
|
6
|
-
use crate::apply::lazy::binary_lambda;
|
7
8
|
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
9
|
+
use crate::map::lazy::binary_lambda;
|
8
10
|
use crate::prelude::vec_extract_wrapped;
|
9
11
|
use crate::rb_exprs_to_exprs;
|
10
12
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
@@ -15,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
|
|
15
17
|
};
|
16
18
|
}
|
17
19
|
|
18
|
-
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
19
|
-
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
20
|
-
}
|
21
|
-
|
22
20
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
23
21
|
let by = rb_exprs_to_exprs(by)?;
|
24
22
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -30,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
30
28
|
|
31
29
|
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
32
30
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
Ok(dsl::as_struct(
|
31
|
+
Ok(dsl::as_struct(exprs).into())
|
34
32
|
}
|
35
33
|
|
36
34
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
@@ -45,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
|
|
45
43
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
46
44
|
let lfs = lfs
|
47
45
|
.each()
|
48
|
-
.map(|v|
|
46
|
+
.map(|v| <&RbLazyFrame>::try_convert(v?))
|
49
47
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
50
48
|
|
51
49
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
@@ -85,36 +83,65 @@ pub fn concat_lf(
|
|
85
83
|
Ok(lf.into())
|
86
84
|
}
|
87
85
|
|
86
|
+
pub fn concat_lf_diagonal(
|
87
|
+
lfs: RArray,
|
88
|
+
rechunk: bool,
|
89
|
+
parallel: bool,
|
90
|
+
to_supertypes: bool,
|
91
|
+
) -> RbResult<RbLazyFrame> {
|
92
|
+
let iter = lfs.each();
|
93
|
+
|
94
|
+
let lfs = iter
|
95
|
+
.map(|item| {
|
96
|
+
let item = item?;
|
97
|
+
get_lf(item)
|
98
|
+
})
|
99
|
+
.collect::<RbResult<Vec<_>>>()?;
|
100
|
+
|
101
|
+
let lf = dsl::functions::concat_lf_diagonal(
|
102
|
+
lfs,
|
103
|
+
UnionArgs {
|
104
|
+
rechunk,
|
105
|
+
parallel,
|
106
|
+
to_supertypes,
|
107
|
+
},
|
108
|
+
)
|
109
|
+
.map_err(RbPolarsErr::from)?;
|
110
|
+
Ok(lf.into())
|
111
|
+
}
|
112
|
+
|
88
113
|
#[allow(clippy::too_many_arguments)]
|
89
114
|
pub fn duration(
|
115
|
+
weeks: Option<&RbExpr>,
|
90
116
|
days: Option<&RbExpr>,
|
117
|
+
hours: Option<&RbExpr>,
|
118
|
+
minutes: Option<&RbExpr>,
|
91
119
|
seconds: Option<&RbExpr>,
|
92
|
-
nanoseconds: Option<&RbExpr>,
|
93
|
-
microseconds: Option<&RbExpr>,
|
94
120
|
milliseconds: Option<&RbExpr>,
|
95
|
-
|
96
|
-
|
97
|
-
|
121
|
+
microseconds: Option<&RbExpr>,
|
122
|
+
nanoseconds: Option<&RbExpr>,
|
123
|
+
time_unit: Wrap<TimeUnit>,
|
98
124
|
) -> RbExpr {
|
99
125
|
set_unwrapped_or_0!(
|
126
|
+
weeks,
|
100
127
|
days,
|
128
|
+
hours,
|
129
|
+
minutes,
|
101
130
|
seconds,
|
102
|
-
nanoseconds,
|
103
|
-
microseconds,
|
104
131
|
milliseconds,
|
105
|
-
|
106
|
-
|
107
|
-
weeks,
|
132
|
+
microseconds,
|
133
|
+
nanoseconds,
|
108
134
|
);
|
109
135
|
let args = DurationArgs {
|
136
|
+
weeks,
|
110
137
|
days,
|
138
|
+
hours,
|
139
|
+
minutes,
|
111
140
|
seconds,
|
112
|
-
nanoseconds,
|
113
|
-
microseconds,
|
114
141
|
milliseconds,
|
115
|
-
|
116
|
-
|
117
|
-
|
142
|
+
microseconds,
|
143
|
+
nanoseconds,
|
144
|
+
time_unit: time_unit.0,
|
118
145
|
};
|
119
146
|
dsl::duration(args).into()
|
120
147
|
}
|
@@ -137,23 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
|
137
164
|
|
138
165
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
139
166
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
|
+
let lambda = Opaque::from(lambda);
|
140
168
|
|
141
|
-
let func =
|
169
|
+
let func =
|
170
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
142
171
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
143
172
|
}
|
144
173
|
|
145
174
|
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
146
175
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
176
|
+
let lambda = Opaque::from(lambda);
|
147
177
|
|
148
|
-
let func =
|
149
|
-
|
178
|
+
let func =
|
179
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
+
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
150
181
|
}
|
151
182
|
|
152
183
|
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
153
184
|
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
154
|
-
Ok(dsl::lit(
|
185
|
+
Ok(dsl::lit(bool::try_convert(value)?).into())
|
155
186
|
} else if let Some(v) = Integer::from_value(value) {
|
156
|
-
match v.
|
187
|
+
match v.to_i64() {
|
157
188
|
Ok(val) => {
|
158
189
|
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
159
190
|
Ok(dsl::lit(val as i32).into())
|
@@ -162,19 +193,19 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
162
193
|
}
|
163
194
|
}
|
164
195
|
_ => {
|
165
|
-
let val =
|
196
|
+
let val = v.to_u64()?;
|
166
197
|
Ok(dsl::lit(val).into())
|
167
198
|
}
|
168
199
|
}
|
169
200
|
} else if let Some(v) = Float::from_value(value) {
|
170
|
-
Ok(dsl::lit(v.
|
201
|
+
Ok(dsl::lit(v.to_f64()).into())
|
171
202
|
} else if let Some(v) = RString::from_value(value) {
|
172
203
|
if v.enc_get() == encoding::Index::utf8() {
|
173
|
-
Ok(dsl::lit(v.
|
204
|
+
Ok(dsl::lit(v.to_string()?).into())
|
174
205
|
} else {
|
175
206
|
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
176
207
|
}
|
177
|
-
} else if let Ok(series) =
|
208
|
+
} else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
|
178
209
|
Ok(dsl::lit(series.series.borrow().clone()).into())
|
179
210
|
} else if value.is_nil() {
|
180
211
|
Ok(dsl::lit(Null {}).into())
|
@@ -218,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
218
249
|
.into()
|
219
250
|
}
|
220
251
|
|
221
|
-
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
222
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
252
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
223
254
|
}
|
224
255
|
|
225
256
|
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
@@ -236,14 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
|
236
267
|
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
237
268
|
let dtypes = dtypes
|
238
269
|
.each()
|
239
|
-
.map(|v|
|
270
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
240
271
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
241
272
|
let dtypes = vec_extract_wrapped(dtypes);
|
242
273
|
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
243
274
|
}
|
244
|
-
|
245
|
-
// TODO rename to sum_horizontal
|
246
|
-
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
247
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
248
|
-
Ok(polars::lazy::dsl::sum_horizontal(exprs).into())
|
249
|
-
}
|
@@ -1,8 +1,38 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
|
+
use polars_core;
|
3
|
+
use polars_core::fmt::FloatFmt;
|
2
4
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
+
use polars_core::POOL;
|
3
6
|
|
4
7
|
use crate::conversion::Wrap;
|
8
|
+
use crate::{RbResult, RbValueError};
|
5
9
|
|
6
10
|
pub fn get_idx_type() -> Value {
|
7
11
|
Wrap(IDX_DTYPE).into_value()
|
8
12
|
}
|
13
|
+
|
14
|
+
pub fn threadpool_size() -> usize {
|
15
|
+
POOL.current_num_threads()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
19
|
+
let fmt = match fmt.as_str() {
|
20
|
+
"full" => FloatFmt::Full,
|
21
|
+
"mixed" => FloatFmt::Mixed,
|
22
|
+
e => {
|
23
|
+
return Err(RbValueError::new_err(format!(
|
24
|
+
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
+
)))
|
26
|
+
}
|
27
|
+
};
|
28
|
+
polars_core::fmt::set_float_fmt(fmt);
|
29
|
+
Ok(())
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn get_float_fmt() -> RbResult<String> {
|
33
|
+
let strfmt = match polars_core::fmt::get_float_fmt() {
|
34
|
+
FloatFmt::Full => "full",
|
35
|
+
FloatFmt::Mixed => "mixed",
|
36
|
+
};
|
37
|
+
Ok(strfmt.to_string())
|
38
|
+
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::prelude::*;
|
6
|
+
use crate::RbExpr;
|
7
|
+
|
8
|
+
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let dtype = dtype.0;
|
10
|
+
|
11
|
+
let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
|
12
|
+
|
13
|
+
if dtype != DataType::Int64 {
|
14
|
+
result = result.cast(dtype)
|
15
|
+
}
|
16
|
+
|
17
|
+
result.into()
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
21
|
+
let dtype = dtype.0;
|
22
|
+
|
23
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
|
24
|
+
|
25
|
+
if dtype != DataType::Int64 {
|
26
|
+
result = result.cast(DataType::List(Box::new(dtype)))
|
27
|
+
}
|
28
|
+
|
29
|
+
result.into()
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn date_range(
|
33
|
+
start: &RbExpr,
|
34
|
+
end: &RbExpr,
|
35
|
+
every: String,
|
36
|
+
closed: Wrap<ClosedWindow>,
|
37
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
38
|
+
time_zone: Option<TimeZone>,
|
39
|
+
) -> RbExpr {
|
40
|
+
let start = start.inner.clone();
|
41
|
+
let end = end.inner.clone();
|
42
|
+
let every = Duration::parse(&every);
|
43
|
+
let closed = closed.0;
|
44
|
+
let time_unit = time_unit.map(|x| x.0);
|
45
|
+
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
46
|
+
}
|
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
|
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
18
|
#[derive(Clone)]
|
19
|
-
pub struct
|
20
|
-
pub inner: dsl::
|
19
|
+
pub struct RbThen {
|
20
|
+
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
impl From<dsl::
|
24
|
-
fn from(inner: dsl::
|
25
|
-
|
23
|
+
impl From<dsl::Then> for RbThen {
|
24
|
+
fn from(inner: dsl::Then) -> Self {
|
25
|
+
RbThen { inner }
|
26
26
|
}
|
27
27
|
}
|
28
28
|
|
29
29
|
impl RbWhen {
|
30
|
-
pub fn then(&self, expr: &RbExpr) ->
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbThen {
|
31
31
|
self.inner.clone().then(expr.inner.clone()).into()
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
35
|
-
impl
|
35
|
+
impl RbThen {
|
36
36
|
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
37
|
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
38
|
}
|
data/ext/polars/src/lazyframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue, RArray, RHash, Value};
|
1
|
+
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
2
|
use polars::io::RowCount;
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
@@ -78,32 +78,32 @@ impl RbLazyFrame {
|
|
78
78
|
pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
|
79
79
|
// start arguments
|
80
80
|
// this pattern is needed for more than 16
|
81
|
-
let path
|
82
|
-
let
|
83
|
-
let has_header
|
84
|
-
let ignore_errors
|
85
|
-
let skip_rows
|
86
|
-
let n_rows
|
87
|
-
let cache
|
88
|
-
let overwrite_dtype
|
89
|
-
let low_memory
|
90
|
-
let comment_char
|
91
|
-
let quote_char
|
92
|
-
let null_values
|
93
|
-
let infer_schema_length
|
94
|
-
let with_schema_modify
|
95
|
-
let rechunk
|
96
|
-
let skip_rows_after_header
|
97
|
-
let encoding
|
98
|
-
let row_count
|
99
|
-
let try_parse_dates
|
100
|
-
let eol_char
|
81
|
+
let path = String::try_convert(arguments[0])?;
|
82
|
+
let separator = String::try_convert(arguments[1])?;
|
83
|
+
let has_header = bool::try_convert(arguments[2])?;
|
84
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
85
|
+
let skip_rows = usize::try_convert(arguments[4])?;
|
86
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
87
|
+
let cache = bool::try_convert(arguments[6])?;
|
88
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
|
89
|
+
let low_memory = bool::try_convert(arguments[8])?;
|
90
|
+
let comment_char = Option::<String>::try_convert(arguments[9])?;
|
91
|
+
let quote_char = Option::<String>::try_convert(arguments[10])?;
|
92
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
|
93
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
|
94
|
+
let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
|
95
|
+
let rechunk = bool::try_convert(arguments[14])?;
|
96
|
+
let skip_rows_after_header = usize::try_convert(arguments[15])?;
|
97
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
|
98
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
99
|
+
let try_parse_dates = bool::try_convert(arguments[18])?;
|
100
|
+
let eol_char = String::try_convert(arguments[19])?;
|
101
101
|
// end arguments
|
102
102
|
|
103
103
|
let null_values = null_values.map(|w| w.0);
|
104
104
|
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
105
105
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
106
|
-
let
|
106
|
+
let separator = separator.as_bytes()[0];
|
107
107
|
let eol_char = eol_char.as_bytes()[0];
|
108
108
|
|
109
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -116,7 +116,7 @@ impl RbLazyFrame {
|
|
116
116
|
});
|
117
117
|
let r = LazyCsvReader::new(path)
|
118
118
|
.with_infer_schema_length(infer_schema_length)
|
119
|
-
.
|
119
|
+
.with_separator(separator)
|
120
120
|
.has_header(has_header)
|
121
121
|
.with_ignore_errors(ignore_errors)
|
122
122
|
.with_skip_rows(skip_rows)
|
@@ -151,6 +151,7 @@ impl RbLazyFrame {
|
|
151
151
|
row_count: Option<(String, IdxSize)>,
|
152
152
|
low_memory: bool,
|
153
153
|
use_statistics: bool,
|
154
|
+
hive_partitioning: bool,
|
154
155
|
) -> RbResult<Self> {
|
155
156
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
156
157
|
let args = ScanArgsParquet {
|
@@ -163,6 +164,7 @@ impl RbLazyFrame {
|
|
163
164
|
// TODO support cloud options
|
164
165
|
cloud_options: None,
|
165
166
|
use_statistics,
|
167
|
+
hive_partitioning,
|
166
168
|
};
|
167
169
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
168
170
|
Ok(lf.into())
|
@@ -217,6 +219,7 @@ impl RbLazyFrame {
|
|
217
219
|
slice_pushdown: bool,
|
218
220
|
cse: bool,
|
219
221
|
allow_streaming: bool,
|
222
|
+
_eager: bool,
|
220
223
|
) -> RbLazyFrame {
|
221
224
|
let ldf = self.ldf.clone();
|
222
225
|
let ldf = ldf
|
@@ -224,8 +227,9 @@ impl RbLazyFrame {
|
|
224
227
|
.with_predicate_pushdown(predicate_pushdown)
|
225
228
|
.with_simplify_expr(simplify_expr)
|
226
229
|
.with_slice_pushdown(slice_pushdown)
|
227
|
-
.
|
230
|
+
.with_comm_subplan_elim(cse)
|
228
231
|
.with_streaming(allow_streaming)
|
232
|
+
._with_eager(_eager)
|
229
233
|
.with_projection_pushdown(projection_pushdown);
|
230
234
|
ldf.into()
|
231
235
|
}
|
@@ -318,20 +322,20 @@ impl RbLazyFrame {
|
|
318
322
|
Ok(ldf.select(exprs).into())
|
319
323
|
}
|
320
324
|
|
321
|
-
pub fn
|
325
|
+
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
322
326
|
let ldf = self.ldf.clone();
|
323
327
|
let by = rb_exprs_to_exprs(by)?;
|
324
328
|
let lazy_gb = if maintain_order {
|
325
|
-
ldf.
|
329
|
+
ldf.group_by_stable(by)
|
326
330
|
} else {
|
327
|
-
ldf.
|
331
|
+
ldf.group_by(by)
|
328
332
|
};
|
329
333
|
Ok(RbLazyGroupBy {
|
330
334
|
lgb: RefCell::new(Some(lazy_gb)),
|
331
335
|
})
|
332
336
|
}
|
333
337
|
|
334
|
-
pub fn
|
338
|
+
pub fn group_by_rolling(
|
335
339
|
&self,
|
336
340
|
index_column: &RbExpr,
|
337
341
|
period: String,
|
@@ -343,7 +347,7 @@ impl RbLazyFrame {
|
|
343
347
|
let closed_window = closed.0;
|
344
348
|
let ldf = self.ldf.clone();
|
345
349
|
let by = rb_exprs_to_exprs(by)?;
|
346
|
-
let lazy_gb = ldf.
|
350
|
+
let lazy_gb = ldf.group_by_rolling(
|
347
351
|
index_column.inner.clone(),
|
348
352
|
by,
|
349
353
|
RollingGroupOptions {
|
@@ -361,32 +365,34 @@ impl RbLazyFrame {
|
|
361
365
|
}
|
362
366
|
|
363
367
|
#[allow(clippy::too_many_arguments)]
|
364
|
-
pub fn
|
368
|
+
pub fn group_by_dynamic(
|
365
369
|
&self,
|
366
370
|
index_column: &RbExpr,
|
367
371
|
every: String,
|
368
372
|
period: String,
|
369
373
|
offset: String,
|
370
|
-
|
374
|
+
label: Wrap<Label>,
|
371
375
|
include_boundaries: bool,
|
372
376
|
closed: Wrap<ClosedWindow>,
|
373
377
|
by: RArray,
|
374
378
|
start_by: Wrap<StartBy>,
|
379
|
+
check_sorted: bool,
|
375
380
|
) -> RbResult<RbLazyGroupBy> {
|
376
381
|
let closed_window = closed.0;
|
377
382
|
let by = rb_exprs_to_exprs(by)?;
|
378
383
|
let ldf = self.ldf.clone();
|
379
|
-
let lazy_gb = ldf.
|
384
|
+
let lazy_gb = ldf.group_by_dynamic(
|
380
385
|
index_column.inner.clone(),
|
381
386
|
by,
|
382
387
|
DynamicGroupOptions {
|
383
388
|
every: Duration::parse(&every),
|
384
389
|
period: Duration::parse(&period),
|
385
390
|
offset: Duration::parse(&offset),
|
386
|
-
|
391
|
+
label: label.0,
|
387
392
|
include_boundaries,
|
388
393
|
closed_window,
|
389
394
|
start_by: start_by.0,
|
395
|
+
check_sorted,
|
390
396
|
..Default::default()
|
391
397
|
},
|
392
398
|
);
|
@@ -399,7 +405,7 @@ impl RbLazyFrame {
|
|
399
405
|
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
400
406
|
let contexts = contexts
|
401
407
|
.each()
|
402
|
-
.map(|v| v.unwrap()
|
408
|
+
.map(|v| TryConvert::try_convert(v.unwrap()))
|
403
409
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
404
410
|
let contexts = contexts
|
405
411
|
.into_iter()
|
@@ -490,14 +496,13 @@ impl RbLazyFrame {
|
|
490
496
|
ldf.reverse().into()
|
491
497
|
}
|
492
498
|
|
493
|
-
pub fn shift(&self,
|
494
|
-
let
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
|
499
|
+
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
500
|
+
let lf = self.ldf.clone();
|
501
|
+
let out = match fill_value {
|
502
|
+
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
503
|
+
None => lf.shift(n.inner.clone()),
|
504
|
+
};
|
505
|
+
out.into()
|
501
506
|
}
|
502
507
|
|
503
508
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|