polars-df 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,10 +1,12 @@
|
|
1
1
|
use magnus::encoding::{self, EncodingCapable};
|
2
|
-
use magnus::{
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
|
4
|
+
};
|
3
5
|
use polars::lazy::dsl;
|
4
6
|
use polars::prelude::*;
|
5
7
|
|
6
|
-
use crate::apply::lazy::binary_lambda;
|
7
8
|
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
9
|
+
use crate::map::lazy::binary_lambda;
|
8
10
|
use crate::prelude::vec_extract_wrapped;
|
9
11
|
use crate::rb_exprs_to_exprs;
|
10
12
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
@@ -15,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
|
|
15
17
|
};
|
16
18
|
}
|
17
19
|
|
18
|
-
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
19
|
-
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
20
|
-
}
|
21
|
-
|
22
20
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
23
21
|
let by = rb_exprs_to_exprs(by)?;
|
24
22
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -30,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
30
28
|
|
31
29
|
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
32
30
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
Ok(dsl::as_struct(
|
31
|
+
Ok(dsl::as_struct(exprs).into())
|
34
32
|
}
|
35
33
|
|
36
34
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
@@ -45,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
|
|
45
43
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
46
44
|
let lfs = lfs
|
47
45
|
.each()
|
48
|
-
.map(|v|
|
46
|
+
.map(|v| <&RbLazyFrame>::try_convert(v?))
|
49
47
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
50
48
|
|
51
49
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
@@ -85,36 +83,65 @@ pub fn concat_lf(
|
|
85
83
|
Ok(lf.into())
|
86
84
|
}
|
87
85
|
|
86
|
+
pub fn concat_lf_diagonal(
|
87
|
+
lfs: RArray,
|
88
|
+
rechunk: bool,
|
89
|
+
parallel: bool,
|
90
|
+
to_supertypes: bool,
|
91
|
+
) -> RbResult<RbLazyFrame> {
|
92
|
+
let iter = lfs.each();
|
93
|
+
|
94
|
+
let lfs = iter
|
95
|
+
.map(|item| {
|
96
|
+
let item = item?;
|
97
|
+
get_lf(item)
|
98
|
+
})
|
99
|
+
.collect::<RbResult<Vec<_>>>()?;
|
100
|
+
|
101
|
+
let lf = dsl::functions::concat_lf_diagonal(
|
102
|
+
lfs,
|
103
|
+
UnionArgs {
|
104
|
+
rechunk,
|
105
|
+
parallel,
|
106
|
+
to_supertypes,
|
107
|
+
},
|
108
|
+
)
|
109
|
+
.map_err(RbPolarsErr::from)?;
|
110
|
+
Ok(lf.into())
|
111
|
+
}
|
112
|
+
|
88
113
|
#[allow(clippy::too_many_arguments)]
|
89
114
|
pub fn duration(
|
115
|
+
weeks: Option<&RbExpr>,
|
90
116
|
days: Option<&RbExpr>,
|
117
|
+
hours: Option<&RbExpr>,
|
118
|
+
minutes: Option<&RbExpr>,
|
91
119
|
seconds: Option<&RbExpr>,
|
92
|
-
nanoseconds: Option<&RbExpr>,
|
93
|
-
microseconds: Option<&RbExpr>,
|
94
120
|
milliseconds: Option<&RbExpr>,
|
95
|
-
|
96
|
-
|
97
|
-
|
121
|
+
microseconds: Option<&RbExpr>,
|
122
|
+
nanoseconds: Option<&RbExpr>,
|
123
|
+
time_unit: Wrap<TimeUnit>,
|
98
124
|
) -> RbExpr {
|
99
125
|
set_unwrapped_or_0!(
|
126
|
+
weeks,
|
100
127
|
days,
|
128
|
+
hours,
|
129
|
+
minutes,
|
101
130
|
seconds,
|
102
|
-
nanoseconds,
|
103
|
-
microseconds,
|
104
131
|
milliseconds,
|
105
|
-
|
106
|
-
|
107
|
-
weeks,
|
132
|
+
microseconds,
|
133
|
+
nanoseconds,
|
108
134
|
);
|
109
135
|
let args = DurationArgs {
|
136
|
+
weeks,
|
110
137
|
days,
|
138
|
+
hours,
|
139
|
+
minutes,
|
111
140
|
seconds,
|
112
|
-
nanoseconds,
|
113
|
-
microseconds,
|
114
141
|
milliseconds,
|
115
|
-
|
116
|
-
|
117
|
-
|
142
|
+
microseconds,
|
143
|
+
nanoseconds,
|
144
|
+
time_unit: time_unit.0,
|
118
145
|
};
|
119
146
|
dsl::duration(args).into()
|
120
147
|
}
|
@@ -137,23 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
|
137
164
|
|
138
165
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
139
166
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
|
+
let lambda = Opaque::from(lambda);
|
140
168
|
|
141
|
-
let func =
|
169
|
+
let func =
|
170
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
142
171
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
143
172
|
}
|
144
173
|
|
145
174
|
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
146
175
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
176
|
+
let lambda = Opaque::from(lambda);
|
147
177
|
|
148
|
-
let func =
|
149
|
-
|
178
|
+
let func =
|
179
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
+
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
150
181
|
}
|
151
182
|
|
152
183
|
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
153
184
|
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
154
|
-
Ok(dsl::lit(
|
185
|
+
Ok(dsl::lit(bool::try_convert(value)?).into())
|
155
186
|
} else if let Some(v) = Integer::from_value(value) {
|
156
|
-
match v.
|
187
|
+
match v.to_i64() {
|
157
188
|
Ok(val) => {
|
158
189
|
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
159
190
|
Ok(dsl::lit(val as i32).into())
|
@@ -162,19 +193,19 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
162
193
|
}
|
163
194
|
}
|
164
195
|
_ => {
|
165
|
-
let val =
|
196
|
+
let val = v.to_u64()?;
|
166
197
|
Ok(dsl::lit(val).into())
|
167
198
|
}
|
168
199
|
}
|
169
200
|
} else if let Some(v) = Float::from_value(value) {
|
170
|
-
Ok(dsl::lit(v.
|
201
|
+
Ok(dsl::lit(v.to_f64()).into())
|
171
202
|
} else if let Some(v) = RString::from_value(value) {
|
172
203
|
if v.enc_get() == encoding::Index::utf8() {
|
173
|
-
Ok(dsl::lit(v.
|
204
|
+
Ok(dsl::lit(v.to_string()?).into())
|
174
205
|
} else {
|
175
206
|
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
176
207
|
}
|
177
|
-
} else if let Ok(series) =
|
208
|
+
} else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
|
178
209
|
Ok(dsl::lit(series.series.borrow().clone()).into())
|
179
210
|
} else if value.is_nil() {
|
180
211
|
Ok(dsl::lit(Null {}).into())
|
@@ -218,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
218
249
|
.into()
|
219
250
|
}
|
220
251
|
|
221
|
-
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
222
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
252
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
223
254
|
}
|
224
255
|
|
225
256
|
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
@@ -236,14 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
|
236
267
|
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
237
268
|
let dtypes = dtypes
|
238
269
|
.each()
|
239
|
-
.map(|v|
|
270
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
240
271
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
241
272
|
let dtypes = vec_extract_wrapped(dtypes);
|
242
273
|
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
243
274
|
}
|
244
|
-
|
245
|
-
// TODO rename to sum_horizontal
|
246
|
-
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
247
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
248
|
-
Ok(polars::lazy::dsl::sum_horizontal(exprs).into())
|
249
|
-
}
|
@@ -1,8 +1,38 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
|
+
use polars_core;
|
3
|
+
use polars_core::fmt::FloatFmt;
|
2
4
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
+
use polars_core::POOL;
|
3
6
|
|
4
7
|
use crate::conversion::Wrap;
|
8
|
+
use crate::{RbResult, RbValueError};
|
5
9
|
|
6
10
|
pub fn get_idx_type() -> Value {
|
7
11
|
Wrap(IDX_DTYPE).into_value()
|
8
12
|
}
|
13
|
+
|
14
|
+
pub fn threadpool_size() -> usize {
|
15
|
+
POOL.current_num_threads()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
19
|
+
let fmt = match fmt.as_str() {
|
20
|
+
"full" => FloatFmt::Full,
|
21
|
+
"mixed" => FloatFmt::Mixed,
|
22
|
+
e => {
|
23
|
+
return Err(RbValueError::new_err(format!(
|
24
|
+
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
+
)))
|
26
|
+
}
|
27
|
+
};
|
28
|
+
polars_core::fmt::set_float_fmt(fmt);
|
29
|
+
Ok(())
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn get_float_fmt() -> RbResult<String> {
|
33
|
+
let strfmt = match polars_core::fmt::get_float_fmt() {
|
34
|
+
FloatFmt::Full => "full",
|
35
|
+
FloatFmt::Mixed => "mixed",
|
36
|
+
};
|
37
|
+
Ok(strfmt.to_string())
|
38
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::prelude::*;
|
6
|
+
use crate::RbExpr;
|
7
|
+
|
8
|
+
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let start = start.inner.clone();
|
10
|
+
let end = end.inner.clone();
|
11
|
+
let dtype = dtype.0;
|
12
|
+
dsl::int_range(start, end, step, dtype).into()
|
13
|
+
}
|
14
|
+
|
15
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataType>) -> RbExpr {
|
16
|
+
let dtype = dtype.0;
|
17
|
+
|
18
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step.inner.clone());
|
19
|
+
|
20
|
+
if dtype != DataType::Int64 {
|
21
|
+
result = result.cast(DataType::List(Box::new(dtype)))
|
22
|
+
}
|
23
|
+
|
24
|
+
result.into()
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn date_range(
|
28
|
+
start: &RbExpr,
|
29
|
+
end: &RbExpr,
|
30
|
+
every: String,
|
31
|
+
closed: Wrap<ClosedWindow>,
|
32
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
33
|
+
time_zone: Option<TimeZone>,
|
34
|
+
) -> RbExpr {
|
35
|
+
let start = start.inner.clone();
|
36
|
+
let end = end.inner.clone();
|
37
|
+
let every = Duration::parse(&every);
|
38
|
+
let closed = closed.0;
|
39
|
+
let time_unit = time_unit.map(|x| x.0);
|
40
|
+
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
41
|
+
}
|
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
|
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
18
|
#[derive(Clone)]
|
19
|
-
pub struct
|
20
|
-
pub inner: dsl::
|
19
|
+
pub struct RbThen {
|
20
|
+
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
impl From<dsl::
|
24
|
-
fn from(inner: dsl::
|
25
|
-
|
23
|
+
impl From<dsl::Then> for RbThen {
|
24
|
+
fn from(inner: dsl::Then) -> Self {
|
25
|
+
RbThen { inner }
|
26
26
|
}
|
27
27
|
}
|
28
28
|
|
29
29
|
impl RbWhen {
|
30
|
-
pub fn then(&self, expr: &RbExpr) ->
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbThen {
|
31
31
|
self.inner.clone().then(expr.inner.clone()).into()
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
35
|
-
impl
|
35
|
+
impl RbThen {
|
36
36
|
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
37
|
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
38
|
}
|
data/ext/polars/src/lazyframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue, RArray, RHash, Value};
|
1
|
+
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
2
|
use polars::io::RowCount;
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
@@ -78,32 +78,31 @@ impl RbLazyFrame {
|
|
78
78
|
pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
|
79
79
|
// start arguments
|
80
80
|
// this pattern is needed for more than 16
|
81
|
-
let path
|
82
|
-
let
|
83
|
-
let has_header
|
84
|
-
let ignore_errors
|
85
|
-
let skip_rows
|
86
|
-
let n_rows
|
87
|
-
let cache
|
88
|
-
let overwrite_dtype
|
89
|
-
let low_memory
|
90
|
-
let
|
91
|
-
let quote_char
|
92
|
-
let null_values
|
93
|
-
let infer_schema_length
|
94
|
-
let with_schema_modify
|
95
|
-
let rechunk
|
96
|
-
let skip_rows_after_header
|
97
|
-
let encoding
|
98
|
-
let row_count
|
99
|
-
let try_parse_dates
|
100
|
-
let eol_char
|
81
|
+
let path = String::try_convert(arguments[0])?;
|
82
|
+
let separator = String::try_convert(arguments[1])?;
|
83
|
+
let has_header = bool::try_convert(arguments[2])?;
|
84
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
85
|
+
let skip_rows = usize::try_convert(arguments[4])?;
|
86
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
87
|
+
let cache = bool::try_convert(arguments[6])?;
|
88
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
|
89
|
+
let low_memory = bool::try_convert(arguments[8])?;
|
90
|
+
let comment_prefix = Option::<String>::try_convert(arguments[9])?;
|
91
|
+
let quote_char = Option::<String>::try_convert(arguments[10])?;
|
92
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
|
93
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
|
94
|
+
let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
|
95
|
+
let rechunk = bool::try_convert(arguments[14])?;
|
96
|
+
let skip_rows_after_header = usize::try_convert(arguments[15])?;
|
97
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
|
98
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
99
|
+
let try_parse_dates = bool::try_convert(arguments[18])?;
|
100
|
+
let eol_char = String::try_convert(arguments[19])?;
|
101
101
|
// end arguments
|
102
102
|
|
103
103
|
let null_values = null_values.map(|w| w.0);
|
104
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
105
104
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
106
|
-
let
|
105
|
+
let separator = separator.as_bytes()[0];
|
107
106
|
let eol_char = eol_char.as_bytes()[0];
|
108
107
|
|
109
108
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -116,7 +115,7 @@ impl RbLazyFrame {
|
|
116
115
|
});
|
117
116
|
let r = LazyCsvReader::new(path)
|
118
117
|
.with_infer_schema_length(infer_schema_length)
|
119
|
-
.
|
118
|
+
.with_separator(separator)
|
120
119
|
.has_header(has_header)
|
121
120
|
.with_ignore_errors(ignore_errors)
|
122
121
|
.with_skip_rows(skip_rows)
|
@@ -124,7 +123,7 @@ impl RbLazyFrame {
|
|
124
123
|
.with_cache(cache)
|
125
124
|
.with_dtype_overwrite(overwrite_dtype.as_ref())
|
126
125
|
.low_memory(low_memory)
|
127
|
-
.
|
126
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
128
127
|
.with_quote_char(quote_char)
|
129
128
|
.with_end_of_line_char(eol_char)
|
130
129
|
.with_rechunk(rechunk)
|
@@ -151,6 +150,7 @@ impl RbLazyFrame {
|
|
151
150
|
row_count: Option<(String, IdxSize)>,
|
152
151
|
low_memory: bool,
|
153
152
|
use_statistics: bool,
|
153
|
+
hive_partitioning: bool,
|
154
154
|
) -> RbResult<Self> {
|
155
155
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
156
156
|
let args = ScanArgsParquet {
|
@@ -163,6 +163,7 @@ impl RbLazyFrame {
|
|
163
163
|
// TODO support cloud options
|
164
164
|
cloud_options: None,
|
165
165
|
use_statistics,
|
166
|
+
hive_partitioning,
|
166
167
|
};
|
167
168
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
168
169
|
Ok(lf.into())
|
@@ -217,6 +218,7 @@ impl RbLazyFrame {
|
|
217
218
|
slice_pushdown: bool,
|
218
219
|
cse: bool,
|
219
220
|
allow_streaming: bool,
|
221
|
+
_eager: bool,
|
220
222
|
) -> RbLazyFrame {
|
221
223
|
let ldf = self.ldf.clone();
|
222
224
|
let ldf = ldf
|
@@ -224,8 +226,9 @@ impl RbLazyFrame {
|
|
224
226
|
.with_predicate_pushdown(predicate_pushdown)
|
225
227
|
.with_simplify_expr(simplify_expr)
|
226
228
|
.with_slice_pushdown(slice_pushdown)
|
227
|
-
.
|
229
|
+
.with_comm_subplan_elim(cse)
|
228
230
|
.with_streaming(allow_streaming)
|
231
|
+
._with_eager(_eager)
|
229
232
|
.with_projection_pushdown(projection_pushdown);
|
230
233
|
ldf.into()
|
231
234
|
}
|
@@ -318,20 +321,20 @@ impl RbLazyFrame {
|
|
318
321
|
Ok(ldf.select(exprs).into())
|
319
322
|
}
|
320
323
|
|
321
|
-
pub fn
|
324
|
+
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
322
325
|
let ldf = self.ldf.clone();
|
323
326
|
let by = rb_exprs_to_exprs(by)?;
|
324
327
|
let lazy_gb = if maintain_order {
|
325
|
-
ldf.
|
328
|
+
ldf.group_by_stable(by)
|
326
329
|
} else {
|
327
|
-
ldf.
|
330
|
+
ldf.group_by(by)
|
328
331
|
};
|
329
332
|
Ok(RbLazyGroupBy {
|
330
333
|
lgb: RefCell::new(Some(lazy_gb)),
|
331
334
|
})
|
332
335
|
}
|
333
336
|
|
334
|
-
pub fn
|
337
|
+
pub fn group_by_rolling(
|
335
338
|
&self,
|
336
339
|
index_column: &RbExpr,
|
337
340
|
period: String,
|
@@ -343,7 +346,7 @@ impl RbLazyFrame {
|
|
343
346
|
let closed_window = closed.0;
|
344
347
|
let ldf = self.ldf.clone();
|
345
348
|
let by = rb_exprs_to_exprs(by)?;
|
346
|
-
let lazy_gb = ldf.
|
349
|
+
let lazy_gb = ldf.group_by_rolling(
|
347
350
|
index_column.inner.clone(),
|
348
351
|
by,
|
349
352
|
RollingGroupOptions {
|
@@ -361,32 +364,34 @@ impl RbLazyFrame {
|
|
361
364
|
}
|
362
365
|
|
363
366
|
#[allow(clippy::too_many_arguments)]
|
364
|
-
pub fn
|
367
|
+
pub fn group_by_dynamic(
|
365
368
|
&self,
|
366
369
|
index_column: &RbExpr,
|
367
370
|
every: String,
|
368
371
|
period: String,
|
369
372
|
offset: String,
|
370
|
-
|
373
|
+
label: Wrap<Label>,
|
371
374
|
include_boundaries: bool,
|
372
375
|
closed: Wrap<ClosedWindow>,
|
373
376
|
by: RArray,
|
374
377
|
start_by: Wrap<StartBy>,
|
378
|
+
check_sorted: bool,
|
375
379
|
) -> RbResult<RbLazyGroupBy> {
|
376
380
|
let closed_window = closed.0;
|
377
381
|
let by = rb_exprs_to_exprs(by)?;
|
378
382
|
let ldf = self.ldf.clone();
|
379
|
-
let lazy_gb = ldf.
|
383
|
+
let lazy_gb = ldf.group_by_dynamic(
|
380
384
|
index_column.inner.clone(),
|
381
385
|
by,
|
382
386
|
DynamicGroupOptions {
|
383
387
|
every: Duration::parse(&every),
|
384
388
|
period: Duration::parse(&period),
|
385
389
|
offset: Duration::parse(&offset),
|
386
|
-
|
390
|
+
label: label.0,
|
387
391
|
include_boundaries,
|
388
392
|
closed_window,
|
389
393
|
start_by: start_by.0,
|
394
|
+
check_sorted,
|
390
395
|
..Default::default()
|
391
396
|
},
|
392
397
|
);
|
@@ -399,7 +404,7 @@ impl RbLazyFrame {
|
|
399
404
|
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
400
405
|
let contexts = contexts
|
401
406
|
.each()
|
402
|
-
.map(|v| v.unwrap()
|
407
|
+
.map(|v| TryConvert::try_convert(v.unwrap()))
|
403
408
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
404
409
|
let contexts = contexts
|
405
410
|
.into_iter()
|
@@ -490,14 +495,13 @@ impl RbLazyFrame {
|
|
490
495
|
ldf.reverse().into()
|
491
496
|
}
|
492
497
|
|
493
|
-
pub fn shift(&self,
|
494
|
-
let
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
|
498
|
+
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
499
|
+
let lf = self.ldf.clone();
|
500
|
+
let out = match fill_value {
|
501
|
+
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
502
|
+
None => lf.shift(n.inner.clone()),
|
503
|
+
};
|
504
|
+
out.into()
|
501
505
|
}
|
502
506
|
|
503
507
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|
@@ -505,48 +509,58 @@ impl RbLazyFrame {
|
|
505
509
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
506
510
|
}
|
507
511
|
|
508
|
-
pub fn min(&self) -> Self {
|
512
|
+
pub fn min(&self) -> RbResult<Self> {
|
509
513
|
let ldf = self.ldf.clone();
|
510
|
-
ldf.min().
|
514
|
+
let out = ldf.min().map_err(RbPolarsErr::from)?;
|
515
|
+
Ok(out.into())
|
511
516
|
}
|
512
517
|
|
513
|
-
pub fn max(&self) -> Self {
|
518
|
+
pub fn max(&self) -> RbResult<Self> {
|
514
519
|
let ldf = self.ldf.clone();
|
515
|
-
ldf.max().
|
520
|
+
let out = ldf.max().map_err(RbPolarsErr::from)?;
|
521
|
+
Ok(out.into())
|
516
522
|
}
|
517
523
|
|
518
|
-
pub fn sum(&self) -> Self {
|
524
|
+
pub fn sum(&self) -> RbResult<Self> {
|
519
525
|
let ldf = self.ldf.clone();
|
520
|
-
ldf.sum().
|
526
|
+
let out = ldf.sum().map_err(RbPolarsErr::from)?;
|
527
|
+
Ok(out.into())
|
521
528
|
}
|
522
529
|
|
523
|
-
pub fn mean(&self) -> Self {
|
530
|
+
pub fn mean(&self) -> RbResult<Self> {
|
524
531
|
let ldf = self.ldf.clone();
|
525
|
-
ldf.mean().
|
532
|
+
let out = ldf.mean().map_err(RbPolarsErr::from)?;
|
533
|
+
Ok(out.into())
|
526
534
|
}
|
527
535
|
|
528
|
-
pub fn std(&self, ddof: u8) -> Self {
|
536
|
+
pub fn std(&self, ddof: u8) -> RbResult<Self> {
|
529
537
|
let ldf = self.ldf.clone();
|
530
|
-
ldf.std(ddof).
|
538
|
+
let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(out.into())
|
531
540
|
}
|
532
541
|
|
533
|
-
pub fn var(&self, ddof: u8) -> Self {
|
542
|
+
pub fn var(&self, ddof: u8) -> RbResult<Self> {
|
534
543
|
let ldf = self.ldf.clone();
|
535
|
-
ldf.var(ddof).
|
544
|
+
let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
|
545
|
+
Ok(out.into())
|
536
546
|
}
|
537
547
|
|
538
|
-
pub fn median(&self) -> Self {
|
548
|
+
pub fn median(&self) -> RbResult<Self> {
|
539
549
|
let ldf = self.ldf.clone();
|
540
|
-
ldf.median().
|
550
|
+
let out = ldf.median().map_err(RbPolarsErr::from)?;
|
551
|
+
Ok(out.into())
|
541
552
|
}
|
542
553
|
|
543
554
|
pub fn quantile(
|
544
555
|
&self,
|
545
556
|
quantile: &RbExpr,
|
546
557
|
interpolation: Wrap<QuantileInterpolOptions>,
|
547
|
-
) -> Self {
|
558
|
+
) -> RbResult<Self> {
|
548
559
|
let ldf = self.ldf.clone();
|
549
|
-
ldf
|
560
|
+
let out = ldf
|
561
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
562
|
+
.map_err(RbPolarsErr::from)?;
|
563
|
+
Ok(out.into())
|
550
564
|
}
|
551
565
|
|
552
566
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|