polars-df 0.3.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/README.md +31 -2
- data/ext/polars/Cargo.toml +10 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +36 -19
- data/ext/polars/src/conversion.rs +159 -16
- data/ext/polars/src/dataframe.rs +51 -52
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +216 -300
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +103 -531
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +29 -12
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
|
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
use polars::prelude::*;
|
4
|
+
|
5
|
+
use crate::apply::lazy::binary_lambda;
|
6
|
+
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
7
|
+
use crate::prelude::vec_extract_wrapped;
|
8
|
+
use crate::rb_exprs_to_exprs;
|
9
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
10
|
+
|
11
|
+
/// Rebinds each named `Option<&RbExpr>` variable to an owned expression.
///
/// A `Some` value is replaced by a clone of its `inner` expression; a `None`
/// value is replaced by the literal `0`. Accepts one or more identifiers,
/// with an optional trailing comma.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(expr) => expr.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
|
16
|
+
|
17
|
+
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
18
|
+
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
22
|
+
let by = rb_exprs_to_exprs(by)?;
|
23
|
+
Ok(dsl::arg_sort_by(by, &descending).into())
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
27
|
+
dsl::arg_where(condition.inner.clone()).into()
|
28
|
+
}
|
29
|
+
|
30
|
+
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
32
|
+
Ok(dsl::as_struct(&exprs).into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
36
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
37
|
+
Ok(dsl::coalesce(&exprs).into())
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn col(name: String) -> RbExpr {
|
41
|
+
dsl::col(&name).into()
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
45
|
+
let lfs = lfs
|
46
|
+
.each()
|
47
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
48
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
49
|
+
|
50
|
+
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
51
|
+
let df = lf.ldf.clone().collect().unwrap();
|
52
|
+
RbDataFrame::new(df)
|
53
|
+
})))
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn cols(names: Vec<String>) -> RbExpr {
|
57
|
+
dsl::cols(names).into()
|
58
|
+
}
|
59
|
+
|
60
|
+
pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
61
|
+
let (seq, len) = get_rbseq(lfs)?;
|
62
|
+
let mut lfs = Vec::with_capacity(len);
|
63
|
+
|
64
|
+
for res in seq.each() {
|
65
|
+
let item = res?;
|
66
|
+
let lf = get_lf(item)?;
|
67
|
+
lfs.push(lf);
|
68
|
+
}
|
69
|
+
|
70
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
71
|
+
Ok(lf.into())
|
72
|
+
}
|
73
|
+
|
74
|
+
#[allow(clippy::too_many_arguments)]
|
75
|
+
pub fn duration(
|
76
|
+
days: Option<&RbExpr>,
|
77
|
+
seconds: Option<&RbExpr>,
|
78
|
+
nanoseconds: Option<&RbExpr>,
|
79
|
+
microseconds: Option<&RbExpr>,
|
80
|
+
milliseconds: Option<&RbExpr>,
|
81
|
+
minutes: Option<&RbExpr>,
|
82
|
+
hours: Option<&RbExpr>,
|
83
|
+
weeks: Option<&RbExpr>,
|
84
|
+
) -> RbExpr {
|
85
|
+
set_unwrapped_or_0!(
|
86
|
+
days,
|
87
|
+
seconds,
|
88
|
+
nanoseconds,
|
89
|
+
microseconds,
|
90
|
+
milliseconds,
|
91
|
+
minutes,
|
92
|
+
hours,
|
93
|
+
weeks,
|
94
|
+
);
|
95
|
+
let args = DurationArgs {
|
96
|
+
days,
|
97
|
+
seconds,
|
98
|
+
nanoseconds,
|
99
|
+
microseconds,
|
100
|
+
milliseconds,
|
101
|
+
minutes,
|
102
|
+
hours,
|
103
|
+
weeks,
|
104
|
+
};
|
105
|
+
dsl::duration(args).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn count() -> RbExpr {
|
109
|
+
dsl::count().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn first() -> RbExpr {
|
113
|
+
dsl::first().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn last() -> RbExpr {
|
117
|
+
dsl::last().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
121
|
+
dsl::dtype_cols(dtypes).into()
|
122
|
+
}
|
123
|
+
|
124
|
+
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
125
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
126
|
+
|
127
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
128
|
+
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
129
|
+
}
|
130
|
+
|
131
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
132
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
133
|
+
|
134
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
135
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
136
|
+
}
|
137
|
+
|
138
|
+
// TODO improve
|
139
|
+
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
140
|
+
if value.is_nil() {
|
141
|
+
Ok(dsl::lit(Null {}).into())
|
142
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
143
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
144
|
+
} else if let Some(v) = RString::from_value(value) {
|
145
|
+
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
146
|
+
} else if value.is_kind_of(class::integer()) {
|
147
|
+
match value.try_convert::<i64>() {
|
148
|
+
Ok(val) => {
|
149
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
150
|
+
Ok(dsl::lit(val as i32).into())
|
151
|
+
} else {
|
152
|
+
Ok(dsl::lit(val).into())
|
153
|
+
}
|
154
|
+
}
|
155
|
+
_ => {
|
156
|
+
let val = value.try_convert::<u64>()?;
|
157
|
+
Ok(dsl::lit(val).into())
|
158
|
+
}
|
159
|
+
}
|
160
|
+
} else {
|
161
|
+
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
162
|
+
}
|
163
|
+
}
|
164
|
+
|
165
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
166
|
+
if value.is_nil() {
|
167
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
168
|
+
} else {
|
169
|
+
todo!();
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
174
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
175
|
+
}
|
176
|
+
|
177
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
178
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
179
|
+
.into()
|
180
|
+
}
|
181
|
+
|
182
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
183
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
184
|
+
}
|
185
|
+
|
186
|
+
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
187
|
+
let s = rb_exprs_to_exprs(s)?;
|
188
|
+
Ok(dsl::concat_str(s, &sep).into())
|
189
|
+
}
|
190
|
+
|
191
|
+
pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
192
|
+
let s = rb_exprs_to_exprs(s)?;
|
193
|
+
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
194
|
+
Ok(expr.into())
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
198
|
+
let dtypes = dtypes
|
199
|
+
.each()
|
200
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
201
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
202
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
203
|
+
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
207
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
208
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
209
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
|
+
#[derive(Clone)]
|
7
|
+
pub struct RbWhen {
|
8
|
+
pub inner: dsl::When,
|
9
|
+
}
|
10
|
+
|
11
|
+
impl From<dsl::When> for RbWhen {
|
12
|
+
fn from(inner: dsl::When) -> Self {
|
13
|
+
RbWhen { inner }
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
|
+
#[derive(Clone)]
|
19
|
+
pub struct RbWhenThen {
|
20
|
+
pub inner: dsl::WhenThen,
|
21
|
+
}
|
22
|
+
|
23
|
+
impl From<dsl::WhenThen> for RbWhenThen {
|
24
|
+
fn from(inner: dsl::WhenThen) -> Self {
|
25
|
+
RbWhenThen { inner }
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
impl RbWhen {
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
|
31
|
+
self.inner.clone().then(expr.inner.clone()).into()
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
impl RbWhenThen {
|
36
|
+
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
|
+
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn when(predicate: &RbExpr) -> RbWhen {
|
42
|
+
dsl::when(predicate.inner.clone()).into()
|
43
|
+
}
|
@@ -1,37 +1,15 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, Value};
|
2
2
|
use polars::io::RowCount;
|
3
|
-
use polars::lazy::frame::
|
3
|
+
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
|
+
use std::path::PathBuf;
|
7
8
|
|
8
9
|
use crate::conversion::*;
|
10
|
+
use crate::expr::rb_exprs_to_exprs;
|
9
11
|
use crate::file::get_file_like;
|
10
|
-
use crate::
|
11
|
-
use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
|
12
|
-
|
13
|
-
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
14
|
-
pub struct RbLazyGroupBy {
|
15
|
-
lgb: RefCell<Option<LazyGroupBy>>,
|
16
|
-
}
|
17
|
-
|
18
|
-
impl RbLazyGroupBy {
|
19
|
-
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
20
|
-
let lgb = self.lgb.borrow_mut().take().unwrap();
|
21
|
-
let aggs = rb_exprs_to_exprs(aggs)?;
|
22
|
-
Ok(lgb.agg(aggs).into())
|
23
|
-
}
|
24
|
-
|
25
|
-
pub fn head(&self, n: usize) -> RbLazyFrame {
|
26
|
-
let lgb = self.lgb.take().unwrap();
|
27
|
-
lgb.head(Some(n)).into()
|
28
|
-
}
|
29
|
-
|
30
|
-
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
31
|
-
let lgb = self.lgb.take().unwrap();
|
32
|
-
lgb.tail(Some(n)).into()
|
33
|
-
}
|
34
|
-
}
|
12
|
+
use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
35
13
|
|
36
14
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
37
15
|
#[derive(Clone)]
|
@@ -118,7 +96,7 @@ impl RbLazyFrame {
|
|
118
96
|
let skip_rows_after_header: usize = arguments[15].try_convert()?;
|
119
97
|
let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
|
120
98
|
let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
|
121
|
-
let
|
99
|
+
let try_parse_dates: bool = arguments[18].try_convert()?;
|
122
100
|
let eol_char: String = arguments[19].try_convert()?;
|
123
101
|
// end arguments
|
124
102
|
|
@@ -131,10 +109,10 @@ impl RbLazyFrame {
|
|
131
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
132
110
|
|
133
111
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
134
|
-
|
112
|
+
overwrite_dtype
|
135
113
|
.into_iter()
|
136
|
-
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
137
|
-
|
114
|
+
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
115
|
+
.collect::<Schema>()
|
138
116
|
});
|
139
117
|
let r = LazyCsvReader::new(path)
|
140
118
|
.with_infer_schema_length(infer_schema_length)
|
@@ -153,7 +131,7 @@ impl RbLazyFrame {
|
|
153
131
|
.with_skip_rows_after_header(skip_rows_after_header)
|
154
132
|
.with_encoding(encoding.0)
|
155
133
|
.with_row_count(row_count)
|
156
|
-
.
|
134
|
+
.with_try_parse_dates(try_parse_dates)
|
157
135
|
.with_null_values(null_values);
|
158
136
|
|
159
137
|
if let Some(_lambda) = with_schema_modify {
|
@@ -163,6 +141,7 @@ impl RbLazyFrame {
|
|
163
141
|
Ok(r.finish().map_err(RbPolarsErr::from)?.into())
|
164
142
|
}
|
165
143
|
|
144
|
+
#[allow(clippy::too_many_arguments)]
|
166
145
|
pub fn new_from_parquet(
|
167
146
|
path: String,
|
168
147
|
n_rows: Option<usize>,
|
@@ -171,6 +150,7 @@ impl RbLazyFrame {
|
|
171
150
|
rechunk: bool,
|
172
151
|
row_count: Option<(String, IdxSize)>,
|
173
152
|
low_memory: bool,
|
153
|
+
use_statistics: bool,
|
174
154
|
) -> RbResult<Self> {
|
175
155
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
176
156
|
let args = ScanArgsParquet {
|
@@ -182,6 +162,7 @@ impl RbLazyFrame {
|
|
182
162
|
low_memory,
|
183
163
|
// TODO support cloud options
|
184
164
|
cloud_options: None,
|
165
|
+
use_statistics,
|
185
166
|
};
|
186
167
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
187
168
|
Ok(lf.into())
|
@@ -284,6 +265,32 @@ impl RbLazyFrame {
|
|
284
265
|
Ok(df.into())
|
285
266
|
}
|
286
267
|
|
268
|
+
#[allow(clippy::too_many_arguments)]
|
269
|
+
pub fn sink_parquet(
|
270
|
+
&self,
|
271
|
+
path: PathBuf,
|
272
|
+
compression: String,
|
273
|
+
compression_level: Option<i32>,
|
274
|
+
statistics: bool,
|
275
|
+
row_group_size: Option<usize>,
|
276
|
+
data_pagesize_limit: Option<usize>,
|
277
|
+
maintain_order: bool,
|
278
|
+
) -> RbResult<()> {
|
279
|
+
let compression = parse_parquet_compression(&compression, compression_level)?;
|
280
|
+
|
281
|
+
let options = ParquetWriteOptions {
|
282
|
+
compression,
|
283
|
+
statistics,
|
284
|
+
row_group_size,
|
285
|
+
data_pagesize_limit,
|
286
|
+
maintain_order,
|
287
|
+
};
|
288
|
+
|
289
|
+
let ldf = self.ldf.clone();
|
290
|
+
ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
|
291
|
+
Ok(())
|
292
|
+
}
|
293
|
+
|
287
294
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
288
295
|
let ldf = self.ldf.clone();
|
289
296
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
@@ -316,7 +323,7 @@ impl RbLazyFrame {
|
|
316
323
|
|
317
324
|
pub fn groupby_rolling(
|
318
325
|
&self,
|
319
|
-
index_column:
|
326
|
+
index_column: &RbExpr,
|
320
327
|
period: String,
|
321
328
|
offset: String,
|
322
329
|
closed: Wrap<ClosedWindow>,
|
@@ -326,9 +333,10 @@ impl RbLazyFrame {
|
|
326
333
|
let ldf = self.ldf.clone();
|
327
334
|
let by = rb_exprs_to_exprs(by)?;
|
328
335
|
let lazy_gb = ldf.groupby_rolling(
|
336
|
+
index_column.inner.clone(),
|
329
337
|
by,
|
330
338
|
RollingGroupOptions {
|
331
|
-
index_column,
|
339
|
+
index_column: "".into(),
|
332
340
|
period: Duration::parse(&period),
|
333
341
|
offset: Duration::parse(&offset),
|
334
342
|
closed_window,
|
@@ -343,7 +351,7 @@ impl RbLazyFrame {
|
|
343
351
|
#[allow(clippy::too_many_arguments)]
|
344
352
|
pub fn groupby_dynamic(
|
345
353
|
&self,
|
346
|
-
index_column:
|
354
|
+
index_column: &RbExpr,
|
347
355
|
every: String,
|
348
356
|
period: String,
|
349
357
|
offset: String,
|
@@ -357,9 +365,9 @@ impl RbLazyFrame {
|
|
357
365
|
let by = rb_exprs_to_exprs(by)?;
|
358
366
|
let ldf = self.ldf.clone();
|
359
367
|
let lazy_gb = ldf.groupby_dynamic(
|
368
|
+
index_column.inner.clone(),
|
360
369
|
by,
|
361
370
|
DynamicGroupOptions {
|
362
|
-
index_column,
|
363
371
|
every: Duration::parse(&every),
|
364
372
|
period: Duration::parse(&period),
|
365
373
|
offset: Duration::parse(&offset),
|
@@ -367,6 +375,7 @@ impl RbLazyFrame {
|
|
367
375
|
include_boundaries,
|
368
376
|
closed_window,
|
369
377
|
start_by: start_by.0,
|
378
|
+
..Default::default()
|
370
379
|
},
|
371
380
|
);
|
372
381
|
|
@@ -415,10 +424,10 @@ impl RbLazyFrame {
|
|
415
424
|
.force_parallel(force_parallel)
|
416
425
|
.how(JoinType::AsOf(AsOfOptions {
|
417
426
|
strategy: strategy.0,
|
418
|
-
left_by,
|
419
|
-
right_by,
|
427
|
+
left_by: left_by.map(strings_to_smartstrings),
|
428
|
+
right_by: right_by.map(strings_to_smartstrings),
|
420
429
|
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
421
|
-
tolerance_str,
|
430
|
+
tolerance_str: tolerance_str.map(|s| s.into()),
|
422
431
|
}))
|
423
432
|
.suffix(suffix)
|
424
433
|
.finish()
|
@@ -570,12 +579,14 @@ impl RbLazyFrame {
|
|
570
579
|
value_vars: Vec<String>,
|
571
580
|
value_name: Option<String>,
|
572
581
|
variable_name: Option<String>,
|
582
|
+
streamable: bool,
|
573
583
|
) -> Self {
|
574
584
|
let args = MeltArgs {
|
575
|
-
id_vars,
|
576
|
-
value_vars,
|
577
|
-
value_name,
|
578
|
-
variable_name,
|
585
|
+
id_vars: strings_to_smartstrings(id_vars),
|
586
|
+
value_vars: strings_to_smartstrings(value_vars),
|
587
|
+
value_name: value_name.map(|s| s.into()),
|
588
|
+
variable_name: variable_name.map(|s| s.into()),
|
589
|
+
streamable,
|
579
590
|
};
|
580
591
|
|
581
592
|
let ldf = self.ldf.clone();
|
@@ -596,8 +607,10 @@ impl RbLazyFrame {
|
|
596
607
|
self.ldf.clone().into()
|
597
608
|
}
|
598
609
|
|
599
|
-
pub fn columns(&self) -> RbResult<
|
600
|
-
|
610
|
+
pub fn columns(&self) -> RbResult<RArray> {
|
611
|
+
let schema = self.get_schema()?;
|
612
|
+
let iter = schema.iter_names().map(|s| s.as_str());
|
613
|
+
Ok(RArray::from_iter(iter))
|
601
614
|
}
|
602
615
|
|
603
616
|
pub fn dtypes(&self) -> RbResult<RArray> {
|
@@ -614,7 +627,7 @@ impl RbLazyFrame {
|
|
614
627
|
// TODO remove unwrap
|
615
628
|
schema_dict
|
616
629
|
.aset::<String, Value>(
|
617
|
-
fld.name().
|
630
|
+
fld.name().to_string(),
|
618
631
|
Wrap(fld.data_type().clone()).into_value(),
|
619
632
|
)
|
620
633
|
.unwrap();
|
@@ -0,0 +1,29 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::frame::LazyGroupBy;
|
3
|
+
use std::cell::RefCell;
|
4
|
+
|
5
|
+
use crate::expr::rb_exprs_to_exprs;
|
6
|
+
use crate::{RbLazyFrame, RbResult};
|
7
|
+
|
8
|
+
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
9
|
+
pub struct RbLazyGroupBy {
|
10
|
+
pub lgb: RefCell<Option<LazyGroupBy>>,
|
11
|
+
}
|
12
|
+
|
13
|
+
impl RbLazyGroupBy {
|
14
|
+
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
15
|
+
let lgb = self.lgb.borrow_mut().take().unwrap();
|
16
|
+
let aggs = rb_exprs_to_exprs(aggs)?;
|
17
|
+
Ok(lgb.agg(aggs).into())
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn head(&self, n: usize) -> RbLazyFrame {
|
21
|
+
let lgb = self.lgb.take().unwrap();
|
22
|
+
lgb.head(Some(n)).into()
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
26
|
+
let lgb = self.lgb.take().unwrap();
|
27
|
+
lgb.tail(Some(n)).into()
|
28
|
+
}
|
29
|
+
}
|