polars-df 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/README.md +31 -2
- data/ext/polars/Cargo.toml +10 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +36 -19
- data/ext/polars/src/conversion.rs +159 -16
- data/ext/polars/src/dataframe.rs +51 -52
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +216 -300
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +103 -531
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +29 -12
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
|
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
use polars::prelude::*;
|
4
|
+
|
5
|
+
use crate::apply::lazy::binary_lambda;
|
6
|
+
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
7
|
+
use crate::prelude::vec_extract_wrapped;
|
8
|
+
use crate::rb_exprs_to_exprs;
|
9
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
10
|
+
|
11
|
+
// Shadows each listed `Option<&RbExpr>` binding with the cloned inner
// expression, substituting the literal `0` when the option is `None`.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(expr) => expr.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
|
16
|
+
|
17
|
+
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
18
|
+
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
22
|
+
let by = rb_exprs_to_exprs(by)?;
|
23
|
+
Ok(dsl::arg_sort_by(by, &descending).into())
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
27
|
+
dsl::arg_where(condition.inner.clone()).into()
|
28
|
+
}
|
29
|
+
|
30
|
+
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
32
|
+
Ok(dsl::as_struct(&exprs).into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
36
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
37
|
+
Ok(dsl::coalesce(&exprs).into())
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn col(name: String) -> RbExpr {
|
41
|
+
dsl::col(&name).into()
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
45
|
+
let lfs = lfs
|
46
|
+
.each()
|
47
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
48
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
49
|
+
|
50
|
+
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
51
|
+
let df = lf.ldf.clone().collect().unwrap();
|
52
|
+
RbDataFrame::new(df)
|
53
|
+
})))
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn cols(names: Vec<String>) -> RbExpr {
|
57
|
+
dsl::cols(names).into()
|
58
|
+
}
|
59
|
+
|
60
|
+
pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
61
|
+
let (seq, len) = get_rbseq(lfs)?;
|
62
|
+
let mut lfs = Vec::with_capacity(len);
|
63
|
+
|
64
|
+
for res in seq.each() {
|
65
|
+
let item = res?;
|
66
|
+
let lf = get_lf(item)?;
|
67
|
+
lfs.push(lf);
|
68
|
+
}
|
69
|
+
|
70
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
71
|
+
Ok(lf.into())
|
72
|
+
}
|
73
|
+
|
74
|
+
#[allow(clippy::too_many_arguments)]
|
75
|
+
pub fn duration(
|
76
|
+
days: Option<&RbExpr>,
|
77
|
+
seconds: Option<&RbExpr>,
|
78
|
+
nanoseconds: Option<&RbExpr>,
|
79
|
+
microseconds: Option<&RbExpr>,
|
80
|
+
milliseconds: Option<&RbExpr>,
|
81
|
+
minutes: Option<&RbExpr>,
|
82
|
+
hours: Option<&RbExpr>,
|
83
|
+
weeks: Option<&RbExpr>,
|
84
|
+
) -> RbExpr {
|
85
|
+
set_unwrapped_or_0!(
|
86
|
+
days,
|
87
|
+
seconds,
|
88
|
+
nanoseconds,
|
89
|
+
microseconds,
|
90
|
+
milliseconds,
|
91
|
+
minutes,
|
92
|
+
hours,
|
93
|
+
weeks,
|
94
|
+
);
|
95
|
+
let args = DurationArgs {
|
96
|
+
days,
|
97
|
+
seconds,
|
98
|
+
nanoseconds,
|
99
|
+
microseconds,
|
100
|
+
milliseconds,
|
101
|
+
minutes,
|
102
|
+
hours,
|
103
|
+
weeks,
|
104
|
+
};
|
105
|
+
dsl::duration(args).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn count() -> RbExpr {
|
109
|
+
dsl::count().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn first() -> RbExpr {
|
113
|
+
dsl::first().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn last() -> RbExpr {
|
117
|
+
dsl::last().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
121
|
+
dsl::dtype_cols(dtypes).into()
|
122
|
+
}
|
123
|
+
|
124
|
+
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
125
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
126
|
+
|
127
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
128
|
+
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
129
|
+
}
|
130
|
+
|
131
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
132
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
133
|
+
|
134
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
135
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
136
|
+
}
|
137
|
+
|
138
|
+
// TODO improve
|
139
|
+
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
140
|
+
if value.is_nil() {
|
141
|
+
Ok(dsl::lit(Null {}).into())
|
142
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
143
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
144
|
+
} else if let Some(v) = RString::from_value(value) {
|
145
|
+
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
146
|
+
} else if value.is_kind_of(class::integer()) {
|
147
|
+
match value.try_convert::<i64>() {
|
148
|
+
Ok(val) => {
|
149
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
150
|
+
Ok(dsl::lit(val as i32).into())
|
151
|
+
} else {
|
152
|
+
Ok(dsl::lit(val).into())
|
153
|
+
}
|
154
|
+
}
|
155
|
+
_ => {
|
156
|
+
let val = value.try_convert::<u64>()?;
|
157
|
+
Ok(dsl::lit(val).into())
|
158
|
+
}
|
159
|
+
}
|
160
|
+
} else {
|
161
|
+
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
162
|
+
}
|
163
|
+
}
|
164
|
+
|
165
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
166
|
+
if value.is_nil() {
|
167
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
168
|
+
} else {
|
169
|
+
todo!();
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
174
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
175
|
+
}
|
176
|
+
|
177
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
178
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
179
|
+
.into()
|
180
|
+
}
|
181
|
+
|
182
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
183
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
184
|
+
}
|
185
|
+
|
186
|
+
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
187
|
+
let s = rb_exprs_to_exprs(s)?;
|
188
|
+
Ok(dsl::concat_str(s, &sep).into())
|
189
|
+
}
|
190
|
+
|
191
|
+
pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
192
|
+
let s = rb_exprs_to_exprs(s)?;
|
193
|
+
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
194
|
+
Ok(expr.into())
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
198
|
+
let dtypes = dtypes
|
199
|
+
.each()
|
200
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
201
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
202
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
203
|
+
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
207
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
208
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
209
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
/// Wrapper around `dsl::When`, registered with magnus under the Ruby class
/// name `Polars::RbWhen`.
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
|
+
#[derive(Clone)]
|
7
|
+
pub struct RbWhen {
|
8
|
+
// The wrapped polars `When` builder.
pub inner: dsl::When,
|
9
|
+
}
|
10
|
+
|
11
|
+
/// Wraps a `dsl::When` in an `RbWhen`.
impl From<dsl::When> for RbWhen {
    fn from(when: dsl::When) -> Self {
        Self { inner: when }
    }
}
|
16
|
+
|
17
|
+
/// Wrapper around `dsl::WhenThen`, registered with magnus under the Ruby
/// class name `Polars::RbWhenThen`.
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
|
+
#[derive(Clone)]
|
19
|
+
pub struct RbWhenThen {
|
20
|
+
// The wrapped polars `WhenThen` builder.
pub inner: dsl::WhenThen,
|
21
|
+
}
|
22
|
+
|
23
|
+
/// Wraps a `dsl::WhenThen` in an `RbWhenThen`.
impl From<dsl::WhenThen> for RbWhenThen {
    fn from(when_then: dsl::WhenThen) -> Self {
        Self { inner: when_then }
    }
}
|
28
|
+
|
29
|
+
impl RbWhen {
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
|
31
|
+
self.inner.clone().then(expr.inner.clone()).into()
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
impl RbWhenThen {
|
36
|
+
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
|
+
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn when(predicate: &RbExpr) -> RbWhen {
|
42
|
+
dsl::when(predicate.inner.clone()).into()
|
43
|
+
}
|
@@ -1,37 +1,15 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, Value};
|
2
2
|
use polars::io::RowCount;
|
3
|
-
use polars::lazy::frame::
|
3
|
+
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
|
+
use std::path::PathBuf;
|
7
8
|
|
8
9
|
use crate::conversion::*;
|
10
|
+
use crate::expr::rb_exprs_to_exprs;
|
9
11
|
use crate::file::get_file_like;
|
10
|
-
use crate::
|
11
|
-
use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
|
12
|
-
|
13
|
-
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
14
|
-
pub struct RbLazyGroupBy {
|
15
|
-
lgb: RefCell<Option<LazyGroupBy>>,
|
16
|
-
}
|
17
|
-
|
18
|
-
impl RbLazyGroupBy {
|
19
|
-
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
20
|
-
let lgb = self.lgb.borrow_mut().take().unwrap();
|
21
|
-
let aggs = rb_exprs_to_exprs(aggs)?;
|
22
|
-
Ok(lgb.agg(aggs).into())
|
23
|
-
}
|
24
|
-
|
25
|
-
pub fn head(&self, n: usize) -> RbLazyFrame {
|
26
|
-
let lgb = self.lgb.take().unwrap();
|
27
|
-
lgb.head(Some(n)).into()
|
28
|
-
}
|
29
|
-
|
30
|
-
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
31
|
-
let lgb = self.lgb.take().unwrap();
|
32
|
-
lgb.tail(Some(n)).into()
|
33
|
-
}
|
34
|
-
}
|
12
|
+
use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
35
13
|
|
36
14
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
37
15
|
#[derive(Clone)]
|
@@ -118,7 +96,7 @@ impl RbLazyFrame {
|
|
118
96
|
let skip_rows_after_header: usize = arguments[15].try_convert()?;
|
119
97
|
let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
|
120
98
|
let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
|
121
|
-
let
|
99
|
+
let try_parse_dates: bool = arguments[18].try_convert()?;
|
122
100
|
let eol_char: String = arguments[19].try_convert()?;
|
123
101
|
// end arguments
|
124
102
|
|
@@ -131,10 +109,10 @@ impl RbLazyFrame {
|
|
131
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
132
110
|
|
133
111
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
134
|
-
|
112
|
+
overwrite_dtype
|
135
113
|
.into_iter()
|
136
|
-
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
137
|
-
|
114
|
+
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
115
|
+
.collect::<Schema>()
|
138
116
|
});
|
139
117
|
let r = LazyCsvReader::new(path)
|
140
118
|
.with_infer_schema_length(infer_schema_length)
|
@@ -153,7 +131,7 @@ impl RbLazyFrame {
|
|
153
131
|
.with_skip_rows_after_header(skip_rows_after_header)
|
154
132
|
.with_encoding(encoding.0)
|
155
133
|
.with_row_count(row_count)
|
156
|
-
.
|
134
|
+
.with_try_parse_dates(try_parse_dates)
|
157
135
|
.with_null_values(null_values);
|
158
136
|
|
159
137
|
if let Some(_lambda) = with_schema_modify {
|
@@ -163,6 +141,7 @@ impl RbLazyFrame {
|
|
163
141
|
Ok(r.finish().map_err(RbPolarsErr::from)?.into())
|
164
142
|
}
|
165
143
|
|
144
|
+
#[allow(clippy::too_many_arguments)]
|
166
145
|
pub fn new_from_parquet(
|
167
146
|
path: String,
|
168
147
|
n_rows: Option<usize>,
|
@@ -171,6 +150,7 @@ impl RbLazyFrame {
|
|
171
150
|
rechunk: bool,
|
172
151
|
row_count: Option<(String, IdxSize)>,
|
173
152
|
low_memory: bool,
|
153
|
+
use_statistics: bool,
|
174
154
|
) -> RbResult<Self> {
|
175
155
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
176
156
|
let args = ScanArgsParquet {
|
@@ -182,6 +162,7 @@ impl RbLazyFrame {
|
|
182
162
|
low_memory,
|
183
163
|
// TODO support cloud options
|
184
164
|
cloud_options: None,
|
165
|
+
use_statistics,
|
185
166
|
};
|
186
167
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
187
168
|
Ok(lf.into())
|
@@ -284,6 +265,32 @@ impl RbLazyFrame {
|
|
284
265
|
Ok(df.into())
|
285
266
|
}
|
286
267
|
|
268
|
+
#[allow(clippy::too_many_arguments)]
|
269
|
+
pub fn sink_parquet(
|
270
|
+
&self,
|
271
|
+
path: PathBuf,
|
272
|
+
compression: String,
|
273
|
+
compression_level: Option<i32>,
|
274
|
+
statistics: bool,
|
275
|
+
row_group_size: Option<usize>,
|
276
|
+
data_pagesize_limit: Option<usize>,
|
277
|
+
maintain_order: bool,
|
278
|
+
) -> RbResult<()> {
|
279
|
+
let compression = parse_parquet_compression(&compression, compression_level)?;
|
280
|
+
|
281
|
+
let options = ParquetWriteOptions {
|
282
|
+
compression,
|
283
|
+
statistics,
|
284
|
+
row_group_size,
|
285
|
+
data_pagesize_limit,
|
286
|
+
maintain_order,
|
287
|
+
};
|
288
|
+
|
289
|
+
let ldf = self.ldf.clone();
|
290
|
+
ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
|
291
|
+
Ok(())
|
292
|
+
}
|
293
|
+
|
287
294
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
288
295
|
let ldf = self.ldf.clone();
|
289
296
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
@@ -316,7 +323,7 @@ impl RbLazyFrame {
|
|
316
323
|
|
317
324
|
pub fn groupby_rolling(
|
318
325
|
&self,
|
319
|
-
index_column:
|
326
|
+
index_column: &RbExpr,
|
320
327
|
period: String,
|
321
328
|
offset: String,
|
322
329
|
closed: Wrap<ClosedWindow>,
|
@@ -326,9 +333,10 @@ impl RbLazyFrame {
|
|
326
333
|
let ldf = self.ldf.clone();
|
327
334
|
let by = rb_exprs_to_exprs(by)?;
|
328
335
|
let lazy_gb = ldf.groupby_rolling(
|
336
|
+
index_column.inner.clone(),
|
329
337
|
by,
|
330
338
|
RollingGroupOptions {
|
331
|
-
index_column,
|
339
|
+
index_column: "".into(),
|
332
340
|
period: Duration::parse(&period),
|
333
341
|
offset: Duration::parse(&offset),
|
334
342
|
closed_window,
|
@@ -343,7 +351,7 @@ impl RbLazyFrame {
|
|
343
351
|
#[allow(clippy::too_many_arguments)]
|
344
352
|
pub fn groupby_dynamic(
|
345
353
|
&self,
|
346
|
-
index_column:
|
354
|
+
index_column: &RbExpr,
|
347
355
|
every: String,
|
348
356
|
period: String,
|
349
357
|
offset: String,
|
@@ -357,9 +365,9 @@ impl RbLazyFrame {
|
|
357
365
|
let by = rb_exprs_to_exprs(by)?;
|
358
366
|
let ldf = self.ldf.clone();
|
359
367
|
let lazy_gb = ldf.groupby_dynamic(
|
368
|
+
index_column.inner.clone(),
|
360
369
|
by,
|
361
370
|
DynamicGroupOptions {
|
362
|
-
index_column,
|
363
371
|
every: Duration::parse(&every),
|
364
372
|
period: Duration::parse(&period),
|
365
373
|
offset: Duration::parse(&offset),
|
@@ -367,6 +375,7 @@ impl RbLazyFrame {
|
|
367
375
|
include_boundaries,
|
368
376
|
closed_window,
|
369
377
|
start_by: start_by.0,
|
378
|
+
..Default::default()
|
370
379
|
},
|
371
380
|
);
|
372
381
|
|
@@ -415,10 +424,10 @@ impl RbLazyFrame {
|
|
415
424
|
.force_parallel(force_parallel)
|
416
425
|
.how(JoinType::AsOf(AsOfOptions {
|
417
426
|
strategy: strategy.0,
|
418
|
-
left_by,
|
419
|
-
right_by,
|
427
|
+
left_by: left_by.map(strings_to_smartstrings),
|
428
|
+
right_by: right_by.map(strings_to_smartstrings),
|
420
429
|
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
421
|
-
tolerance_str,
|
430
|
+
tolerance_str: tolerance_str.map(|s| s.into()),
|
422
431
|
}))
|
423
432
|
.suffix(suffix)
|
424
433
|
.finish()
|
@@ -570,12 +579,14 @@ impl RbLazyFrame {
|
|
570
579
|
value_vars: Vec<String>,
|
571
580
|
value_name: Option<String>,
|
572
581
|
variable_name: Option<String>,
|
582
|
+
streamable: bool,
|
573
583
|
) -> Self {
|
574
584
|
let args = MeltArgs {
|
575
|
-
id_vars,
|
576
|
-
value_vars,
|
577
|
-
value_name,
|
578
|
-
variable_name,
|
585
|
+
id_vars: strings_to_smartstrings(id_vars),
|
586
|
+
value_vars: strings_to_smartstrings(value_vars),
|
587
|
+
value_name: value_name.map(|s| s.into()),
|
588
|
+
variable_name: variable_name.map(|s| s.into()),
|
589
|
+
streamable,
|
579
590
|
};
|
580
591
|
|
581
592
|
let ldf = self.ldf.clone();
|
@@ -596,8 +607,10 @@ impl RbLazyFrame {
|
|
596
607
|
self.ldf.clone().into()
|
597
608
|
}
|
598
609
|
|
599
|
-
pub fn columns(&self) -> RbResult<
|
600
|
-
|
610
|
+
pub fn columns(&self) -> RbResult<RArray> {
|
611
|
+
let schema = self.get_schema()?;
|
612
|
+
let iter = schema.iter_names().map(|s| s.as_str());
|
613
|
+
Ok(RArray::from_iter(iter))
|
601
614
|
}
|
602
615
|
|
603
616
|
pub fn dtypes(&self) -> RbResult<RArray> {
|
@@ -614,7 +627,7 @@ impl RbLazyFrame {
|
|
614
627
|
// TODO remove unwrap
|
615
628
|
schema_dict
|
616
629
|
.aset::<String, Value>(
|
617
|
-
fld.name().
|
630
|
+
fld.name().to_string(),
|
618
631
|
Wrap(fld.data_type().clone()).into_value(),
|
619
632
|
)
|
620
633
|
.unwrap();
|
@@ -0,0 +1,29 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::frame::LazyGroupBy;
|
3
|
+
use std::cell::RefCell;
|
4
|
+
|
5
|
+
use crate::expr::rb_exprs_to_exprs;
|
6
|
+
use crate::{RbLazyFrame, RbResult};
|
7
|
+
|
8
|
+
/// Wrapper around a polars `LazyGroupBy`, registered with magnus under the
/// Ruby class name `Polars::RbLazyGroupBy`.
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
9
|
+
pub struct RbLazyGroupBy {
|
10
|
+
// Held in `RefCell<Option<..>>` so that methods taking `&self` can move the
// group-by out; the slot is `None` once it has been taken.
pub lgb: RefCell<Option<LazyGroupBy>>,
|
11
|
+
}
|
12
|
+
|
13
|
+
impl RbLazyGroupBy {
|
14
|
+
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
15
|
+
let lgb = self.lgb.borrow_mut().take().unwrap();
|
16
|
+
let aggs = rb_exprs_to_exprs(aggs)?;
|
17
|
+
Ok(lgb.agg(aggs).into())
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn head(&self, n: usize) -> RbLazyFrame {
|
21
|
+
let lgb = self.lgb.take().unwrap();
|
22
|
+
lgb.head(Some(n)).into()
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
26
|
+
let lgb = self.lgb.take().unwrap();
|
27
|
+
lgb.tail(Some(n)).into()
|
28
|
+
}
|
29
|
+
}
|