polars-df 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +8 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +106 -4
- data/ext/polars/src/dataframe.rs +19 -17
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +933 -0
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +205 -303
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +99 -539
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +26 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
|
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
use polars::prelude::*;
|
4
|
+
|
5
|
+
use crate::apply::lazy::binary_lambda;
|
6
|
+
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
7
|
+
use crate::prelude::vec_extract_wrapped;
|
8
|
+
use crate::rb_exprs_to_exprs;
|
9
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
10
|
+
|
11
|
+
/// Rebinds every listed optional expression wrapper to an owned expression:
/// a clone of the wrapped `inner` expression when a value is present,
/// otherwise the integer literal `0`.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(wrapped) => wrapped.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
|
16
|
+
|
17
|
+
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
18
|
+
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
22
|
+
let by = rb_exprs_to_exprs(by)?;
|
23
|
+
Ok(dsl::arg_sort_by(by, &descending).into())
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
27
|
+
dsl::arg_where(condition.inner.clone()).into()
|
28
|
+
}
|
29
|
+
|
30
|
+
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
32
|
+
Ok(dsl::as_struct(&exprs).into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
36
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
37
|
+
Ok(dsl::coalesce(&exprs).into())
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn col(name: String) -> RbExpr {
|
41
|
+
dsl::col(&name).into()
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
45
|
+
let lfs = lfs
|
46
|
+
.each()
|
47
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
48
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
49
|
+
|
50
|
+
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
51
|
+
let df = lf.ldf.clone().collect().unwrap();
|
52
|
+
RbDataFrame::new(df)
|
53
|
+
})))
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn cols(names: Vec<String>) -> RbExpr {
|
57
|
+
dsl::cols(names).into()
|
58
|
+
}
|
59
|
+
|
60
|
+
pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
61
|
+
let (seq, len) = get_rbseq(lfs)?;
|
62
|
+
let mut lfs = Vec::with_capacity(len);
|
63
|
+
|
64
|
+
for res in seq.each() {
|
65
|
+
let item = res?;
|
66
|
+
let lf = get_lf(item)?;
|
67
|
+
lfs.push(lf);
|
68
|
+
}
|
69
|
+
|
70
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
71
|
+
Ok(lf.into())
|
72
|
+
}
|
73
|
+
|
74
|
+
#[allow(clippy::too_many_arguments)]
|
75
|
+
pub fn duration(
|
76
|
+
days: Option<&RbExpr>,
|
77
|
+
seconds: Option<&RbExpr>,
|
78
|
+
nanoseconds: Option<&RbExpr>,
|
79
|
+
microseconds: Option<&RbExpr>,
|
80
|
+
milliseconds: Option<&RbExpr>,
|
81
|
+
minutes: Option<&RbExpr>,
|
82
|
+
hours: Option<&RbExpr>,
|
83
|
+
weeks: Option<&RbExpr>,
|
84
|
+
) -> RbExpr {
|
85
|
+
set_unwrapped_or_0!(
|
86
|
+
days,
|
87
|
+
seconds,
|
88
|
+
nanoseconds,
|
89
|
+
microseconds,
|
90
|
+
milliseconds,
|
91
|
+
minutes,
|
92
|
+
hours,
|
93
|
+
weeks,
|
94
|
+
);
|
95
|
+
let args = DurationArgs {
|
96
|
+
days,
|
97
|
+
seconds,
|
98
|
+
nanoseconds,
|
99
|
+
microseconds,
|
100
|
+
milliseconds,
|
101
|
+
minutes,
|
102
|
+
hours,
|
103
|
+
weeks,
|
104
|
+
};
|
105
|
+
dsl::duration(args).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn count() -> RbExpr {
|
109
|
+
dsl::count().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn first() -> RbExpr {
|
113
|
+
dsl::first().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn last() -> RbExpr {
|
117
|
+
dsl::last().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
121
|
+
dsl::dtype_cols(dtypes).into()
|
122
|
+
}
|
123
|
+
|
124
|
+
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
125
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
126
|
+
|
127
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
128
|
+
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
129
|
+
}
|
130
|
+
|
131
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
132
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
133
|
+
|
134
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
135
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
136
|
+
}
|
137
|
+
|
138
|
+
// TODO improve
|
139
|
+
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
140
|
+
if value.is_nil() {
|
141
|
+
Ok(dsl::lit(Null {}).into())
|
142
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
143
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
144
|
+
} else if let Some(v) = RString::from_value(value) {
|
145
|
+
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
146
|
+
} else if value.is_kind_of(class::integer()) {
|
147
|
+
match value.try_convert::<i64>() {
|
148
|
+
Ok(val) => {
|
149
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
150
|
+
Ok(dsl::lit(val as i32).into())
|
151
|
+
} else {
|
152
|
+
Ok(dsl::lit(val).into())
|
153
|
+
}
|
154
|
+
}
|
155
|
+
_ => {
|
156
|
+
let val = value.try_convert::<u64>()?;
|
157
|
+
Ok(dsl::lit(val).into())
|
158
|
+
}
|
159
|
+
}
|
160
|
+
} else {
|
161
|
+
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
162
|
+
}
|
163
|
+
}
|
164
|
+
|
165
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
166
|
+
if value.is_nil() {
|
167
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
168
|
+
} else {
|
169
|
+
todo!();
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
174
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
175
|
+
}
|
176
|
+
|
177
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
178
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
179
|
+
.into()
|
180
|
+
}
|
181
|
+
|
182
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
183
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
184
|
+
}
|
185
|
+
|
186
|
+
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
187
|
+
let s = rb_exprs_to_exprs(s)?;
|
188
|
+
Ok(dsl::concat_str(s, &sep).into())
|
189
|
+
}
|
190
|
+
|
191
|
+
pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
192
|
+
let s = rb_exprs_to_exprs(s)?;
|
193
|
+
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
194
|
+
Ok(expr.into())
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
198
|
+
let dtypes = dtypes
|
199
|
+
.each()
|
200
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
201
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
202
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
203
|
+
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
207
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
208
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
209
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
|
+
#[derive(Clone)]
|
7
|
+
pub struct RbWhen {
|
8
|
+
pub inner: dsl::When,
|
9
|
+
}
|
10
|
+
|
11
|
+
impl From<dsl::When> for RbWhen {
|
12
|
+
fn from(inner: dsl::When) -> Self {
|
13
|
+
RbWhen { inner }
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
|
+
#[derive(Clone)]
|
19
|
+
pub struct RbWhenThen {
|
20
|
+
pub inner: dsl::WhenThen,
|
21
|
+
}
|
22
|
+
|
23
|
+
impl From<dsl::WhenThen> for RbWhenThen {
|
24
|
+
fn from(inner: dsl::WhenThen) -> Self {
|
25
|
+
RbWhenThen { inner }
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
impl RbWhen {
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
|
31
|
+
self.inner.clone().then(expr.inner.clone()).into()
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
impl RbWhenThen {
|
36
|
+
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
|
+
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn when(predicate: &RbExpr) -> RbWhen {
|
42
|
+
dsl::when(predicate.inner.clone()).into()
|
43
|
+
}
|
@@ -1,38 +1,15 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, Value};
|
2
2
|
use polars::io::RowCount;
|
3
|
-
use polars::lazy::frame::
|
3
|
+
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
7
|
use std::path::PathBuf;
|
8
8
|
|
9
9
|
use crate::conversion::*;
|
10
|
+
use crate::expr::rb_exprs_to_exprs;
|
10
11
|
use crate::file::get_file_like;
|
11
|
-
use crate::
|
12
|
-
use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
|
13
|
-
|
14
|
-
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
15
|
-
pub struct RbLazyGroupBy {
|
16
|
-
lgb: RefCell<Option<LazyGroupBy>>,
|
17
|
-
}
|
18
|
-
|
19
|
-
impl RbLazyGroupBy {
|
20
|
-
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
21
|
-
let lgb = self.lgb.borrow_mut().take().unwrap();
|
22
|
-
let aggs = rb_exprs_to_exprs(aggs)?;
|
23
|
-
Ok(lgb.agg(aggs).into())
|
24
|
-
}
|
25
|
-
|
26
|
-
pub fn head(&self, n: usize) -> RbLazyFrame {
|
27
|
-
let lgb = self.lgb.take().unwrap();
|
28
|
-
lgb.head(Some(n)).into()
|
29
|
-
}
|
30
|
-
|
31
|
-
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
32
|
-
let lgb = self.lgb.take().unwrap();
|
33
|
-
lgb.tail(Some(n)).into()
|
34
|
-
}
|
35
|
-
}
|
12
|
+
use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
36
13
|
|
37
14
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
38
15
|
#[derive(Clone)]
|
@@ -132,10 +109,10 @@ impl RbLazyFrame {
|
|
132
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
133
110
|
|
134
111
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
135
|
-
|
112
|
+
overwrite_dtype
|
136
113
|
.into_iter()
|
137
|
-
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
138
|
-
|
114
|
+
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
115
|
+
.collect::<Schema>()
|
139
116
|
});
|
140
117
|
let r = LazyCsvReader::new(path)
|
141
118
|
.with_infer_schema_length(infer_schema_length)
|
@@ -346,7 +323,7 @@ impl RbLazyFrame {
|
|
346
323
|
|
347
324
|
pub fn groupby_rolling(
|
348
325
|
&self,
|
349
|
-
index_column:
|
326
|
+
index_column: &RbExpr,
|
350
327
|
period: String,
|
351
328
|
offset: String,
|
352
329
|
closed: Wrap<ClosedWindow>,
|
@@ -356,9 +333,10 @@ impl RbLazyFrame {
|
|
356
333
|
let ldf = self.ldf.clone();
|
357
334
|
let by = rb_exprs_to_exprs(by)?;
|
358
335
|
let lazy_gb = ldf.groupby_rolling(
|
336
|
+
index_column.inner.clone(),
|
359
337
|
by,
|
360
338
|
RollingGroupOptions {
|
361
|
-
index_column:
|
339
|
+
index_column: "".into(),
|
362
340
|
period: Duration::parse(&period),
|
363
341
|
offset: Duration::parse(&offset),
|
364
342
|
closed_window,
|
@@ -373,7 +351,7 @@ impl RbLazyFrame {
|
|
373
351
|
#[allow(clippy::too_many_arguments)]
|
374
352
|
pub fn groupby_dynamic(
|
375
353
|
&self,
|
376
|
-
index_column:
|
354
|
+
index_column: &RbExpr,
|
377
355
|
every: String,
|
378
356
|
period: String,
|
379
357
|
offset: String,
|
@@ -387,9 +365,9 @@ impl RbLazyFrame {
|
|
387
365
|
let by = rb_exprs_to_exprs(by)?;
|
388
366
|
let ldf = self.ldf.clone();
|
389
367
|
let lazy_gb = ldf.groupby_dynamic(
|
368
|
+
index_column.inner.clone(),
|
390
369
|
by,
|
391
370
|
DynamicGroupOptions {
|
392
|
-
index_column: index_column.into(),
|
393
371
|
every: Duration::parse(&every),
|
394
372
|
period: Duration::parse(&period),
|
395
373
|
offset: Duration::parse(&offset),
|
@@ -397,6 +375,7 @@ impl RbLazyFrame {
|
|
397
375
|
include_boundaries,
|
398
376
|
closed_window,
|
399
377
|
start_by: start_by.0,
|
378
|
+
..Default::default()
|
400
379
|
},
|
401
380
|
);
|
402
381
|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::frame::LazyGroupBy;
|
3
|
+
use std::cell::RefCell;
|
4
|
+
|
5
|
+
use crate::expr::rb_exprs_to_exprs;
|
6
|
+
use crate::{RbLazyFrame, RbResult};
|
7
|
+
|
8
|
+
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
9
|
+
pub struct RbLazyGroupBy {
|
10
|
+
pub lgb: RefCell<Option<LazyGroupBy>>,
|
11
|
+
}
|
12
|
+
|
13
|
+
impl RbLazyGroupBy {
|
14
|
+
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
15
|
+
let lgb = self.lgb.borrow_mut().take().unwrap();
|
16
|
+
let aggs = rb_exprs_to_exprs(aggs)?;
|
17
|
+
Ok(lgb.agg(aggs).into())
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn head(&self, n: usize) -> RbLazyFrame {
|
21
|
+
let lgb = self.lgb.take().unwrap();
|
22
|
+
lgb.head(Some(n)).into()
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
26
|
+
let lgb = self.lgb.take().unwrap();
|
27
|
+
lgb.tail(Some(n)).into()
|
28
|
+
}
|
29
|
+
}
|