polars-df 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +8 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +106 -4
- data/ext/polars/src/dataframe.rs +19 -17
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +933 -0
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +205 -303
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +99 -539
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +26 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
|
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
use polars::prelude::*;
|
4
|
+
|
5
|
+
use crate::apply::lazy::binary_lambda;
|
6
|
+
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
7
|
+
use crate::prelude::vec_extract_wrapped;
|
8
|
+
use crate::rb_exprs_to_exprs;
|
9
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
10
|
+
|
11
|
+
/// Shadows every listed `Option<&RbExpr>` binding with an owned expression:
/// a clone of the wrapped `inner` expression when the option is `Some`,
/// otherwise the literal `0`.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(e) => e.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
|
16
|
+
|
17
|
+
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
18
|
+
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
22
|
+
let by = rb_exprs_to_exprs(by)?;
|
23
|
+
Ok(dsl::arg_sort_by(by, &descending).into())
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
27
|
+
dsl::arg_where(condition.inner.clone()).into()
|
28
|
+
}
|
29
|
+
|
30
|
+
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
32
|
+
Ok(dsl::as_struct(&exprs).into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
36
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
37
|
+
Ok(dsl::coalesce(&exprs).into())
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn col(name: String) -> RbExpr {
|
41
|
+
dsl::col(&name).into()
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
45
|
+
let lfs = lfs
|
46
|
+
.each()
|
47
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
48
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
49
|
+
|
50
|
+
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
51
|
+
let df = lf.ldf.clone().collect().unwrap();
|
52
|
+
RbDataFrame::new(df)
|
53
|
+
})))
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn cols(names: Vec<String>) -> RbExpr {
|
57
|
+
dsl::cols(names).into()
|
58
|
+
}
|
59
|
+
|
60
|
+
pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
61
|
+
let (seq, len) = get_rbseq(lfs)?;
|
62
|
+
let mut lfs = Vec::with_capacity(len);
|
63
|
+
|
64
|
+
for res in seq.each() {
|
65
|
+
let item = res?;
|
66
|
+
let lf = get_lf(item)?;
|
67
|
+
lfs.push(lf);
|
68
|
+
}
|
69
|
+
|
70
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
71
|
+
Ok(lf.into())
|
72
|
+
}
|
73
|
+
|
74
|
+
#[allow(clippy::too_many_arguments)]
|
75
|
+
pub fn duration(
|
76
|
+
days: Option<&RbExpr>,
|
77
|
+
seconds: Option<&RbExpr>,
|
78
|
+
nanoseconds: Option<&RbExpr>,
|
79
|
+
microseconds: Option<&RbExpr>,
|
80
|
+
milliseconds: Option<&RbExpr>,
|
81
|
+
minutes: Option<&RbExpr>,
|
82
|
+
hours: Option<&RbExpr>,
|
83
|
+
weeks: Option<&RbExpr>,
|
84
|
+
) -> RbExpr {
|
85
|
+
set_unwrapped_or_0!(
|
86
|
+
days,
|
87
|
+
seconds,
|
88
|
+
nanoseconds,
|
89
|
+
microseconds,
|
90
|
+
milliseconds,
|
91
|
+
minutes,
|
92
|
+
hours,
|
93
|
+
weeks,
|
94
|
+
);
|
95
|
+
let args = DurationArgs {
|
96
|
+
days,
|
97
|
+
seconds,
|
98
|
+
nanoseconds,
|
99
|
+
microseconds,
|
100
|
+
milliseconds,
|
101
|
+
minutes,
|
102
|
+
hours,
|
103
|
+
weeks,
|
104
|
+
};
|
105
|
+
dsl::duration(args).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn count() -> RbExpr {
|
109
|
+
dsl::count().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn first() -> RbExpr {
|
113
|
+
dsl::first().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn last() -> RbExpr {
|
117
|
+
dsl::last().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
121
|
+
dsl::dtype_cols(dtypes).into()
|
122
|
+
}
|
123
|
+
|
124
|
+
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
125
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
126
|
+
|
127
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
128
|
+
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
129
|
+
}
|
130
|
+
|
131
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
132
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
133
|
+
|
134
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
135
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
136
|
+
}
|
137
|
+
|
138
|
+
// TODO improve
|
139
|
+
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
140
|
+
if value.is_nil() {
|
141
|
+
Ok(dsl::lit(Null {}).into())
|
142
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
143
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
144
|
+
} else if let Some(v) = RString::from_value(value) {
|
145
|
+
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
146
|
+
} else if value.is_kind_of(class::integer()) {
|
147
|
+
match value.try_convert::<i64>() {
|
148
|
+
Ok(val) => {
|
149
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
150
|
+
Ok(dsl::lit(val as i32).into())
|
151
|
+
} else {
|
152
|
+
Ok(dsl::lit(val).into())
|
153
|
+
}
|
154
|
+
}
|
155
|
+
_ => {
|
156
|
+
let val = value.try_convert::<u64>()?;
|
157
|
+
Ok(dsl::lit(val).into())
|
158
|
+
}
|
159
|
+
}
|
160
|
+
} else {
|
161
|
+
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
162
|
+
}
|
163
|
+
}
|
164
|
+
|
165
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
166
|
+
if value.is_nil() {
|
167
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
168
|
+
} else {
|
169
|
+
todo!();
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
174
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
175
|
+
}
|
176
|
+
|
177
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
178
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
179
|
+
.into()
|
180
|
+
}
|
181
|
+
|
182
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
183
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
184
|
+
}
|
185
|
+
|
186
|
+
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
187
|
+
let s = rb_exprs_to_exprs(s)?;
|
188
|
+
Ok(dsl::concat_str(s, &sep).into())
|
189
|
+
}
|
190
|
+
|
191
|
+
pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
192
|
+
let s = rb_exprs_to_exprs(s)?;
|
193
|
+
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
194
|
+
Ok(expr.into())
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
198
|
+
let dtypes = dtypes
|
199
|
+
.each()
|
200
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
201
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
202
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
203
|
+
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
207
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
208
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
209
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
|
+
#[derive(Clone)]
|
7
|
+
pub struct RbWhen {
|
8
|
+
pub inner: dsl::When,
|
9
|
+
}
|
10
|
+
|
11
|
+
impl From<dsl::When> for RbWhen {
|
12
|
+
fn from(inner: dsl::When) -> Self {
|
13
|
+
RbWhen { inner }
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
|
+
#[derive(Clone)]
|
19
|
+
pub struct RbWhenThen {
|
20
|
+
pub inner: dsl::WhenThen,
|
21
|
+
}
|
22
|
+
|
23
|
+
impl From<dsl::WhenThen> for RbWhenThen {
|
24
|
+
fn from(inner: dsl::WhenThen) -> Self {
|
25
|
+
RbWhenThen { inner }
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
impl RbWhen {
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
|
31
|
+
self.inner.clone().then(expr.inner.clone()).into()
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
impl RbWhenThen {
|
36
|
+
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
|
+
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn when(predicate: &RbExpr) -> RbWhen {
|
42
|
+
dsl::when(predicate.inner.clone()).into()
|
43
|
+
}
|
@@ -1,38 +1,15 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, Value};
|
2
2
|
use polars::io::RowCount;
|
3
|
-
use polars::lazy::frame::
|
3
|
+
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
7
|
use std::path::PathBuf;
|
8
8
|
|
9
9
|
use crate::conversion::*;
|
10
|
+
use crate::expr::rb_exprs_to_exprs;
|
10
11
|
use crate::file::get_file_like;
|
11
|
-
use crate::
|
12
|
-
use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
|
13
|
-
|
14
|
-
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
15
|
-
pub struct RbLazyGroupBy {
|
16
|
-
lgb: RefCell<Option<LazyGroupBy>>,
|
17
|
-
}
|
18
|
-
|
19
|
-
impl RbLazyGroupBy {
|
20
|
-
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
21
|
-
let lgb = self.lgb.borrow_mut().take().unwrap();
|
22
|
-
let aggs = rb_exprs_to_exprs(aggs)?;
|
23
|
-
Ok(lgb.agg(aggs).into())
|
24
|
-
}
|
25
|
-
|
26
|
-
pub fn head(&self, n: usize) -> RbLazyFrame {
|
27
|
-
let lgb = self.lgb.take().unwrap();
|
28
|
-
lgb.head(Some(n)).into()
|
29
|
-
}
|
30
|
-
|
31
|
-
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
32
|
-
let lgb = self.lgb.take().unwrap();
|
33
|
-
lgb.tail(Some(n)).into()
|
34
|
-
}
|
35
|
-
}
|
12
|
+
use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
36
13
|
|
37
14
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
38
15
|
#[derive(Clone)]
|
@@ -132,10 +109,10 @@ impl RbLazyFrame {
|
|
132
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
133
110
|
|
134
111
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
135
|
-
|
112
|
+
overwrite_dtype
|
136
113
|
.into_iter()
|
137
|
-
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
138
|
-
|
114
|
+
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
115
|
+
.collect::<Schema>()
|
139
116
|
});
|
140
117
|
let r = LazyCsvReader::new(path)
|
141
118
|
.with_infer_schema_length(infer_schema_length)
|
@@ -346,7 +323,7 @@ impl RbLazyFrame {
|
|
346
323
|
|
347
324
|
pub fn groupby_rolling(
|
348
325
|
&self,
|
349
|
-
index_column:
|
326
|
+
index_column: &RbExpr,
|
350
327
|
period: String,
|
351
328
|
offset: String,
|
352
329
|
closed: Wrap<ClosedWindow>,
|
@@ -356,9 +333,10 @@ impl RbLazyFrame {
|
|
356
333
|
let ldf = self.ldf.clone();
|
357
334
|
let by = rb_exprs_to_exprs(by)?;
|
358
335
|
let lazy_gb = ldf.groupby_rolling(
|
336
|
+
index_column.inner.clone(),
|
359
337
|
by,
|
360
338
|
RollingGroupOptions {
|
361
|
-
index_column:
|
339
|
+
index_column: "".into(),
|
362
340
|
period: Duration::parse(&period),
|
363
341
|
offset: Duration::parse(&offset),
|
364
342
|
closed_window,
|
@@ -373,7 +351,7 @@ impl RbLazyFrame {
|
|
373
351
|
#[allow(clippy::too_many_arguments)]
|
374
352
|
pub fn groupby_dynamic(
|
375
353
|
&self,
|
376
|
-
index_column:
|
354
|
+
index_column: &RbExpr,
|
377
355
|
every: String,
|
378
356
|
period: String,
|
379
357
|
offset: String,
|
@@ -387,9 +365,9 @@ impl RbLazyFrame {
|
|
387
365
|
let by = rb_exprs_to_exprs(by)?;
|
388
366
|
let ldf = self.ldf.clone();
|
389
367
|
let lazy_gb = ldf.groupby_dynamic(
|
368
|
+
index_column.inner.clone(),
|
390
369
|
by,
|
391
370
|
DynamicGroupOptions {
|
392
|
-
index_column: index_column.into(),
|
393
371
|
every: Duration::parse(&every),
|
394
372
|
period: Duration::parse(&period),
|
395
373
|
offset: Duration::parse(&offset),
|
@@ -397,6 +375,7 @@ impl RbLazyFrame {
|
|
397
375
|
include_boundaries,
|
398
376
|
closed_window,
|
399
377
|
start_by: start_by.0,
|
378
|
+
..Default::default()
|
400
379
|
},
|
401
380
|
);
|
402
381
|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::frame::LazyGroupBy;
|
3
|
+
use std::cell::RefCell;
|
4
|
+
|
5
|
+
use crate::expr::rb_exprs_to_exprs;
|
6
|
+
use crate::{RbLazyFrame, RbResult};
|
7
|
+
|
8
|
+
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
9
|
+
pub struct RbLazyGroupBy {
|
10
|
+
pub lgb: RefCell<Option<LazyGroupBy>>,
|
11
|
+
}
|
12
|
+
|
13
|
+
impl RbLazyGroupBy {
|
14
|
+
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
15
|
+
let lgb = self.lgb.borrow_mut().take().unwrap();
|
16
|
+
let aggs = rb_exprs_to_exprs(aggs)?;
|
17
|
+
Ok(lgb.agg(aggs).into())
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn head(&self, n: usize) -> RbLazyFrame {
|
21
|
+
let lgb = self.lgb.take().unwrap();
|
22
|
+
lgb.head(Some(n)).into()
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
26
|
+
let lgb = self.lgb.take().unwrap();
|
27
|
+
lgb.tail(Some(n)).into()
|
28
|
+
}
|
29
|
+
}
|