polars-df 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
3
|
+
use std::cell::RefCell;
|
4
|
+
|
5
|
+
use crate::conversion::wrap_join_type;
|
6
|
+
use crate::lazy::utils::rb_exprs_to_exprs;
|
7
|
+
use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult};
|
8
|
+
|
9
|
+
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
10
|
+
pub struct RbLazyGroupBy {
|
11
|
+
lgb: RefCell<Option<LazyGroupBy>>,
|
12
|
+
}
|
13
|
+
|
14
|
+
impl RbLazyGroupBy {
|
15
|
+
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
16
|
+
let lgb = self.lgb.borrow_mut().take().unwrap();
|
17
|
+
let aggs = rb_exprs_to_exprs(aggs)?;
|
18
|
+
Ok(lgb.agg(aggs).into())
|
19
|
+
}
|
20
|
+
}
|
21
|
+
|
22
|
+
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
23
|
+
#[derive(Clone)]
|
24
|
+
pub struct RbLazyFrame {
|
25
|
+
pub ldf: LazyFrame,
|
26
|
+
}
|
27
|
+
|
28
|
+
impl From<LazyFrame> for RbLazyFrame {
|
29
|
+
fn from(ldf: LazyFrame) -> Self {
|
30
|
+
RbLazyFrame { ldf }
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
impl RbLazyFrame {
|
35
|
+
#[allow(clippy::too_many_arguments)]
|
36
|
+
pub fn optimization_toggle(
|
37
|
+
&self,
|
38
|
+
type_coercion: bool,
|
39
|
+
predicate_pushdown: bool,
|
40
|
+
projection_pushdown: bool,
|
41
|
+
simplify_expr: bool,
|
42
|
+
slice_pushdown: bool,
|
43
|
+
_cse: bool,
|
44
|
+
allow_streaming: bool,
|
45
|
+
) -> RbLazyFrame {
|
46
|
+
let ldf = self.ldf.clone();
|
47
|
+
let ldf = ldf
|
48
|
+
.with_type_coercion(type_coercion)
|
49
|
+
.with_predicate_pushdown(predicate_pushdown)
|
50
|
+
.with_simplify_expr(simplify_expr)
|
51
|
+
.with_slice_pushdown(slice_pushdown)
|
52
|
+
// .with_common_subplan_elimination(cse)
|
53
|
+
.with_streaming(allow_streaming)
|
54
|
+
.with_projection_pushdown(projection_pushdown);
|
55
|
+
ldf.into()
|
56
|
+
}
|
57
|
+
|
58
|
+
pub fn collect(&self) -> RbResult<RbDataFrame> {
|
59
|
+
let ldf = self.ldf.clone();
|
60
|
+
let df = ldf.collect().map_err(RbPolarsErr::from)?;
|
61
|
+
Ok(df.into())
|
62
|
+
}
|
63
|
+
|
64
|
+
pub fn filter(&self, predicate: &RbExpr) -> RbLazyFrame {
|
65
|
+
let ldf = self.ldf.clone();
|
66
|
+
ldf.filter(predicate.inner.clone()).into()
|
67
|
+
}
|
68
|
+
|
69
|
+
pub fn select(&self, exprs: RArray) -> RbResult<RbLazyFrame> {
|
70
|
+
let ldf = self.ldf.clone();
|
71
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
72
|
+
Ok(ldf.select(exprs).into())
|
73
|
+
}
|
74
|
+
|
75
|
+
pub fn groupby(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
76
|
+
let ldf = self.ldf.clone();
|
77
|
+
let by = rb_exprs_to_exprs(by)?;
|
78
|
+
let lazy_gb = if maintain_order {
|
79
|
+
ldf.groupby_stable(by)
|
80
|
+
} else {
|
81
|
+
ldf.groupby(by)
|
82
|
+
};
|
83
|
+
Ok(RbLazyGroupBy {
|
84
|
+
lgb: RefCell::new(Some(lazy_gb)),
|
85
|
+
})
|
86
|
+
}
|
87
|
+
|
88
|
+
#[allow(clippy::too_many_arguments)]
|
89
|
+
pub fn join(
|
90
|
+
&self,
|
91
|
+
other: &RbLazyFrame,
|
92
|
+
left_on: RArray,
|
93
|
+
right_on: RArray,
|
94
|
+
allow_parallel: bool,
|
95
|
+
force_parallel: bool,
|
96
|
+
how: String,
|
97
|
+
suffix: String,
|
98
|
+
) -> RbResult<Self> {
|
99
|
+
let how = wrap_join_type(&how)?;
|
100
|
+
|
101
|
+
let ldf = self.ldf.clone();
|
102
|
+
let other = other.ldf.clone();
|
103
|
+
let left_on = rb_exprs_to_exprs(left_on)?;
|
104
|
+
let right_on = rb_exprs_to_exprs(right_on)?;
|
105
|
+
|
106
|
+
Ok(ldf
|
107
|
+
.join_builder()
|
108
|
+
.with(other)
|
109
|
+
.left_on(left_on)
|
110
|
+
.right_on(right_on)
|
111
|
+
.allow_parallel(allow_parallel)
|
112
|
+
.force_parallel(force_parallel)
|
113
|
+
.how(how)
|
114
|
+
.suffix(suffix)
|
115
|
+
.finish()
|
116
|
+
.into())
|
117
|
+
}
|
118
|
+
|
119
|
+
pub fn with_columns(&self, exprs: RArray) -> RbResult<RbLazyFrame> {
|
120
|
+
let ldf = self.ldf.clone();
|
121
|
+
Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
|
122
|
+
}
|
123
|
+
}
|
@@ -0,0 +1,298 @@
|
|
1
|
+
use magnus::{RArray, RString, Value};
|
2
|
+
use polars::chunked_array::ops::SortOptions;
|
3
|
+
use polars::lazy::dsl;
|
4
|
+
use polars::lazy::dsl::Operator;
|
5
|
+
use polars::prelude::*;
|
6
|
+
|
7
|
+
use crate::conversion::parse_fill_null_strategy;
|
8
|
+
use crate::lazy::utils::rb_exprs_to_exprs;
|
9
|
+
use crate::RbResult;
|
10
|
+
|
11
|
+
#[magnus::wrap(class = "Polars::RbExpr")]
|
12
|
+
#[derive(Clone)]
|
13
|
+
pub struct RbExpr {
|
14
|
+
pub inner: dsl::Expr,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl From<dsl::Expr> for RbExpr {
|
18
|
+
fn from(inner: dsl::Expr) -> Self {
|
19
|
+
RbExpr { inner }
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
impl RbExpr {
|
24
|
+
pub fn mul(&self, rhs: &RbExpr) -> RbResult<Self> {
|
25
|
+
Ok(dsl::binary_expr(self.inner.clone(), Operator::Multiply, rhs.inner.clone()).into())
|
26
|
+
}
|
27
|
+
|
28
|
+
pub fn to_str(&self) -> String {
|
29
|
+
format!("{:?}", self.inner)
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn eq(&self, other: &RbExpr) -> Self {
|
33
|
+
self.clone().inner.eq(other.inner.clone()).into()
|
34
|
+
}
|
35
|
+
|
36
|
+
pub fn neq(&self, other: &RbExpr) -> Self {
|
37
|
+
self.clone().inner.neq(other.inner.clone()).into()
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn gt(&self, other: &RbExpr) -> Self {
|
41
|
+
self.clone().inner.gt(other.inner.clone()).into()
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn gt_eq(&self, other: &RbExpr) -> Self {
|
45
|
+
self.clone().inner.gt_eq(other.inner.clone()).into()
|
46
|
+
}
|
47
|
+
|
48
|
+
pub fn lt_eq(&self, other: &RbExpr) -> Self {
|
49
|
+
self.clone().inner.lt_eq(other.inner.clone()).into()
|
50
|
+
}
|
51
|
+
|
52
|
+
pub fn lt(&self, other: &RbExpr) -> Self {
|
53
|
+
self.clone().inner.lt(other.inner.clone()).into()
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn alias(&self, name: String) -> Self {
|
57
|
+
self.clone().inner.alias(&name).into()
|
58
|
+
}
|
59
|
+
|
60
|
+
pub fn is_not(&self) -> Self {
|
61
|
+
self.clone().inner.not().into()
|
62
|
+
}
|
63
|
+
|
64
|
+
pub fn is_null(&self) -> Self {
|
65
|
+
self.clone().inner.is_null().into()
|
66
|
+
}
|
67
|
+
|
68
|
+
pub fn is_not_null(&self) -> Self {
|
69
|
+
self.clone().inner.is_not_null().into()
|
70
|
+
}
|
71
|
+
|
72
|
+
pub fn min(&self) -> Self {
|
73
|
+
self.clone().inner.min().into()
|
74
|
+
}
|
75
|
+
|
76
|
+
pub fn max(&self) -> Self {
|
77
|
+
self.clone().inner.max().into()
|
78
|
+
}
|
79
|
+
|
80
|
+
pub fn mean(&self) -> Self {
|
81
|
+
self.clone().inner.mean().into()
|
82
|
+
}
|
83
|
+
|
84
|
+
pub fn median(&self) -> Self {
|
85
|
+
self.clone().inner.median().into()
|
86
|
+
}
|
87
|
+
|
88
|
+
pub fn sum(&self) -> Self {
|
89
|
+
self.clone().inner.sum().into()
|
90
|
+
}
|
91
|
+
|
92
|
+
pub fn n_unique(&self) -> Self {
|
93
|
+
self.clone().inner.n_unique().into()
|
94
|
+
}
|
95
|
+
|
96
|
+
pub fn unique(&self) -> Self {
|
97
|
+
self.clone().inner.unique().into()
|
98
|
+
}
|
99
|
+
|
100
|
+
pub fn unique_stable(&self) -> Self {
|
101
|
+
self.clone().inner.unique_stable().into()
|
102
|
+
}
|
103
|
+
|
104
|
+
pub fn first(&self) -> Self {
|
105
|
+
self.clone().inner.first().into()
|
106
|
+
}
|
107
|
+
pub fn last(&self) -> Self {
|
108
|
+
self.clone().inner.last().into()
|
109
|
+
}
|
110
|
+
|
111
|
+
pub fn list(&self) -> Self {
|
112
|
+
self.clone().inner.list().into()
|
113
|
+
}
|
114
|
+
|
115
|
+
pub fn count(&self) -> Self {
|
116
|
+
self.clone().inner.count().into()
|
117
|
+
}
|
118
|
+
|
119
|
+
pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self {
|
120
|
+
self.clone()
|
121
|
+
.inner
|
122
|
+
.sort_with(SortOptions {
|
123
|
+
descending,
|
124
|
+
nulls_last,
|
125
|
+
})
|
126
|
+
.into()
|
127
|
+
}
|
128
|
+
|
129
|
+
pub fn sort_by(&self, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
|
130
|
+
let by = rb_exprs_to_exprs(by)?;
|
131
|
+
Ok(self.clone().inner.sort_by(by, reverse).into())
|
132
|
+
}
|
133
|
+
|
134
|
+
pub fn fill_null(&self, expr: &RbExpr) -> Self {
|
135
|
+
self.clone().inner.fill_null(expr.inner.clone()).into()
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn fill_null_with_strategy(
|
139
|
+
&self,
|
140
|
+
strategy: String,
|
141
|
+
limit: FillNullLimit,
|
142
|
+
) -> RbResult<Self> {
|
143
|
+
let strat = parse_fill_null_strategy(&strategy, limit)?;
|
144
|
+
Ok(self
|
145
|
+
.inner
|
146
|
+
.clone()
|
147
|
+
.apply(move |s| s.fill_null(strat), GetOutput::same_type())
|
148
|
+
.with_fmt("fill_null_with_strategy")
|
149
|
+
.into())
|
150
|
+
}
|
151
|
+
|
152
|
+
pub fn fill_nan(&self, expr: &RbExpr) -> Self {
|
153
|
+
self.inner.clone().fill_nan(expr.inner.clone()).into()
|
154
|
+
}
|
155
|
+
|
156
|
+
pub fn drop_nulls(&self) -> Self {
|
157
|
+
self.inner.clone().drop_nulls().into()
|
158
|
+
}
|
159
|
+
|
160
|
+
pub fn drop_nans(&self) -> Self {
|
161
|
+
self.inner.clone().drop_nans().into()
|
162
|
+
}
|
163
|
+
|
164
|
+
pub fn filter(&self, predicate: &RbExpr) -> Self {
|
165
|
+
self.clone().inner.filter(predicate.inner.clone()).into()
|
166
|
+
}
|
167
|
+
|
168
|
+
pub fn reverse(&self) -> Self {
|
169
|
+
self.clone().inner.reverse().into()
|
170
|
+
}
|
171
|
+
|
172
|
+
pub fn std(&self, ddof: u8) -> Self {
|
173
|
+
self.clone().inner.std(ddof).into()
|
174
|
+
}
|
175
|
+
|
176
|
+
pub fn var(&self, ddof: u8) -> Self {
|
177
|
+
self.clone().inner.var(ddof).into()
|
178
|
+
}
|
179
|
+
|
180
|
+
pub fn tail(&self, n: Option<usize>) -> Self {
|
181
|
+
self.clone().inner.tail(n).into()
|
182
|
+
}
|
183
|
+
|
184
|
+
pub fn head(&self, n: Option<usize>) -> Self {
|
185
|
+
self.clone().inner.head(n).into()
|
186
|
+
}
|
187
|
+
|
188
|
+
pub fn over(&self, partition_by: RArray) -> RbResult<Self> {
|
189
|
+
let partition_by = rb_exprs_to_exprs(partition_by)?;
|
190
|
+
Ok(self.clone().inner.over(partition_by).into())
|
191
|
+
}
|
192
|
+
|
193
|
+
pub fn _and(&self, expr: &RbExpr) -> Self {
|
194
|
+
self.clone().inner.and(expr.inner.clone()).into()
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn _xor(&self, expr: &RbExpr) -> Self {
|
198
|
+
self.clone().inner.xor(expr.inner.clone()).into()
|
199
|
+
}
|
200
|
+
|
201
|
+
pub fn _or(&self, expr: &RbExpr) -> Self {
|
202
|
+
self.clone().inner.or(expr.inner.clone()).into()
|
203
|
+
}
|
204
|
+
|
205
|
+
pub fn product(&self) -> Self {
|
206
|
+
self.clone().inner.product().into()
|
207
|
+
}
|
208
|
+
|
209
|
+
pub fn str_lengths(&self) -> RbExpr {
|
210
|
+
let function = |s: Series| {
|
211
|
+
let ca = s.utf8()?;
|
212
|
+
Ok(ca.str_lengths().into_series())
|
213
|
+
};
|
214
|
+
self.clone()
|
215
|
+
.inner
|
216
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
217
|
+
.with_fmt("str.lengths")
|
218
|
+
.into()
|
219
|
+
}
|
220
|
+
|
221
|
+
pub fn str_contains(&self, pat: String, literal: Option<bool>) -> Self {
|
222
|
+
match literal {
|
223
|
+
Some(true) => self.inner.clone().str().contains_literal(pat).into(),
|
224
|
+
_ => self.inner.clone().str().contains(pat).into(),
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
pub fn prefix(&self, prefix: String) -> RbExpr {
|
229
|
+
self.inner.clone().prefix(&prefix).into()
|
230
|
+
}
|
231
|
+
|
232
|
+
pub fn suffix(&self, suffix: String) -> RbExpr {
|
233
|
+
self.inner.clone().suffix(&suffix).into()
|
234
|
+
}
|
235
|
+
|
236
|
+
pub fn interpolate(&self) -> RbExpr {
|
237
|
+
self.inner.clone().interpolate().into()
|
238
|
+
}
|
239
|
+
}
|
240
|
+
|
241
|
+
pub fn col(name: String) -> RbExpr {
|
242
|
+
dsl::col(&name).into()
|
243
|
+
}
|
244
|
+
|
245
|
+
// TODO improve
|
246
|
+
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
247
|
+
if value.is_nil() {
|
248
|
+
Ok(dsl::lit(Null {}).into())
|
249
|
+
} else if let Some(v) = RString::from_value(value) {
|
250
|
+
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
251
|
+
} else {
|
252
|
+
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
253
|
+
}
|
254
|
+
}
|
255
|
+
|
256
|
+
pub fn arange(low: &RbExpr, high: &RbExpr, step: usize) -> RbExpr {
|
257
|
+
polars::lazy::dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
258
|
+
}
|
259
|
+
|
260
|
+
#[magnus::wrap(class = "Polars::RbWhen")]
|
261
|
+
#[derive(Clone)]
|
262
|
+
pub struct RbWhen {
|
263
|
+
pub inner: dsl::When,
|
264
|
+
}
|
265
|
+
|
266
|
+
impl From<dsl::When> for RbWhen {
|
267
|
+
fn from(inner: dsl::When) -> Self {
|
268
|
+
RbWhen { inner }
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
273
|
+
#[derive(Clone)]
|
274
|
+
pub struct RbWhenThen {
|
275
|
+
pub inner: dsl::WhenThen,
|
276
|
+
}
|
277
|
+
|
278
|
+
impl From<dsl::WhenThen> for RbWhenThen {
|
279
|
+
fn from(inner: dsl::WhenThen) -> Self {
|
280
|
+
RbWhenThen { inner }
|
281
|
+
}
|
282
|
+
}
|
283
|
+
|
284
|
+
impl RbWhen {
|
285
|
+
pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
|
286
|
+
self.inner.clone().then(expr.inner.clone()).into()
|
287
|
+
}
|
288
|
+
}
|
289
|
+
|
290
|
+
impl RbWhenThen {
|
291
|
+
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
292
|
+
self.inner.clone().otherwise(expr.inner.clone()).into()
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
pub fn when(predicate: &RbExpr) -> RbWhen {
|
297
|
+
dsl::when(predicate.inner.clone()).into()
|
298
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::dsl::Expr;
|
3
|
+
|
4
|
+
use crate::lazy::dsl::RbExpr;
|
5
|
+
use crate::RbResult;
|
6
|
+
|
7
|
+
pub fn rb_exprs_to_exprs(rb_exprs: RArray) -> RbResult<Vec<Expr>> {
|
8
|
+
let mut exprs = Vec::new();
|
9
|
+
for item in rb_exprs.each() {
|
10
|
+
exprs.push(item?.try_convert::<&RbExpr>()?.inner.clone());
|
11
|
+
}
|
12
|
+
Ok(exprs)
|
13
|
+
}
|
@@ -0,0 +1,256 @@
|
|
1
|
+
mod conversion;
|
2
|
+
mod dataframe;
|
3
|
+
mod error;
|
4
|
+
mod file;
|
5
|
+
mod lazy;
|
6
|
+
mod series;
|
7
|
+
|
8
|
+
use conversion::get_df;
|
9
|
+
use dataframe::RbDataFrame;
|
10
|
+
use error::{RbPolarsErr, RbValueError};
|
11
|
+
use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
|
12
|
+
use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
|
13
|
+
use magnus::{
|
14
|
+
define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RModule,
|
15
|
+
};
|
16
|
+
use polars::error::PolarsResult;
|
17
|
+
use polars::frame::DataFrame;
|
18
|
+
use polars::functions::{diag_concat_df, hor_concat_df};
|
19
|
+
use series::RbSeries;
|
20
|
+
|
21
|
+
type RbResult<T> = Result<T, Error>;
|
22
|
+
|
23
|
+
fn module() -> RModule {
|
24
|
+
*memoize!(RModule: define_module("Polars").unwrap())
|
25
|
+
}
|
26
|
+
|
27
|
+
fn series() -> RClass {
|
28
|
+
*memoize!(RClass: module().define_class("Series", Default::default()).unwrap())
|
29
|
+
}
|
30
|
+
|
31
|
+
#[magnus::init]
|
32
|
+
fn init() -> RbResult<()> {
|
33
|
+
let module = module();
|
34
|
+
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
35
|
+
module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
|
36
|
+
module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
|
37
|
+
|
38
|
+
let class = module.define_class("RbDataFrame", Default::default())?;
|
39
|
+
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
40
|
+
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, 2))?;
|
41
|
+
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 1))?;
|
42
|
+
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
43
|
+
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
44
|
+
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
45
|
+
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
46
|
+
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
47
|
+
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
48
|
+
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
49
|
+
class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
|
50
|
+
class.define_method("to_s", method!(RbDataFrame::to_s, 0))?;
|
51
|
+
class.define_method("columns", method!(RbDataFrame::columns, 0))?;
|
52
|
+
class.define_method("dtypes", method!(RbDataFrame::dtypes, 0))?;
|
53
|
+
class.define_method("shape", method!(RbDataFrame::shape, 0))?;
|
54
|
+
class.define_method("height", method!(RbDataFrame::height, 0))?;
|
55
|
+
class.define_method("width", method!(RbDataFrame::width, 0))?;
|
56
|
+
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
57
|
+
class.define_method("column", method!(RbDataFrame::column, 1))?;
|
58
|
+
class.define_method("sort", method!(RbDataFrame::sort, 3))?;
|
59
|
+
class.define_method("head", method!(RbDataFrame::head, 1))?;
|
60
|
+
class.define_method("tail", method!(RbDataFrame::tail, 1))?;
|
61
|
+
class.define_method("frame_equal", method!(RbDataFrame::frame_equal, 2))?;
|
62
|
+
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
63
|
+
class.define_method("mean", method!(RbDataFrame::mean, 0))?;
|
64
|
+
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
65
|
+
|
66
|
+
let class = module.define_class("RbExpr", Default::default())?;
|
67
|
+
class.define_method("*", method!(RbExpr::mul, 1))?;
|
68
|
+
class.define_method("to_str", method!(RbExpr::to_str, 0))?;
|
69
|
+
class.define_method("eq", method!(RbExpr::eq, 1))?;
|
70
|
+
class.define_method("neq", method!(RbExpr::neq, 1))?;
|
71
|
+
class.define_method("gt", method!(RbExpr::gt, 1))?;
|
72
|
+
class.define_method("gt_eq", method!(RbExpr::gt_eq, 1))?;
|
73
|
+
class.define_method("lt_eq", method!(RbExpr::lt_eq, 1))?;
|
74
|
+
class.define_method("lt", method!(RbExpr::lt, 1))?;
|
75
|
+
class.define_method("_alias", method!(RbExpr::alias, 1))?;
|
76
|
+
class.define_method("is_not", method!(RbExpr::is_not, 0))?;
|
77
|
+
class.define_method("is_null", method!(RbExpr::is_null, 0))?;
|
78
|
+
class.define_method("is_not_null", method!(RbExpr::is_not_null, 0))?;
|
79
|
+
class.define_method("min", method!(RbExpr::min, 0))?;
|
80
|
+
class.define_method("max", method!(RbExpr::max, 0))?;
|
81
|
+
class.define_method("mean", method!(RbExpr::mean, 0))?;
|
82
|
+
class.define_method("median", method!(RbExpr::median, 0))?;
|
83
|
+
class.define_method("sum", method!(RbExpr::sum, 0))?;
|
84
|
+
class.define_method("n_unique", method!(RbExpr::n_unique, 0))?;
|
85
|
+
class.define_method("unique", method!(RbExpr::unique, 0))?;
|
86
|
+
class.define_method("unique_stable", method!(RbExpr::unique_stable, 0))?;
|
87
|
+
class.define_method("first", method!(RbExpr::first, 0))?;
|
88
|
+
class.define_method("last", method!(RbExpr::last, 0))?;
|
89
|
+
class.define_method("list", method!(RbExpr::list, 0))?;
|
90
|
+
class.define_method("count", method!(RbExpr::count, 0))?;
|
91
|
+
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
92
|
+
class.define_method("sort_by", method!(RbExpr::sort_by, 2))?;
|
93
|
+
class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
|
94
|
+
class.define_method(
|
95
|
+
"fill_null_with_strategy",
|
96
|
+
method!(RbExpr::fill_null_with_strategy, 2),
|
97
|
+
)?;
|
98
|
+
class.define_method("fill_nan", method!(RbExpr::fill_nan, 1))?;
|
99
|
+
class.define_method("drop_nulls", method!(RbExpr::drop_nulls, 0))?;
|
100
|
+
class.define_method("drop_nans", method!(RbExpr::drop_nans, 0))?;
|
101
|
+
class.define_method("filter", method!(RbExpr::filter, 1))?;
|
102
|
+
class.define_method("reverse", method!(RbExpr::reverse, 0))?;
|
103
|
+
class.define_method("std", method!(RbExpr::std, 1))?;
|
104
|
+
class.define_method("var", method!(RbExpr::var, 1))?;
|
105
|
+
class.define_method("tail", method!(RbExpr::tail, 1))?;
|
106
|
+
class.define_method("head", method!(RbExpr::head, 1))?;
|
107
|
+
class.define_method("over", method!(RbExpr::over, 1))?;
|
108
|
+
class.define_method("_and", method!(RbExpr::_and, 1))?;
|
109
|
+
class.define_method("_xor", method!(RbExpr::_xor, 1))?;
|
110
|
+
class.define_method("_or", method!(RbExpr::_or, 1))?;
|
111
|
+
class.define_method("product", method!(RbExpr::product, 0))?;
|
112
|
+
class.define_method("str_lengths", method!(RbExpr::str_lengths, 0))?;
|
113
|
+
class.define_method("str_contains", method!(RbExpr::str_contains, 2))?;
|
114
|
+
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
115
|
+
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
116
|
+
class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
|
117
|
+
|
118
|
+
// maybe add to different class
|
119
|
+
class.define_singleton_method("col", function!(crate::lazy::dsl::col, 1))?;
|
120
|
+
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
121
|
+
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
122
|
+
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
123
|
+
|
124
|
+
let class = module.define_class("RbLazyFrame", Default::default())?;
|
125
|
+
class.define_method(
|
126
|
+
"optimization_toggle",
|
127
|
+
method!(RbLazyFrame::optimization_toggle, 7),
|
128
|
+
)?;
|
129
|
+
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
130
|
+
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
131
|
+
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
132
|
+
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
133
|
+
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
134
|
+
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
135
|
+
|
136
|
+
let class = module.define_class("RbLazyGroupBy", Default::default())?;
|
137
|
+
class.define_method("agg", method!(RbLazyGroupBy::agg, 1))?;
|
138
|
+
|
139
|
+
let class = module.define_class("RbSeries", Default::default())?;
|
140
|
+
class.define_singleton_method("new_opt_bool", function!(RbSeries::new_opt_bool, 3))?;
|
141
|
+
class.define_singleton_method("new_opt_u8", function!(RbSeries::new_opt_u8, 3))?;
|
142
|
+
class.define_singleton_method("new_opt_u16", function!(RbSeries::new_opt_u16, 3))?;
|
143
|
+
class.define_singleton_method("new_opt_u32", function!(RbSeries::new_opt_u32, 3))?;
|
144
|
+
class.define_singleton_method("new_opt_u64", function!(RbSeries::new_opt_u64, 3))?;
|
145
|
+
class.define_singleton_method("new_opt_i8", function!(RbSeries::new_opt_i8, 3))?;
|
146
|
+
class.define_singleton_method("new_opt_i16", function!(RbSeries::new_opt_i16, 3))?;
|
147
|
+
class.define_singleton_method("new_opt_i32", function!(RbSeries::new_opt_i32, 3))?;
|
148
|
+
class.define_singleton_method("new_opt_i64", function!(RbSeries::new_opt_i64, 3))?;
|
149
|
+
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
150
|
+
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
151
|
+
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
152
|
+
class.define_method("rechunk", method!(RbSeries::rechunk, 1))?;
|
153
|
+
class.define_method("bitand", method!(RbSeries::bitand, 1))?;
|
154
|
+
class.define_method("bitor", method!(RbSeries::bitor, 1))?;
|
155
|
+
class.define_method("bitxor", method!(RbSeries::bitxor, 1))?;
|
156
|
+
class.define_method("chunk_lengths", method!(RbSeries::chunk_lengths, 0))?;
|
157
|
+
class.define_method("name", method!(RbSeries::name, 0))?;
|
158
|
+
class.define_method("rename", method!(RbSeries::rename, 1))?;
|
159
|
+
class.define_method("dtype", method!(RbSeries::dtype, 0))?;
|
160
|
+
class.define_method("inner_dtype", method!(RbSeries::inner_dtype, 0))?;
|
161
|
+
class.define_method("set_sorted", method!(RbSeries::set_sorted, 1))?;
|
162
|
+
class.define_method("mean", method!(RbSeries::mean, 0))?;
|
163
|
+
class.define_method("max", method!(RbSeries::max, 0))?;
|
164
|
+
class.define_method("min", method!(RbSeries::min, 0))?;
|
165
|
+
class.define_method("sum", method!(RbSeries::sum, 0))?;
|
166
|
+
class.define_method("n_chunks", method!(RbSeries::n_chunks, 0))?;
|
167
|
+
class.define_method("append", method!(RbSeries::append, 1))?;
|
168
|
+
class.define_method("extend", method!(RbSeries::extend, 1))?;
|
169
|
+
class.define_method("new_from_index", method!(RbSeries::new_from_index, 2))?;
|
170
|
+
class.define_method("filter", method!(RbSeries::filter, 1))?;
|
171
|
+
class.define_method("add", method!(RbSeries::add, 1))?;
|
172
|
+
class.define_method("sub", method!(RbSeries::sub, 1))?;
|
173
|
+
class.define_method("mul", method!(RbSeries::mul, 1))?;
|
174
|
+
class.define_method("div", method!(RbSeries::div, 1))?;
|
175
|
+
class.define_method("rem", method!(RbSeries::rem, 1))?;
|
176
|
+
class.define_method("sort", method!(RbSeries::sort, 1))?;
|
177
|
+
class.define_method("value_counts", method!(RbSeries::value_counts, 1))?;
|
178
|
+
class.define_method("arg_min", method!(RbSeries::arg_min, 0))?;
|
179
|
+
class.define_method("arg_max", method!(RbSeries::arg_max, 0))?;
|
180
|
+
class.define_method("take_with_series", method!(RbSeries::take_with_series, 1))?;
|
181
|
+
class.define_method("null_count", method!(RbSeries::null_count, 0))?;
|
182
|
+
class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
|
183
|
+
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
184
|
+
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
185
|
+
class.define_method("series_equal", method!(RbSeries::series_equal, 3))?;
|
186
|
+
class.define_method("eq", method!(RbSeries::eq, 1))?;
|
187
|
+
class.define_method("neq", method!(RbSeries::neq, 1))?;
|
188
|
+
class.define_method("gt", method!(RbSeries::gt, 1))?;
|
189
|
+
class.define_method("gt_eq", method!(RbSeries::gt_eq, 1))?;
|
190
|
+
class.define_method("lt", method!(RbSeries::lt, 1))?;
|
191
|
+
class.define_method("lt_eq", method!(RbSeries::lt_eq, 1))?;
|
192
|
+
class.define_method("not", method!(RbSeries::not, 0))?;
|
193
|
+
class.define_method("to_s", method!(RbSeries::to_s, 0))?;
|
194
|
+
class.define_method("len", method!(RbSeries::len, 0))?;
|
195
|
+
class.define_method("to_a", method!(RbSeries::to_a, 0))?;
|
196
|
+
class.define_method("median", method!(RbSeries::median, 0))?;
|
197
|
+
// rest
|
198
|
+
class.define_method("cumsum", method!(RbSeries::cumsum, 1))?;
|
199
|
+
class.define_method("cummax", method!(RbSeries::cummax, 1))?;
|
200
|
+
class.define_method("cummin", method!(RbSeries::cummin, 1))?;
|
201
|
+
class.define_method("slice", method!(RbSeries::slice, 2))?;
|
202
|
+
|
203
|
+
let class = module.define_class("RbWhen", Default::default())?;
|
204
|
+
class.define_method("_then", method!(RbWhen::then, 1))?;
|
205
|
+
|
206
|
+
let class = module.define_class("RbWhenThen", Default::default())?;
|
207
|
+
class.define_method("otherwise", method!(RbWhenThen::overwise, 1))?;
|
208
|
+
|
209
|
+
Ok(())
|
210
|
+
}
|
211
|
+
|
212
|
+
fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
213
|
+
let mut iter = seq.each();
|
214
|
+
let first = iter.next().unwrap()?;
|
215
|
+
|
216
|
+
let first_rdf = get_df(first)?;
|
217
|
+
let identity_df = first_rdf.slice(0, 0);
|
218
|
+
|
219
|
+
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
220
|
+
|
221
|
+
for item in iter {
|
222
|
+
let rdf = get_df(item?)?;
|
223
|
+
rdfs.push(Ok(rdf));
|
224
|
+
}
|
225
|
+
|
226
|
+
let identity = Ok(identity_df);
|
227
|
+
|
228
|
+
let df = rdfs
|
229
|
+
.into_iter()
|
230
|
+
.fold(identity, |acc: PolarsResult<DataFrame>, df| {
|
231
|
+
let mut acc = acc?;
|
232
|
+
acc.vstack_mut(&df?)?;
|
233
|
+
Ok(acc)
|
234
|
+
})
|
235
|
+
.map_err(RbPolarsErr::from)?;
|
236
|
+
|
237
|
+
Ok(df.into())
|
238
|
+
}
|
239
|
+
|
240
|
+
fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
241
|
+
let mut dfs = Vec::new();
|
242
|
+
for item in seq.each() {
|
243
|
+
dfs.push(get_df(item?)?);
|
244
|
+
}
|
245
|
+
let df = diag_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
246
|
+
Ok(df.into())
|
247
|
+
}
|
248
|
+
|
249
|
+
fn rb_hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
250
|
+
let mut dfs = Vec::new();
|
251
|
+
for item in seq.each() {
|
252
|
+
dfs.push(get_df(item?)?);
|
253
|
+
}
|
254
|
+
let df = hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
255
|
+
Ok(df.into())
|
256
|
+
}
|