polars-df 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,10 +1,13 @@
1
- use magnus::RArray;
1
+ use magnus::{RArray, RHash, Value};
2
2
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
3
+ use polars::prelude::*;
3
4
  use std::cell::RefCell;
5
+ use std::io::BufWriter;
4
6
 
5
- use crate::conversion::wrap_join_type;
7
+ use crate::conversion::*;
8
+ use crate::file::get_file_like;
6
9
  use crate::lazy::utils::rb_exprs_to_exprs;
7
- use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult};
10
+ use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
8
11
 
9
12
  #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
10
13
  pub struct RbLazyGroupBy {
@@ -17,6 +20,16 @@ impl RbLazyGroupBy {
17
20
  let aggs = rb_exprs_to_exprs(aggs)?;
18
21
  Ok(lgb.agg(aggs).into())
19
22
  }
23
+
24
+ pub fn head(&self, n: usize) -> RbLazyFrame {
25
+ let lgb = self.lgb.take().unwrap();
26
+ lgb.head(Some(n)).into()
27
+ }
28
+
29
+ pub fn tail(&self, n: usize) -> RbLazyFrame {
30
+ let lgb = self.lgb.take().unwrap();
31
+ lgb.tail(Some(n)).into()
32
+ }
20
33
  }
21
34
 
22
35
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
@@ -25,6 +38,13 @@ pub struct RbLazyFrame {
25
38
  pub ldf: LazyFrame,
26
39
  }
27
40
 
41
+ impl RbLazyFrame {
42
+ fn get_schema(&self) -> RbResult<SchemaRef> {
43
+ let schema = self.ldf.schema().map_err(RbPolarsErr::from)?;
44
+ Ok(schema)
45
+ }
46
+ }
47
+
28
48
  impl From<LazyFrame> for RbLazyFrame {
29
49
  fn from(ldf: LazyFrame) -> Self {
30
50
  RbLazyFrame { ldf }
@@ -32,6 +52,25 @@ impl From<LazyFrame> for RbLazyFrame {
32
52
  }
33
53
 
34
54
  impl RbLazyFrame {
55
+ pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
56
+ let file = BufWriter::new(get_file_like(rb_f, true)?);
57
+ serde_json::to_writer(file, &self.ldf.logical_plan)
58
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
59
+ Ok(())
60
+ }
61
+
62
+ pub fn describe_plan(&self) -> String {
63
+ self.ldf.describe_plan()
64
+ }
65
+
66
+ pub fn describe_optimized_plan(&self) -> RbResult<String> {
67
+ let result = self
68
+ .ldf
69
+ .describe_optimized_plan()
70
+ .map_err(RbPolarsErr::from)?;
71
+ Ok(result)
72
+ }
73
+
35
74
  #[allow(clippy::too_many_arguments)]
36
75
  pub fn optimization_toggle(
37
76
  &self,
@@ -55,18 +94,52 @@ impl RbLazyFrame {
55
94
  ldf.into()
56
95
  }
57
96
 
97
+ pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> Self {
98
+ let ldf = self.ldf.clone();
99
+ ldf.sort(
100
+ &by_column,
101
+ SortOptions {
102
+ descending: reverse,
103
+ nulls_last,
104
+ },
105
+ )
106
+ .into()
107
+ }
108
+
109
+ pub fn sort_by_exprs(
110
+ &self,
111
+ by_column: RArray,
112
+ reverse: Vec<bool>,
113
+ nulls_last: bool,
114
+ ) -> RbResult<Self> {
115
+ let ldf = self.ldf.clone();
116
+ let exprs = rb_exprs_to_exprs(by_column)?;
117
+ Ok(ldf.sort_by_exprs(exprs, reverse, nulls_last).into())
118
+ }
119
+
120
+ pub fn cache(&self) -> Self {
121
+ let ldf = self.ldf.clone();
122
+ ldf.cache().into()
123
+ }
124
+
58
125
  pub fn collect(&self) -> RbResult<RbDataFrame> {
59
126
  let ldf = self.ldf.clone();
60
127
  let df = ldf.collect().map_err(RbPolarsErr::from)?;
61
128
  Ok(df.into())
62
129
  }
63
130
 
64
- pub fn filter(&self, predicate: &RbExpr) -> RbLazyFrame {
131
+ pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
132
+ let ldf = self.ldf.clone();
133
+ let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
134
+ Ok(df.into())
135
+ }
136
+
137
+ pub fn filter(&self, predicate: &RbExpr) -> Self {
65
138
  let ldf = self.ldf.clone();
66
139
  ldf.filter(predicate.inner.clone()).into()
67
140
  }
68
141
 
69
- pub fn select(&self, exprs: RArray) -> RbResult<RbLazyFrame> {
142
+ pub fn select(&self, exprs: RArray) -> RbResult<Self> {
70
143
  let ldf = self.ldf.clone();
71
144
  let exprs = rb_exprs_to_exprs(exprs)?;
72
145
  Ok(ldf.select(exprs).into())
@@ -85,6 +158,65 @@ impl RbLazyFrame {
85
158
  })
86
159
  }
87
160
 
161
+ pub fn groupby_rolling(
162
+ &self,
163
+ index_column: String,
164
+ period: String,
165
+ offset: String,
166
+ closed: Wrap<ClosedWindow>,
167
+ by: RArray,
168
+ ) -> RbResult<RbLazyGroupBy> {
169
+ let closed_window = closed.0;
170
+ let ldf = self.ldf.clone();
171
+ let by = rb_exprs_to_exprs(by)?;
172
+ let lazy_gb = ldf.groupby_rolling(
173
+ by,
174
+ RollingGroupOptions {
175
+ index_column,
176
+ period: Duration::parse(&period),
177
+ offset: Duration::parse(&offset),
178
+ closed_window,
179
+ },
180
+ );
181
+
182
+ Ok(RbLazyGroupBy {
183
+ lgb: RefCell::new(Some(lazy_gb)),
184
+ })
185
+ }
186
+
187
+ #[allow(clippy::too_many_arguments)]
188
+ pub fn groupby_dynamic(
189
+ &self,
190
+ index_column: String,
191
+ every: String,
192
+ period: String,
193
+ offset: String,
194
+ truncate: bool,
195
+ include_boundaries: bool,
196
+ closed: Wrap<ClosedWindow>,
197
+ by: RArray,
198
+ ) -> RbResult<RbLazyGroupBy> {
199
+ let closed_window = closed.0;
200
+ let by = rb_exprs_to_exprs(by)?;
201
+ let ldf = self.ldf.clone();
202
+ let lazy_gb = ldf.groupby_dynamic(
203
+ by,
204
+ DynamicGroupOptions {
205
+ index_column,
206
+ every: Duration::parse(&every),
207
+ period: Duration::parse(&period),
208
+ offset: Duration::parse(&offset),
209
+ truncate,
210
+ include_boundaries,
211
+ closed_window,
212
+ },
213
+ );
214
+
215
+ Ok(RbLazyGroupBy {
216
+ lgb: RefCell::new(Some(lazy_gb)),
217
+ })
218
+ }
219
+
88
220
  #[allow(clippy::too_many_arguments)]
89
221
  pub fn join(
90
222
  &self,
@@ -93,11 +225,9 @@ impl RbLazyFrame {
93
225
  right_on: RArray,
94
226
  allow_parallel: bool,
95
227
  force_parallel: bool,
96
- how: String,
228
+ how: Wrap<JoinType>,
97
229
  suffix: String,
98
230
  ) -> RbResult<Self> {
99
- let how = wrap_join_type(&how)?;
100
-
101
231
  let ldf = self.ldf.clone();
102
232
  let other = other.ldf.clone();
103
233
  let left_on = rb_exprs_to_exprs(left_on)?;
@@ -110,14 +240,178 @@ impl RbLazyFrame {
110
240
  .right_on(right_on)
111
241
  .allow_parallel(allow_parallel)
112
242
  .force_parallel(force_parallel)
113
- .how(how)
243
+ .how(how.0)
114
244
  .suffix(suffix)
115
245
  .finish()
116
246
  .into())
117
247
  }
118
248
 
119
- pub fn with_columns(&self, exprs: RArray) -> RbResult<RbLazyFrame> {
249
+ pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
120
250
  let ldf = self.ldf.clone();
121
251
  Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
122
252
  }
253
+
254
+ pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
255
+ let ldf = self.ldf.clone();
256
+ ldf.rename(existing, new).into()
257
+ }
258
+
259
+ pub fn reverse(&self) -> Self {
260
+ let ldf = self.ldf.clone();
261
+ ldf.reverse().into()
262
+ }
263
+
264
+ pub fn shift(&self, periods: i64) -> Self {
265
+ let ldf = self.ldf.clone();
266
+ ldf.shift(periods).into()
267
+ }
268
+
269
+ pub fn shift_and_fill(&self, periods: i64, fill_value: &RbExpr) -> Self {
270
+ let ldf = self.ldf.clone();
271
+ ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
272
+ }
273
+
274
+ pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
275
+ let ldf = self.ldf.clone();
276
+ ldf.fill_nan(fill_value.inner.clone()).into()
277
+ }
278
+
279
+ pub fn min(&self) -> Self {
280
+ let ldf = self.ldf.clone();
281
+ ldf.min().into()
282
+ }
283
+
284
+ pub fn max(&self) -> Self {
285
+ let ldf = self.ldf.clone();
286
+ ldf.max().into()
287
+ }
288
+
289
+ pub fn sum(&self) -> Self {
290
+ let ldf = self.ldf.clone();
291
+ ldf.sum().into()
292
+ }
293
+
294
+ pub fn mean(&self) -> Self {
295
+ let ldf = self.ldf.clone();
296
+ ldf.mean().into()
297
+ }
298
+
299
+ pub fn std(&self, ddof: u8) -> Self {
300
+ let ldf = self.ldf.clone();
301
+ ldf.std(ddof).into()
302
+ }
303
+
304
+ pub fn var(&self, ddof: u8) -> Self {
305
+ let ldf = self.ldf.clone();
306
+ ldf.var(ddof).into()
307
+ }
308
+
309
+ pub fn median(&self) -> Self {
310
+ let ldf = self.ldf.clone();
311
+ ldf.median().into()
312
+ }
313
+
314
+ pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
315
+ let ldf = self.ldf.clone();
316
+ ldf.quantile(quantile, interpolation.0).into()
317
+ }
318
+
319
+ pub fn explode(&self, column: RArray) -> RbResult<Self> {
320
+ let ldf = self.ldf.clone();
321
+ let column = rb_exprs_to_exprs(column)?;
322
+ Ok(ldf.explode(column).into())
323
+ }
324
+
325
+ pub fn unique(
326
+ &self,
327
+ maintain_order: bool,
328
+ subset: Option<Vec<String>>,
329
+ keep: Wrap<UniqueKeepStrategy>,
330
+ ) -> RbResult<Self> {
331
+ let ldf = self.ldf.clone();
332
+ Ok(match maintain_order {
333
+ true => ldf.unique_stable(subset, keep.0),
334
+ false => ldf.unique(subset, keep.0),
335
+ }
336
+ .into())
337
+ }
338
+
339
+ pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
340
+ let ldf = self.ldf.clone();
341
+ ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
342
+ .into()
343
+ }
344
+
345
+ pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
346
+ let ldf = self.ldf.clone();
347
+ ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
348
+ }
349
+
350
+ pub fn tail(&self, n: IdxSize) -> Self {
351
+ let ldf = self.ldf.clone();
352
+ ldf.tail(n).into()
353
+ }
354
+
355
+ pub fn melt(
356
+ &self,
357
+ id_vars: Vec<String>,
358
+ value_vars: Vec<String>,
359
+ value_name: Option<String>,
360
+ variable_name: Option<String>,
361
+ ) -> Self {
362
+ let args = MeltArgs {
363
+ id_vars,
364
+ value_vars,
365
+ value_name,
366
+ variable_name,
367
+ };
368
+
369
+ let ldf = self.ldf.clone();
370
+ ldf.melt(args).into()
371
+ }
372
+
373
+ pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> Self {
374
+ let ldf = self.ldf.clone();
375
+ ldf.with_row_count(&name, offset).into()
376
+ }
377
+
378
+ pub fn drop_columns(&self, cols: Vec<String>) -> Self {
379
+ let ldf = self.ldf.clone();
380
+ ldf.drop_columns(cols).into()
381
+ }
382
+
383
+ pub fn clone(&self) -> Self {
384
+ self.ldf.clone().into()
385
+ }
386
+
387
+ pub fn columns(&self) -> RbResult<Vec<String>> {
388
+ Ok(self.get_schema()?.iter_names().cloned().collect())
389
+ }
390
+
391
+ pub fn dtypes(&self) -> RbResult<Vec<String>> {
392
+ let schema = self.get_schema()?;
393
+ let iter = schema.iter_dtypes().map(|dt| dt.to_string());
394
+ Ok(iter.collect())
395
+ }
396
+
397
+ pub fn schema(&self) -> RbResult<RHash> {
398
+ let schema = self.get_schema()?;
399
+ let schema_dict = RHash::new();
400
+
401
+ schema.iter_fields().for_each(|fld| {
402
+ // TODO remove unwrap
403
+ schema_dict
404
+ .aset(fld.name().clone(), fld.data_type().to_string())
405
+ .unwrap();
406
+ });
407
+ Ok(schema_dict)
408
+ }
409
+
410
+ pub fn unnest(&self, cols: Vec<String>) -> Self {
411
+ self.ldf.clone().unnest(cols).into()
412
+ }
413
+
414
+ pub fn width(&self) -> RbResult<usize> {
415
+ Ok(self.get_schema()?.len())
416
+ }
123
417
  }