polars-df 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ use polars::io::RowCount;
3
3
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
- use std::io::BufWriter;
6
+ use std::io::{BufWriter, Read};
7
7
 
8
8
  use crate::conversion::*;
9
9
  use crate::file::get_file_like;
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
53
53
  }
54
54
 
55
55
  impl RbLazyFrame {
56
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
57
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
58
+ // so don't bother with files.
59
+ let mut json = String::new();
60
+ let _ = get_file_like(rb_f, false)?
61
+ .read_to_string(&mut json)
62
+ .unwrap();
63
+
64
+ // Safety
65
+ // we skipped the serializing/deserializing of the static in lifetime in `DataType`
66
+ // so we actually don't have a lifetime at all when serializing.
67
+
68
+ // &str still has a lifetime. Bit its ok, because we drop it immediately
69
+ // in this scope
70
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
71
+
72
+ let lp = serde_json::from_str::<LogicalPlan>(json)
73
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
74
+ Ok(LazyFrame::from(lp).into())
75
+ }
76
+
56
77
  pub fn new_from_ndjson(
57
78
  path: String,
58
79
  infer_schema_length: Option<usize>,
@@ -327,6 +348,7 @@ impl RbLazyFrame {
327
348
  include_boundaries: bool,
328
349
  closed: Wrap<ClosedWindow>,
329
350
  by: RArray,
351
+ start_by: Wrap<StartBy>,
330
352
  ) -> RbResult<RbLazyGroupBy> {
331
353
  let closed_window = closed.0;
332
354
  let by = rb_exprs_to_exprs(by)?;
@@ -341,6 +363,7 @@ impl RbLazyFrame {
341
363
  truncate,
342
364
  include_boundaries,
343
365
  closed_window,
366
+ start_by: start_by.0,
344
367
  },
345
368
  );
346
369
 
@@ -349,6 +372,56 @@ impl RbLazyFrame {
349
372
  })
350
373
  }
351
374
 
375
+ pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
376
+ let contexts = contexts
377
+ .each()
378
+ .map(|v| v.unwrap().try_convert())
379
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
380
+ let contexts = contexts
381
+ .into_iter()
382
+ .map(|ldf| ldf.ldf.clone())
383
+ .collect::<Vec<_>>();
384
+ Ok(self.ldf.clone().with_context(contexts).into())
385
+ }
386
+
387
+ #[allow(clippy::too_many_arguments)]
388
+ pub fn join_asof(
389
+ &self,
390
+ other: &RbLazyFrame,
391
+ left_on: &RbExpr,
392
+ right_on: &RbExpr,
393
+ left_by: Option<Vec<String>>,
394
+ right_by: Option<Vec<String>>,
395
+ allow_parallel: bool,
396
+ force_parallel: bool,
397
+ suffix: String,
398
+ strategy: Wrap<AsofStrategy>,
399
+ tolerance: Option<Wrap<AnyValue<'_>>>,
400
+ tolerance_str: Option<String>,
401
+ ) -> RbResult<Self> {
402
+ let ldf = self.ldf.clone();
403
+ let other = other.ldf.clone();
404
+ let left_on = left_on.inner.clone();
405
+ let right_on = right_on.inner.clone();
406
+ Ok(ldf
407
+ .join_builder()
408
+ .with(other)
409
+ .left_on([left_on])
410
+ .right_on([right_on])
411
+ .allow_parallel(allow_parallel)
412
+ .force_parallel(force_parallel)
413
+ .how(JoinType::AsOf(AsOfOptions {
414
+ strategy: strategy.0,
415
+ left_by,
416
+ right_by,
417
+ tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
418
+ tolerance_str,
419
+ }))
420
+ .suffix(suffix)
421
+ .finish()
422
+ .into())
423
+ }
424
+
352
425
  #[allow(clippy::too_many_arguments)]
353
426
  pub fn join(
354
427
  &self,
@@ -443,9 +516,13 @@ impl RbLazyFrame {
443
516
  ldf.median().into()
444
517
  }
445
518
 
446
- pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
519
+ pub fn quantile(
520
+ &self,
521
+ quantile: &RbExpr,
522
+ interpolation: Wrap<QuantileInterpolOptions>,
523
+ ) -> Self {
447
524
  let ldf = self.ldf.clone();
448
- ldf.quantile(quantile, interpolation.0).into()
525
+ ldf.quantile(quantile.inner.clone(), interpolation.0).into()
449
526
  }
450
527
 
451
528
  pub fn explode(&self, column: RArray) -> RbResult<Self> {
@@ -1,3 +1,4 @@
1
+ use magnus::block::Proc;
1
2
  use magnus::{class, RArray, RString, Value};
2
3
  use polars::chunked_array::ops::SortOptions;
3
4
  use polars::lazy::dsl;
@@ -163,10 +164,14 @@ impl RbExpr {
163
164
  self.clone().inner.list().into()
164
165
  }
165
166
 
166
- pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
167
+ pub fn quantile(
168
+ &self,
169
+ quantile: &RbExpr,
170
+ interpolation: Wrap<QuantileInterpolOptions>,
171
+ ) -> Self {
167
172
  self.clone()
168
173
  .inner
169
- .quantile(quantile, interpolation.0)
174
+ .quantile(quantile.inner.clone(), interpolation.0)
170
175
  .into()
171
176
  }
172
177
 
@@ -505,7 +510,13 @@ impl RbExpr {
505
510
  self.inner.clone().shrink_dtype().into()
506
511
  }
507
512
 
508
- pub fn str_parse_date(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
513
+ pub fn str_parse_date(
514
+ &self,
515
+ fmt: Option<String>,
516
+ strict: bool,
517
+ exact: bool,
518
+ cache: bool,
519
+ ) -> Self {
509
520
  self.inner
510
521
  .clone()
511
522
  .str()
@@ -514,11 +525,20 @@ impl RbExpr {
514
525
  fmt,
515
526
  strict,
516
527
  exact,
528
+ cache,
529
+ tz_aware: false,
517
530
  })
518
531
  .into()
519
532
  }
520
533
 
521
- pub fn str_parse_datetime(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
534
+ pub fn str_parse_datetime(
535
+ &self,
536
+ fmt: Option<String>,
537
+ strict: bool,
538
+ exact: bool,
539
+ cache: bool,
540
+ tz_aware: bool,
541
+ ) -> Self {
522
542
  let tu = match fmt {
523
543
  Some(ref fmt) => {
524
544
  if fmt.contains("%.9f")
@@ -543,11 +563,19 @@ impl RbExpr {
543
563
  fmt,
544
564
  strict,
545
565
  exact,
566
+ cache,
567
+ tz_aware,
546
568
  })
547
569
  .into()
548
570
  }
549
571
 
550
- pub fn str_parse_time(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
572
+ pub fn str_parse_time(
573
+ &self,
574
+ fmt: Option<String>,
575
+ strict: bool,
576
+ exact: bool,
577
+ cache: bool,
578
+ ) -> Self {
551
579
  self.inner
552
580
  .clone()
553
581
  .str()
@@ -556,6 +584,8 @@ impl RbExpr {
556
584
  fmt,
557
585
  strict,
558
586
  exact,
587
+ cache,
588
+ tz_aware: false,
559
589
  })
560
590
  .into()
561
591
  }
@@ -722,8 +752,12 @@ impl RbExpr {
722
752
  self.inner.clone().str().extract(&pat, group_index).into()
723
753
  }
724
754
 
725
- pub fn str_extract_all(&self, pat: String) -> Self {
726
- self.inner.clone().str().extract_all(&pat).into()
755
+ pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
756
+ self.inner
757
+ .clone()
758
+ .str()
759
+ .extract_all(pat.inner.clone())
760
+ .into()
727
761
  }
728
762
 
729
763
  pub fn count_match(&self, pat: String) -> Self {
@@ -946,6 +980,10 @@ impl RbExpr {
946
980
  self.inner.clone().dt().round(&every, &offset).into()
947
981
  }
948
982
 
983
+ pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
984
+ map_single(self, lambda, output_type, agg_list)
985
+ }
986
+
949
987
  pub fn dot(&self, other: &RbExpr) -> Self {
950
988
  self.inner.clone().dot(other.inner.clone()).into()
951
989
  }
@@ -979,12 +1017,27 @@ impl RbExpr {
979
1017
  self.inner.clone().suffix(&suffix).into()
980
1018
  }
981
1019
 
1020
+ pub fn map_alias(&self, lambda: Proc) -> Self {
1021
+ self.inner
1022
+ .clone()
1023
+ .map_alias(move |name| {
1024
+ let out = lambda.call::<_, String>((name,));
1025
+ match out {
1026
+ Ok(out) => Ok(out),
1027
+ Err(e) => Err(PolarsError::ComputeError(
1028
+ format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
1029
+ )),
1030
+ }
1031
+ })
1032
+ .into()
1033
+ }
1034
+
982
1035
  pub fn exclude(&self, columns: Vec<String>) -> Self {
983
1036
  self.inner.clone().exclude(columns).into()
984
1037
  }
985
1038
 
986
- pub fn interpolate(&self) -> Self {
987
- self.inner.clone().interpolate().into()
1039
+ pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
1040
+ self.inner.clone().interpolate(method.0).into()
988
1041
  }
989
1042
 
990
1043
  pub fn rolling_sum(
@@ -1275,6 +1328,7 @@ impl RbExpr {
1275
1328
  &self,
1276
1329
  width_strat: Wrap<ListToStructWidthStrategy>,
1277
1330
  _name_gen: Option<Value>,
1331
+ upper_bound: usize,
1278
1332
  ) -> RbResult<Self> {
1279
1333
  // TODO fix
1280
1334
  let name_gen = None;
@@ -1289,7 +1343,7 @@ impl RbExpr {
1289
1343
  .inner
1290
1344
  .clone()
1291
1345
  .arr()
1292
- .to_struct(width_strat.0, name_gen)
1346
+ .to_struct(width_strat.0, name_gen, upper_bound)
1293
1347
  .into())
1294
1348
  }
1295
1349
 
@@ -1450,6 +1504,10 @@ impl RbExpr {
1450
1504
  pub fn entropy(&self, base: f64, normalize: bool) -> Self {
1451
1505
  self.inner.clone().entropy(base, normalize).into()
1452
1506
  }
1507
+
1508
+ pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
1509
+ self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
1510
+ }
1453
1511
  }
1454
1512
 
1455
1513
  pub fn col(name: String) -> RbExpr {
@@ -1472,6 +1530,10 @@ pub fn cols(names: Vec<String>) -> RbExpr {
1472
1530
  dsl::cols(names).into()
1473
1531
  }
1474
1532
 
1533
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
1534
+ dsl::dtype_cols(dtypes).into()
1535
+ }
1536
+
1475
1537
  pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1476
1538
  let exprs = rb_exprs_to_exprs(exprs)?;
1477
1539
 
@@ -1479,6 +1541,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1479
1541
  Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
1480
1542
  }
1481
1543
 
1544
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
1545
+ let exprs = rb_exprs_to_exprs(exprs)?;
1546
+
1547
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1548
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
1549
+ }
1550
+
1482
1551
  // TODO improve
1483
1552
  pub fn lit(value: Value) -> RbResult<RbExpr> {
1484
1553
  if value.is_nil() {
@@ -1531,6 +1600,11 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1531
1600
  polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1532
1601
  }
1533
1602
 
1603
+ pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1604
+ let by = rb_exprs_to_exprs(by)?;
1605
+ Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1606
+ }
1607
+
1534
1608
  #[magnus::wrap(class = "Polars::RbWhen")]
1535
1609
  #[derive(Clone)]
1536
1610
  pub struct RbWhen {
@@ -1,3 +1,4 @@
1
+ mod apply;
1
2
  mod batched_csv;
2
3
  mod conversion;
3
4
  mod dataframe;
@@ -25,7 +26,7 @@ use polars::datatypes::{DataType, TimeUnit};
25
26
  use polars::error::PolarsResult;
26
27
  use polars::frame::DataFrame;
27
28
  use polars::functions::{diag_concat_df, hor_concat_df};
28
- use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
29
+ use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
29
30
  use series::RbSeries;
30
31
 
31
32
  #[cfg(target_os = "linux")]
@@ -55,13 +56,19 @@ fn series() -> RClass {
55
56
  #[magnus::init]
56
57
  fn init() -> RbResult<()> {
57
58
  let module = module();
59
+ module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
60
+ module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
58
61
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
62
+ module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
59
63
  module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
60
64
  module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
61
65
  module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
62
66
  module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
63
67
  module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
68
+ module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
64
69
  module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
70
+ module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
71
+ module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
65
72
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
66
73
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
67
74
 
@@ -74,10 +81,13 @@ fn init() -> RbResult<()> {
74
81
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
75
82
  class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
76
83
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
84
+ class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
85
+ class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
77
86
  class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
78
87
  class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
79
88
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
80
89
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
90
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
81
91
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
82
92
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
83
93
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
@@ -143,6 +153,7 @@ fn init() -> RbResult<()> {
143
153
  class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
144
154
  class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
145
155
  class.define_method("melt", method!(RbDataFrame::melt, 4))?;
156
+ class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
146
157
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
147
158
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
148
159
  class.define_method("unique", method!(RbDataFrame::unique, 3))?;
@@ -161,7 +172,9 @@ fn init() -> RbResult<()> {
161
172
  class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
162
173
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
163
174
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
175
+ class.define_method("apply", method!(RbDataFrame::apply, 3))?;
164
176
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
177
+ class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
165
178
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
166
179
  class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
167
180
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
@@ -277,9 +290,9 @@ fn init() -> RbResult<()> {
277
290
  class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
278
291
  class.define_method("product", method!(RbExpr::product, 0))?;
279
292
  class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
280
- class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 3))?;
281
- class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 3))?;
282
- class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 3))?;
293
+ class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 4))?;
294
+ class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 5))?;
295
+ class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 4))?;
283
296
  class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
284
297
  class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
285
298
  class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
@@ -361,14 +374,16 @@ fn init() -> RbResult<()> {
361
374
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
362
375
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
363
376
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
377
+ class.define_method("map", method!(RbExpr::map, 3))?;
364
378
  class.define_method("dot", method!(RbExpr::dot, 1))?;
365
379
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
366
380
  class.define_method("mode", method!(RbExpr::mode, 0))?;
367
381
  class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
368
382
  class.define_method("prefix", method!(RbExpr::prefix, 1))?;
369
383
  class.define_method("suffix", method!(RbExpr::suffix, 1))?;
384
+ class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
370
385
  class.define_method("exclude", method!(RbExpr::exclude, 1))?;
371
- class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
386
+ class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
372
387
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
373
388
  class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
374
389
  class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
@@ -396,7 +411,7 @@ fn init() -> RbResult<()> {
396
411
  class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
397
412
  class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
398
413
  class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
399
- class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
414
+ class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 3))?;
400
415
  class.define_method("rank", method!(RbExpr::rank, 2))?;
401
416
  class.define_method("diff", method!(RbExpr::diff, 2))?;
402
417
  class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
@@ -431,6 +446,7 @@ fn init() -> RbResult<()> {
431
446
  class.define_method("log", method!(RbExpr::log, 1))?;
432
447
  class.define_method("exp", method!(RbExpr::exp, 0))?;
433
448
  class.define_method("entropy", method!(RbExpr::entropy, 2))?;
449
+ class.define_method("_hash", method!(RbExpr::hash, 4))?;
434
450
 
435
451
  // meta
436
452
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
@@ -446,6 +462,7 @@ fn init() -> RbResult<()> {
446
462
  class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
447
463
  class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
448
464
  class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
465
+ class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
449
466
  class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
450
467
  class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
451
468
  class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
@@ -455,11 +472,13 @@ fn init() -> RbResult<()> {
455
472
  function!(crate::lazy::dsl::spearman_rank_corr, 4),
456
473
  )?;
457
474
  class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
475
+ class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
458
476
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
459
477
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
460
478
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
461
479
 
462
480
  let class = module.define_class("RbLazyFrame", Default::default())?;
481
+ class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
463
482
  class.define_singleton_method(
464
483
  "new_from_ndjson",
465
484
  function!(RbLazyFrame::new_from_ndjson, 7),
@@ -489,7 +508,9 @@ fn init() -> RbResult<()> {
489
508
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
490
509
  class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
491
510
  class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
492
- class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
511
+ class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 9))?;
512
+ class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
513
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
493
514
  class.define_method("join", method!(RbLazyFrame::join, 7))?;
494
515
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
495
516
  class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
@@ -597,6 +618,7 @@ fn init() -> RbResult<()> {
597
618
  class.define_method("median", method!(RbSeries::median, 0))?;
598
619
  class.define_method("quantile", method!(RbSeries::quantile, 2))?;
599
620
  class.define_method("_clone", method!(RbSeries::clone, 0))?;
621
+ class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
600
622
  class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
601
623
  class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
602
624
  class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
@@ -611,6 +633,79 @@ fn init() -> RbResult<()> {
611
633
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
612
634
  class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
613
635
 
636
+ // set
637
+ // class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
638
+ class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
639
+ class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
640
+ class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
641
+ class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
642
+ class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
643
+ class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
644
+ class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
645
+ class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
646
+ class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
647
+ class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
648
+ class.define_method(
649
+ "set_with_mask_bool",
650
+ method!(RbSeries::set_with_mask_bool, 2),
651
+ )?;
652
+
653
+ // arithmetic
654
+ class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
655
+ class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
656
+ class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
657
+ class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
658
+ class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
659
+ class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
660
+ class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
661
+ class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
662
+ class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
663
+ class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
664
+ class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
665
+ class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
666
+ class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
667
+ class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
668
+ class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
669
+ class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
670
+ class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
671
+ class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
672
+ class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
673
+ class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
674
+ class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
675
+ class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
676
+ class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
677
+ class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
678
+ class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
679
+ class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
680
+ class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
681
+ class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
682
+ class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
683
+ class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
684
+ class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
685
+ class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
686
+ class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
687
+ class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
688
+ class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
689
+ class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
690
+ class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
691
+ class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
692
+ class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
693
+ class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
694
+ class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
695
+ class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
696
+ class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
697
+ class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
698
+ class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
699
+ class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
700
+ class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
701
+ class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
702
+ class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
703
+ class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
704
+ class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
705
+ class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
706
+ class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
707
+ class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
708
+
614
709
  // eq
615
710
  class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
616
711
  class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
@@ -698,6 +793,40 @@ fn init() -> RbResult<()> {
698
793
  Ok(())
699
794
  }
700
795
 
796
+ fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
797
+ let dtypes = dtypes
798
+ .each()
799
+ .map(|v| v?.try_convert::<Wrap<DataType>>())
800
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
801
+ let dtypes = vec_extract_wrapped(dtypes);
802
+ Ok(crate::lazy::dsl::dtype_cols(dtypes))
803
+ }
804
+
805
+ #[allow(clippy::too_many_arguments)]
806
+ fn rb_duration(
807
+ days: Option<&RbExpr>,
808
+ seconds: Option<&RbExpr>,
809
+ nanoseconds: Option<&RbExpr>,
810
+ microseconds: Option<&RbExpr>,
811
+ milliseconds: Option<&RbExpr>,
812
+ minutes: Option<&RbExpr>,
813
+ hours: Option<&RbExpr>,
814
+ weeks: Option<&RbExpr>,
815
+ ) -> RbExpr {
816
+ let args = DurationArgs {
817
+ days: days.map(|e| e.inner.clone()),
818
+ seconds: seconds.map(|e| e.inner.clone()),
819
+ nanoseconds: nanoseconds.map(|e| e.inner.clone()),
820
+ microseconds: microseconds.map(|e| e.inner.clone()),
821
+ milliseconds: milliseconds.map(|e| e.inner.clone()),
822
+ minutes: minutes.map(|e| e.inner.clone()),
823
+ hours: hours.map(|e| e.inner.clone()),
824
+ weeks: weeks.map(|e| e.inner.clone()),
825
+ };
826
+
827
+ polars::lazy::dsl::duration(args).into()
828
+ }
829
+
701
830
  fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
702
831
  let mut iter = seq.each();
703
832
  let first = iter.next().unwrap()?;
@@ -726,6 +855,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
726
855
  Ok(df.into())
727
856
  }
728
857
 
858
+ fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
859
+ let (seq, len) = get_rbseq(lfs)?;
860
+ let mut lfs = Vec::with_capacity(len);
861
+
862
+ for res in seq.each() {
863
+ let item = res?;
864
+ let lf = get_lf(item)?;
865
+ lfs.push(lf);
866
+ }
867
+
868
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
869
+ Ok(lf.into())
870
+ }
871
+
729
872
  fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
730
873
  let mut dfs = Vec::new();
731
874
  for item in seq.each() {
@@ -786,6 +929,25 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
786
929
  Ok(dict.into())
787
930
  }
788
931
 
932
+ fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
933
+ use polars_core::utils::rayon::prelude::*;
934
+
935
+ let lfs = lfs
936
+ .each()
937
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
938
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
939
+
940
+ polars_core::POOL.install(|| {
941
+ lfs.par_iter()
942
+ .map(|lf| {
943
+ let df = lf.ldf.clone().collect()?;
944
+ Ok(RbDataFrame::new(df))
945
+ })
946
+ .collect::<polars_core::error::PolarsResult<Vec<_>>>()
947
+ .map_err(RbPolarsErr::from)
948
+ })
949
+ }
950
+
789
951
  fn rb_date_range(
790
952
  start: i64,
791
953
  stop: i64,
@@ -802,12 +964,22 @@ fn rb_date_range(
802
964
  Duration::parse(&every),
803
965
  closed.0,
804
966
  tu.0,
805
- tz,
967
+ tz.as_ref(),
806
968
  )
807
969
  .into_series()
808
970
  .into()
809
971
  }
810
972
 
973
+ fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
974
+ let exprs = rb_exprs_to_exprs(exprs)?;
975
+ Ok(polars::lazy::dsl::coalesce(&exprs).into())
976
+ }
977
+
978
+ fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
979
+ let exprs = rb_exprs_to_exprs(exprs)?;
980
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
981
+ }
982
+
811
983
  fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
812
984
  let exprs = rb_exprs_to_exprs(exprs)?;
813
985
  Ok(polars::lazy::dsl::as_struct(&exprs).into())