polars-df 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,7 +3,7 @@ use polars::io::RowCount;
3
3
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
- use std::io::BufWriter;
6
+ use std::io::{BufWriter, Read};
7
7
 
8
8
  use crate::conversion::*;
9
9
  use crate::file::get_file_like;
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
53
53
  }
54
54
 
55
55
  impl RbLazyFrame {
56
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
57
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
58
+ // so don't bother with files.
59
+ let mut json = String::new();
60
+ let _ = get_file_like(rb_f, false)?
61
+ .read_to_string(&mut json)
62
+ .unwrap();
63
+
64
+ // Safety
65
+ // we skipped the serializing/deserializing of the static in lifetime in `DataType`
66
+ // so we actually don't have a lifetime at all when serializing.
67
+
68
+ // &str still has a lifetime. Bit its ok, because we drop it immediately
69
+ // in this scope
70
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
71
+
72
+ let lp = serde_json::from_str::<LogicalPlan>(json)
73
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
74
+ Ok(LazyFrame::from(lp).into())
75
+ }
76
+
56
77
  pub fn new_from_ndjson(
57
78
  path: String,
58
79
  infer_schema_length: Option<usize>,
@@ -327,6 +348,7 @@ impl RbLazyFrame {
327
348
  include_boundaries: bool,
328
349
  closed: Wrap<ClosedWindow>,
329
350
  by: RArray,
351
+ start_by: Wrap<StartBy>,
330
352
  ) -> RbResult<RbLazyGroupBy> {
331
353
  let closed_window = closed.0;
332
354
  let by = rb_exprs_to_exprs(by)?;
@@ -341,6 +363,7 @@ impl RbLazyFrame {
341
363
  truncate,
342
364
  include_boundaries,
343
365
  closed_window,
366
+ start_by: start_by.0,
344
367
  },
345
368
  );
346
369
 
@@ -349,6 +372,56 @@ impl RbLazyFrame {
349
372
  })
350
373
  }
351
374
 
375
+ pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
376
+ let contexts = contexts
377
+ .each()
378
+ .map(|v| v.unwrap().try_convert())
379
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
380
+ let contexts = contexts
381
+ .into_iter()
382
+ .map(|ldf| ldf.ldf.clone())
383
+ .collect::<Vec<_>>();
384
+ Ok(self.ldf.clone().with_context(contexts).into())
385
+ }
386
+
387
+ #[allow(clippy::too_many_arguments)]
388
+ pub fn join_asof(
389
+ &self,
390
+ other: &RbLazyFrame,
391
+ left_on: &RbExpr,
392
+ right_on: &RbExpr,
393
+ left_by: Option<Vec<String>>,
394
+ right_by: Option<Vec<String>>,
395
+ allow_parallel: bool,
396
+ force_parallel: bool,
397
+ suffix: String,
398
+ strategy: Wrap<AsofStrategy>,
399
+ tolerance: Option<Wrap<AnyValue<'_>>>,
400
+ tolerance_str: Option<String>,
401
+ ) -> RbResult<Self> {
402
+ let ldf = self.ldf.clone();
403
+ let other = other.ldf.clone();
404
+ let left_on = left_on.inner.clone();
405
+ let right_on = right_on.inner.clone();
406
+ Ok(ldf
407
+ .join_builder()
408
+ .with(other)
409
+ .left_on([left_on])
410
+ .right_on([right_on])
411
+ .allow_parallel(allow_parallel)
412
+ .force_parallel(force_parallel)
413
+ .how(JoinType::AsOf(AsOfOptions {
414
+ strategy: strategy.0,
415
+ left_by,
416
+ right_by,
417
+ tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
418
+ tolerance_str,
419
+ }))
420
+ .suffix(suffix)
421
+ .finish()
422
+ .into())
423
+ }
424
+
352
425
  #[allow(clippy::too_many_arguments)]
353
426
  pub fn join(
354
427
  &self,
@@ -443,9 +516,13 @@ impl RbLazyFrame {
443
516
  ldf.median().into()
444
517
  }
445
518
 
446
- pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
519
+ pub fn quantile(
520
+ &self,
521
+ quantile: &RbExpr,
522
+ interpolation: Wrap<QuantileInterpolOptions>,
523
+ ) -> Self {
447
524
  let ldf = self.ldf.clone();
448
- ldf.quantile(quantile, interpolation.0).into()
525
+ ldf.quantile(quantile.inner.clone(), interpolation.0).into()
449
526
  }
450
527
 
451
528
  pub fn explode(&self, column: RArray) -> RbResult<Self> {
@@ -1,3 +1,4 @@
1
+ use magnus::block::Proc;
1
2
  use magnus::{class, RArray, RString, Value};
2
3
  use polars::chunked_array::ops::SortOptions;
3
4
  use polars::lazy::dsl;
@@ -163,10 +164,14 @@ impl RbExpr {
163
164
  self.clone().inner.list().into()
164
165
  }
165
166
 
166
- pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
167
+ pub fn quantile(
168
+ &self,
169
+ quantile: &RbExpr,
170
+ interpolation: Wrap<QuantileInterpolOptions>,
171
+ ) -> Self {
167
172
  self.clone()
168
173
  .inner
169
- .quantile(quantile, interpolation.0)
174
+ .quantile(quantile.inner.clone(), interpolation.0)
170
175
  .into()
171
176
  }
172
177
 
@@ -505,7 +510,13 @@ impl RbExpr {
505
510
  self.inner.clone().shrink_dtype().into()
506
511
  }
507
512
 
508
- pub fn str_parse_date(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
513
+ pub fn str_parse_date(
514
+ &self,
515
+ fmt: Option<String>,
516
+ strict: bool,
517
+ exact: bool,
518
+ cache: bool,
519
+ ) -> Self {
509
520
  self.inner
510
521
  .clone()
511
522
  .str()
@@ -514,11 +525,20 @@ impl RbExpr {
514
525
  fmt,
515
526
  strict,
516
527
  exact,
528
+ cache,
529
+ tz_aware: false,
517
530
  })
518
531
  .into()
519
532
  }
520
533
 
521
- pub fn str_parse_datetime(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
534
+ pub fn str_parse_datetime(
535
+ &self,
536
+ fmt: Option<String>,
537
+ strict: bool,
538
+ exact: bool,
539
+ cache: bool,
540
+ tz_aware: bool,
541
+ ) -> Self {
522
542
  let tu = match fmt {
523
543
  Some(ref fmt) => {
524
544
  if fmt.contains("%.9f")
@@ -543,11 +563,19 @@ impl RbExpr {
543
563
  fmt,
544
564
  strict,
545
565
  exact,
566
+ cache,
567
+ tz_aware,
546
568
  })
547
569
  .into()
548
570
  }
549
571
 
550
- pub fn str_parse_time(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
572
+ pub fn str_parse_time(
573
+ &self,
574
+ fmt: Option<String>,
575
+ strict: bool,
576
+ exact: bool,
577
+ cache: bool,
578
+ ) -> Self {
551
579
  self.inner
552
580
  .clone()
553
581
  .str()
@@ -556,6 +584,8 @@ impl RbExpr {
556
584
  fmt,
557
585
  strict,
558
586
  exact,
587
+ cache,
588
+ tz_aware: false,
559
589
  })
560
590
  .into()
561
591
  }
@@ -722,8 +752,12 @@ impl RbExpr {
722
752
  self.inner.clone().str().extract(&pat, group_index).into()
723
753
  }
724
754
 
725
- pub fn str_extract_all(&self, pat: String) -> Self {
726
- self.inner.clone().str().extract_all(&pat).into()
755
+ pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
756
+ self.inner
757
+ .clone()
758
+ .str()
759
+ .extract_all(pat.inner.clone())
760
+ .into()
727
761
  }
728
762
 
729
763
  pub fn count_match(&self, pat: String) -> Self {
@@ -946,6 +980,10 @@ impl RbExpr {
946
980
  self.inner.clone().dt().round(&every, &offset).into()
947
981
  }
948
982
 
983
/// Applies the Ruby callable `lambda` to this expression by delegating to
/// `map_single`, forwarding `output_type` and `agg_list` unchanged.
// NOTE(review): the exact semantics of `output_type` and `agg_list` are
// defined by `map_single`, which is not visible here — confirm there.
+ pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
984
+ map_single(self, lambda, output_type, agg_list)
985
+ }
986
+
949
987
  pub fn dot(&self, other: &RbExpr) -> Self {
950
988
  self.inner.clone().dot(other.inner.clone()).into()
951
989
  }
@@ -979,12 +1017,27 @@ impl RbExpr {
979
1017
  self.inner.clone().suffix(&suffix).into()
980
1018
  }
981
1019
 
1020
+ pub fn map_alias(&self, lambda: Proc) -> Self {
1021
+ self.inner
1022
+ .clone()
1023
+ .map_alias(move |name| {
1024
+ let out = lambda.call::<_, String>((name,));
1025
+ match out {
1026
+ Ok(out) => Ok(out),
1027
+ Err(e) => Err(PolarsError::ComputeError(
1028
+ format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
1029
+ )),
1030
+ }
1031
+ })
1032
+ .into()
1033
+ }
1034
+
982
1035
  pub fn exclude(&self, columns: Vec<String>) -> Self {
983
1036
  self.inner.clone().exclude(columns).into()
984
1037
  }
985
1038
 
986
- pub fn interpolate(&self) -> Self {
987
- self.inner.clone().interpolate().into()
1039
+ pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
1040
+ self.inner.clone().interpolate(method.0).into()
988
1041
  }
989
1042
 
990
1043
  pub fn rolling_sum(
@@ -1275,6 +1328,7 @@ impl RbExpr {
1275
1328
  &self,
1276
1329
  width_strat: Wrap<ListToStructWidthStrategy>,
1277
1330
  _name_gen: Option<Value>,
1331
+ upper_bound: usize,
1278
1332
  ) -> RbResult<Self> {
1279
1333
  // TODO fix
1280
1334
  let name_gen = None;
@@ -1289,7 +1343,7 @@ impl RbExpr {
1289
1343
  .inner
1290
1344
  .clone()
1291
1345
  .arr()
1292
- .to_struct(width_strat.0, name_gen)
1346
+ .to_struct(width_strat.0, name_gen, upper_bound)
1293
1347
  .into())
1294
1348
  }
1295
1349
 
@@ -1450,6 +1504,10 @@ impl RbExpr {
1450
1504
  pub fn entropy(&self, base: f64, normalize: bool) -> Self {
1451
1505
  self.inner.clone().entropy(base, normalize).into()
1452
1506
  }
1507
+
1508
+ pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
1509
+ self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
1510
+ }
1453
1511
  }
1454
1512
 
1455
1513
  pub fn col(name: String) -> RbExpr {
@@ -1472,6 +1530,10 @@ pub fn cols(names: Vec<String>) -> RbExpr {
1472
1530
  dsl::cols(names).into()
1473
1531
  }
1474
1532
 
1533
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
1534
+ dsl::dtype_cols(dtypes).into()
1535
+ }
1536
+
1475
1537
  pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1476
1538
  let exprs = rb_exprs_to_exprs(exprs)?;
1477
1539
 
@@ -1479,6 +1541,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1479
1541
  Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
1480
1542
  }
1481
1543
 
1544
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
1545
+ let exprs = rb_exprs_to_exprs(exprs)?;
1546
+
1547
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1548
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
1549
+ }
1550
+
1482
1551
  // TODO improve
1483
1552
  pub fn lit(value: Value) -> RbResult<RbExpr> {
1484
1553
  if value.is_nil() {
@@ -1531,6 +1600,11 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1531
1600
  polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1532
1601
  }
1533
1602
 
1603
+ pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1604
+ let by = rb_exprs_to_exprs(by)?;
1605
+ Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1606
+ }
1607
+
1534
1608
  #[magnus::wrap(class = "Polars::RbWhen")]
1535
1609
  #[derive(Clone)]
1536
1610
  pub struct RbWhen {
@@ -1,3 +1,4 @@
1
+ mod apply;
1
2
  mod batched_csv;
2
3
  mod conversion;
3
4
  mod dataframe;
@@ -25,7 +26,7 @@ use polars::datatypes::{DataType, TimeUnit};
25
26
  use polars::error::PolarsResult;
26
27
  use polars::frame::DataFrame;
27
28
  use polars::functions::{diag_concat_df, hor_concat_df};
28
- use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
29
+ use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
29
30
  use series::RbSeries;
30
31
 
31
32
  #[cfg(target_os = "linux")]
@@ -55,13 +56,19 @@ fn series() -> RClass {
55
56
  #[magnus::init]
56
57
  fn init() -> RbResult<()> {
57
58
  let module = module();
59
+ module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
60
+ module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
58
61
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
62
+ module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
59
63
  module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
60
64
  module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
61
65
  module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
62
66
  module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
63
67
  module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
68
+ module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
64
69
  module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
70
+ module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
71
+ module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
65
72
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
66
73
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
67
74
 
@@ -74,10 +81,13 @@ fn init() -> RbResult<()> {
74
81
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
75
82
  class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
76
83
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
84
+ class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
85
+ class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
77
86
  class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
78
87
  class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
79
88
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
80
89
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
90
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
81
91
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
82
92
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
83
93
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
@@ -143,6 +153,7 @@ fn init() -> RbResult<()> {
143
153
  class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
144
154
  class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
145
155
  class.define_method("melt", method!(RbDataFrame::melt, 4))?;
156
+ class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
146
157
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
147
158
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
148
159
  class.define_method("unique", method!(RbDataFrame::unique, 3))?;
@@ -161,7 +172,9 @@ fn init() -> RbResult<()> {
161
172
  class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
162
173
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
163
174
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
175
+ class.define_method("apply", method!(RbDataFrame::apply, 3))?;
164
176
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
177
+ class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
165
178
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
166
179
  class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
167
180
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
@@ -277,9 +290,9 @@ fn init() -> RbResult<()> {
277
290
  class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
278
291
  class.define_method("product", method!(RbExpr::product, 0))?;
279
292
  class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
280
- class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 3))?;
281
- class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 3))?;
282
- class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 3))?;
293
+ class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 4))?;
294
+ class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 5))?;
295
+ class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 4))?;
283
296
  class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
284
297
  class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
285
298
  class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
@@ -361,14 +374,16 @@ fn init() -> RbResult<()> {
361
374
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
362
375
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
363
376
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
377
+ class.define_method("map", method!(RbExpr::map, 3))?;
364
378
  class.define_method("dot", method!(RbExpr::dot, 1))?;
365
379
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
366
380
  class.define_method("mode", method!(RbExpr::mode, 0))?;
367
381
  class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
368
382
  class.define_method("prefix", method!(RbExpr::prefix, 1))?;
369
383
  class.define_method("suffix", method!(RbExpr::suffix, 1))?;
384
+ class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
370
385
  class.define_method("exclude", method!(RbExpr::exclude, 1))?;
371
- class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
386
+ class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
372
387
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
373
388
  class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
374
389
  class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
@@ -396,7 +411,7 @@ fn init() -> RbResult<()> {
396
411
  class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
397
412
  class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
398
413
  class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
399
- class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
414
+ class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 3))?;
400
415
  class.define_method("rank", method!(RbExpr::rank, 2))?;
401
416
  class.define_method("diff", method!(RbExpr::diff, 2))?;
402
417
  class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
@@ -431,6 +446,7 @@ fn init() -> RbResult<()> {
431
446
  class.define_method("log", method!(RbExpr::log, 1))?;
432
447
  class.define_method("exp", method!(RbExpr::exp, 0))?;
433
448
  class.define_method("entropy", method!(RbExpr::entropy, 2))?;
449
+ class.define_method("_hash", method!(RbExpr::hash, 4))?;
434
450
 
435
451
  // meta
436
452
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
@@ -446,6 +462,7 @@ fn init() -> RbResult<()> {
446
462
  class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
447
463
  class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
448
464
  class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
465
+ class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
449
466
  class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
450
467
  class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
451
468
  class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
@@ -455,11 +472,13 @@ fn init() -> RbResult<()> {
455
472
  function!(crate::lazy::dsl::spearman_rank_corr, 4),
456
473
  )?;
457
474
  class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
475
+ class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
458
476
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
459
477
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
460
478
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
461
479
 
462
480
  let class = module.define_class("RbLazyFrame", Default::default())?;
481
+ class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
463
482
  class.define_singleton_method(
464
483
  "new_from_ndjson",
465
484
  function!(RbLazyFrame::new_from_ndjson, 7),
@@ -489,7 +508,9 @@ fn init() -> RbResult<()> {
489
508
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
490
509
  class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
491
510
  class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
492
- class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
511
+ class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 9))?;
512
+ class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
513
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
493
514
  class.define_method("join", method!(RbLazyFrame::join, 7))?;
494
515
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
495
516
  class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
@@ -597,6 +618,7 @@ fn init() -> RbResult<()> {
597
618
  class.define_method("median", method!(RbSeries::median, 0))?;
598
619
  class.define_method("quantile", method!(RbSeries::quantile, 2))?;
599
620
  class.define_method("_clone", method!(RbSeries::clone, 0))?;
621
+ class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
600
622
  class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
601
623
  class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
602
624
  class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
@@ -611,6 +633,79 @@ fn init() -> RbResult<()> {
611
633
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
612
634
  class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
613
635
 
636
+ // set
637
+ // class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
638
+ class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
639
+ class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
640
+ class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
641
+ class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
642
+ class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
643
+ class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
644
+ class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
645
+ class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
646
+ class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
647
+ class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
648
+ class.define_method(
649
+ "set_with_mask_bool",
650
+ method!(RbSeries::set_with_mask_bool, 2),
651
+ )?;
652
+
653
+ // arithmetic
654
+ class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
655
+ class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
656
+ class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
657
+ class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
658
+ class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
659
+ class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
660
+ class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
661
+ class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
662
+ class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
663
+ class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
664
+ class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
665
+ class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
666
+ class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
667
+ class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
668
+ class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
669
+ class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
670
+ class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
671
+ class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
672
+ class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
673
+ class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
674
+ class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
675
+ class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
676
+ class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
677
+ class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
678
+ class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
679
+ class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
680
+ class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
681
+ class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
682
+ class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
683
+ class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
684
+ class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
685
+ class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
686
+ class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
687
+ class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
688
+ class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
689
+ class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
690
+ class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
691
+ class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
692
+ class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
693
+ class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
694
+ class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
695
+ class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
696
+ class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
697
+ class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
698
+ class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
699
+ class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
700
+ class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
701
+ class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
702
+ class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
703
+ class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
704
+ class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
705
+ class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
706
+ class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
707
+ class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
708
+
614
709
  // eq
615
710
  class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
616
711
  class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
@@ -698,6 +793,40 @@ fn init() -> RbResult<()> {
698
793
  Ok(())
699
794
  }
700
795
 
796
+ fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
797
+ let dtypes = dtypes
798
+ .each()
799
+ .map(|v| v?.try_convert::<Wrap<DataType>>())
800
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
801
+ let dtypes = vec_extract_wrapped(dtypes);
802
+ Ok(crate::lazy::dsl::dtype_cols(dtypes))
803
+ }
804
+
805
+ #[allow(clippy::too_many_arguments)]
806
+ fn rb_duration(
807
+ days: Option<&RbExpr>,
808
+ seconds: Option<&RbExpr>,
809
+ nanoseconds: Option<&RbExpr>,
810
+ microseconds: Option<&RbExpr>,
811
+ milliseconds: Option<&RbExpr>,
812
+ minutes: Option<&RbExpr>,
813
+ hours: Option<&RbExpr>,
814
+ weeks: Option<&RbExpr>,
815
+ ) -> RbExpr {
816
+ let args = DurationArgs {
817
+ days: days.map(|e| e.inner.clone()),
818
+ seconds: seconds.map(|e| e.inner.clone()),
819
+ nanoseconds: nanoseconds.map(|e| e.inner.clone()),
820
+ microseconds: microseconds.map(|e| e.inner.clone()),
821
+ milliseconds: milliseconds.map(|e| e.inner.clone()),
822
+ minutes: minutes.map(|e| e.inner.clone()),
823
+ hours: hours.map(|e| e.inner.clone()),
824
+ weeks: weeks.map(|e| e.inner.clone()),
825
+ };
826
+
827
+ polars::lazy::dsl::duration(args).into()
828
+ }
829
+
701
830
  fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
702
831
  let mut iter = seq.each();
703
832
  let first = iter.next().unwrap()?;
@@ -726,6 +855,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
726
855
  Ok(df.into())
727
856
  }
728
857
 
858
+ fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
859
+ let (seq, len) = get_rbseq(lfs)?;
860
+ let mut lfs = Vec::with_capacity(len);
861
+
862
+ for res in seq.each() {
863
+ let item = res?;
864
+ let lf = get_lf(item)?;
865
+ lfs.push(lf);
866
+ }
867
+
868
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
869
+ Ok(lf.into())
870
+ }
871
+
729
872
  fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
730
873
  let mut dfs = Vec::new();
731
874
  for item in seq.each() {
@@ -786,6 +929,25 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
786
929
  Ok(dict.into())
787
930
  }
788
931
 
932
+ fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
933
+ use polars_core::utils::rayon::prelude::*;
934
+
935
+ let lfs = lfs
936
+ .each()
937
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
938
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
939
+
940
+ polars_core::POOL.install(|| {
941
+ lfs.par_iter()
942
+ .map(|lf| {
943
+ let df = lf.ldf.clone().collect()?;
944
+ Ok(RbDataFrame::new(df))
945
+ })
946
+ .collect::<polars_core::error::PolarsResult<Vec<_>>>()
947
+ .map_err(RbPolarsErr::from)
948
+ })
949
+ }
950
+
789
951
  fn rb_date_range(
790
952
  start: i64,
791
953
  stop: i64,
@@ -802,12 +964,22 @@ fn rb_date_range(
802
964
  Duration::parse(&every),
803
965
  closed.0,
804
966
  tu.0,
805
- tz,
967
+ tz.as_ref(),
806
968
  )
807
969
  .into_series()
808
970
  .into()
809
971
  }
810
972
 
973
+ fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
974
+ let exprs = rb_exprs_to_exprs(exprs)?;
975
+ Ok(polars::lazy::dsl::coalesce(&exprs).into())
976
+ }
977
+
978
+ fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
979
+ let exprs = rb_exprs_to_exprs(exprs)?;
980
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
981
+ }
982
+
811
983
  fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
812
984
  let exprs = rb_exprs_to_exprs(exprs)?;
813
985
  Ok(polars::lazy::dsl::as_struct(&exprs).into())