polars-df 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
@@ -1,3 +1,4 @@
1
+ use magnus::block::Proc;
1
2
  use magnus::{class, RArray, RString, Value};
2
3
  use polars::chunked_array::ops::SortOptions;
3
4
  use polars::lazy::dsl;
@@ -659,6 +660,65 @@ impl RbExpr {
659
660
  self.inner.clone().str().starts_with(sub).into()
660
661
  }
661
662
 
663
+ pub fn str_hex_encode(&self) -> Self {
664
+ self.clone()
665
+ .inner
666
+ .map(
667
+ move |s| s.utf8().map(|s| s.hex_encode().into_series()),
668
+ GetOutput::same_type(),
669
+ )
670
+ .with_fmt("str.hex_encode")
671
+ .into()
672
+ }
673
+
674
+ pub fn str_hex_decode(&self, strict: Option<bool>) -> Self {
675
+ self.clone()
676
+ .inner
677
+ .map(
678
+ move |s| s.utf8()?.hex_decode(strict).map(|s| s.into_series()),
679
+ GetOutput::same_type(),
680
+ )
681
+ .with_fmt("str.hex_decode")
682
+ .into()
683
+ }
684
+
685
+ pub fn str_base64_encode(&self) -> Self {
686
+ self.clone()
687
+ .inner
688
+ .map(
689
+ move |s| s.utf8().map(|s| s.base64_encode().into_series()),
690
+ GetOutput::same_type(),
691
+ )
692
+ .with_fmt("str.base64_encode")
693
+ .into()
694
+ }
695
+
696
+ pub fn str_base64_decode(&self, strict: Option<bool>) -> Self {
697
+ self.clone()
698
+ .inner
699
+ .map(
700
+ move |s| s.utf8()?.base64_decode(strict).map(|s| s.into_series()),
701
+ GetOutput::same_type(),
702
+ )
703
+ .with_fmt("str.base64_decode")
704
+ .into()
705
+ }
706
+
707
+ pub fn str_json_path_match(&self, pat: String) -> Self {
708
+ let function = move |s: Series| {
709
+ let ca = s.utf8()?;
710
+ match ca.json_path_match(&pat) {
711
+ Ok(ca) => Ok(ca.into_series()),
712
+ Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
713
+ }
714
+ };
715
+ self.clone()
716
+ .inner
717
+ .map(function, GetOutput::from_type(DataType::Utf8))
718
+ .with_fmt("str.json_path_match")
719
+ .into()
720
+ }
721
+
662
722
  pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
663
723
  self.inner.clone().str().extract(&pat, group_index).into()
664
724
  }
@@ -887,6 +947,14 @@ impl RbExpr {
887
947
  self.inner.clone().dt().round(&every, &offset).into()
888
948
  }
889
949
 
950
+ pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
951
+ map_single(self, lambda, output_type, agg_list)
952
+ }
953
+
954
+ pub fn dot(&self, other: &RbExpr) -> Self {
955
+ self.inner.clone().dot(other.inner.clone()).into()
956
+ }
957
+
890
958
  pub fn reinterpret(&self, signed: bool) -> Self {
891
959
  let function = move |s: Series| reinterpret(&s, signed);
892
960
  let dt = if signed {
@@ -916,6 +984,23 @@ impl RbExpr {
916
984
  self.inner.clone().suffix(&suffix).into()
917
985
  }
918
986
 
987
+ pub fn map_alias(&self, lambda: Proc) -> Self {
988
+ self.inner
989
+ .clone()
990
+ .map_alias(move |name| {
991
+ let out = lambda.call::<_, String>((name,));
992
+ // TODO switch to match
993
+ out.unwrap()
994
+ // match out {
995
+ // Ok(out) => Ok(out.to_string()),
996
+ // Err(e) => Err(PolarsError::ComputeError(
997
+ // format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
998
+ // )),
999
+ // }
1000
+ })
1001
+ .into()
1002
+ }
1003
+
919
1004
  pub fn exclude(&self, columns: Vec<String>) -> Self {
920
1005
  self.inner.clone().exclude(columns).into()
921
1006
  }
@@ -1208,6 +1293,28 @@ impl RbExpr {
1208
1293
  .into()
1209
1294
  }
1210
1295
 
1296
+ pub fn lst_to_struct(
1297
+ &self,
1298
+ width_strat: Wrap<ListToStructWidthStrategy>,
1299
+ _name_gen: Option<Value>,
1300
+ ) -> RbResult<Self> {
1301
+ // TODO fix
1302
+ let name_gen = None;
1303
+ // let name_gen = name_gen.map(|lambda| {
1304
+ // Arc::new(move |idx: usize| {
1305
+ // let out: Value = lambda.funcall("call", (idx,)).unwrap();
1306
+ // out.try_convert::<String>().unwrap()
1307
+ // }) as NameGenerator
1308
+ // });
1309
+
1310
+ Ok(self
1311
+ .inner
1312
+ .clone()
1313
+ .arr()
1314
+ .to_struct(width_strat.0, name_gen)
1315
+ .into())
1316
+ }
1317
+
1211
1318
  pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
1212
1319
  let options = RankOptions {
1213
1320
  method: method.0,
@@ -1365,6 +1472,10 @@ impl RbExpr {
1365
1472
  pub fn entropy(&self, base: f64, normalize: bool) -> Self {
1366
1473
  self.inner.clone().entropy(base, normalize).into()
1367
1474
  }
1475
+
1476
+ pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
1477
+ self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
1478
+ }
1368
1479
  }
1369
1480
 
1370
1481
  pub fn col(name: String) -> RbExpr {
@@ -1394,6 +1505,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1394
1505
  Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
1395
1506
  }
1396
1507
 
1508
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
1509
+ let exprs = rb_exprs_to_exprs(exprs)?;
1510
+
1511
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1512
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
1513
+ }
1514
+
1397
1515
  // TODO improve
1398
1516
  pub fn lit(value: Value) -> RbResult<RbExpr> {
1399
1517
  if value.is_nil() {
@@ -1433,6 +1551,24 @@ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
1433
1551
  }
1434
1552
  }
1435
1553
 
1554
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
1555
+ polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
1556
+ }
1557
+
1558
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
1559
+ polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
1560
+ .into()
1561
+ }
1562
+
1563
+ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1564
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1565
+ }
1566
+
1567
+ pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1568
+ let by = rb_exprs_to_exprs(by)?;
1569
+ Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1570
+ }
1571
+
1436
1572
  #[magnus::wrap(class = "Polars::RbWhen")]
1437
1573
  #[derive(Clone)]
1438
1574
  pub struct RbWhen {
@@ -1,9 +1,11 @@
1
+ mod apply;
1
2
  mod batched_csv;
2
3
  mod conversion;
3
4
  mod dataframe;
4
5
  mod error;
5
6
  mod file;
6
7
  mod lazy;
8
+ mod list_construction;
7
9
  mod series;
8
10
  mod set;
9
11
  mod utils;
@@ -24,9 +26,23 @@ use polars::datatypes::{DataType, TimeUnit};
24
26
  use polars::error::PolarsResult;
25
27
  use polars::frame::DataFrame;
26
28
  use polars::functions::{diag_concat_df, hor_concat_df};
27
- use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
29
+ use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
28
30
  use series::RbSeries;
29
31
 
32
+ #[cfg(target_os = "linux")]
33
+ use jemallocator::Jemalloc;
34
+
35
+ #[cfg(not(target_os = "linux"))]
36
+ use mimalloc::MiMalloc;
37
+
38
+ #[global_allocator]
39
+ #[cfg(target_os = "linux")]
40
+ static GLOBAL: Jemalloc = Jemalloc;
41
+
42
+ #[global_allocator]
43
+ #[cfg(not(target_os = "linux"))]
44
+ static GLOBAL: MiMalloc = MiMalloc;
45
+
30
46
  type RbResult<T> = Result<T, Error>;
31
47
 
32
48
  fn module() -> RModule {
@@ -40,13 +56,18 @@ fn series() -> RClass {
40
56
  #[magnus::init]
41
57
  fn init() -> RbResult<()> {
42
58
  let module = module();
59
+ module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
43
60
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
61
+ module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
44
62
  module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
45
63
  module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
46
64
  module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
47
65
  module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
48
66
  module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
67
+ module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
49
68
  module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
69
+ module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
70
+ module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
50
71
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
51
72
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
52
73
 
@@ -59,10 +80,13 @@ fn init() -> RbResult<()> {
59
80
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
60
81
  class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
61
82
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
83
+ class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
84
+ class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
62
85
  class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
63
86
  class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
64
87
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
65
88
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
89
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
66
90
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
67
91
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
68
92
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
@@ -128,6 +152,7 @@ fn init() -> RbResult<()> {
128
152
  class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
129
153
  class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
130
154
  class.define_method("melt", method!(RbDataFrame::melt, 4))?;
155
+ class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
131
156
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
132
157
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
133
158
  class.define_method("unique", method!(RbDataFrame::unique, 3))?;
@@ -146,7 +171,9 @@ fn init() -> RbResult<()> {
146
171
  class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
147
172
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
148
173
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
174
+ class.define_method("apply", method!(RbDataFrame::apply, 3))?;
149
175
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
176
+ class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
150
177
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
151
178
  class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
152
179
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
@@ -281,6 +308,14 @@ fn init() -> RbResult<()> {
281
308
  class.define_method("str_contains", method!(RbExpr::str_contains, 2))?;
282
309
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
283
310
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
311
+ class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
312
+ class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
313
+ class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
314
+ class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
315
+ class.define_method(
316
+ "str_json_path_match",
317
+ method!(RbExpr::str_json_path_match, 1),
318
+ )?;
284
319
  class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
285
320
  class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
286
321
  class.define_method("count_match", method!(RbExpr::count_match, 1))?;
@@ -338,11 +373,14 @@ fn init() -> RbResult<()> {
338
373
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
339
374
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
340
375
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
376
+ class.define_method("map", method!(RbExpr::map, 3))?;
377
+ class.define_method("dot", method!(RbExpr::dot, 1))?;
341
378
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
342
379
  class.define_method("mode", method!(RbExpr::mode, 0))?;
343
380
  class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
344
381
  class.define_method("prefix", method!(RbExpr::prefix, 1))?;
345
382
  class.define_method("suffix", method!(RbExpr::suffix, 1))?;
383
+ class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
346
384
  class.define_method("exclude", method!(RbExpr::exclude, 1))?;
347
385
  class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
348
386
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
@@ -372,6 +410,7 @@ fn init() -> RbResult<()> {
372
410
  class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
373
411
  class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
374
412
  class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
413
+ class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
375
414
  class.define_method("rank", method!(RbExpr::rank, 2))?;
376
415
  class.define_method("diff", method!(RbExpr::diff, 2))?;
377
416
  class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
@@ -406,6 +445,7 @@ fn init() -> RbResult<()> {
406
445
  class.define_method("log", method!(RbExpr::log, 1))?;
407
446
  class.define_method("exp", method!(RbExpr::exp, 0))?;
408
447
  class.define_method("entropy", method!(RbExpr::entropy, 2))?;
448
+ class.define_method("_hash", method!(RbExpr::hash, 4))?;
409
449
 
410
450
  // meta
411
451
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
@@ -421,14 +461,23 @@ fn init() -> RbResult<()> {
421
461
  class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
422
462
  class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
423
463
  class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
464
+ class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
424
465
  class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
425
466
  class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
426
467
  class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
468
+ class.define_singleton_method("pearson_corr", function!(crate::lazy::dsl::pearson_corr, 3))?;
469
+ class.define_singleton_method(
470
+ "spearman_rank_corr",
471
+ function!(crate::lazy::dsl::spearman_rank_corr, 4),
472
+ )?;
473
+ class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
474
+ class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
427
475
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
428
476
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
429
477
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
430
478
 
431
479
  let class = module.define_class("RbLazyFrame", Default::default())?;
480
+ class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
432
481
  class.define_singleton_method(
433
482
  "new_from_ndjson",
434
483
  function!(RbLazyFrame::new_from_ndjson, 7),
@@ -459,6 +508,8 @@ fn init() -> RbResult<()> {
459
508
  class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
460
509
  class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
461
510
  class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
511
+ class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
512
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
462
513
  class.define_method("join", method!(RbLazyFrame::join, 7))?;
463
514
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
464
515
  class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
@@ -507,7 +558,10 @@ fn init() -> RbResult<()> {
507
558
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
508
559
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
509
560
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
561
+ class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
562
+ class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
510
563
  class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
564
+ class.define_singleton_method("new_opt_datetime", function!(RbSeries::new_opt_datetime, 3))?;
511
565
  class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
512
566
  class.define_method(
513
567
  "is_sorted_reverse_flag",
@@ -563,6 +617,7 @@ fn init() -> RbResult<()> {
563
617
  class.define_method("median", method!(RbSeries::median, 0))?;
564
618
  class.define_method("quantile", method!(RbSeries::quantile, 2))?;
565
619
  class.define_method("_clone", method!(RbSeries::clone, 0))?;
620
+ class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
566
621
  class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
567
622
  class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
568
623
  class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
@@ -577,6 +632,79 @@ fn init() -> RbResult<()> {
577
632
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
578
633
  class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
579
634
 
635
+ // set
636
+ // class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
637
+ class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
638
+ class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
639
+ class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
640
+ class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
641
+ class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
642
+ class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
643
+ class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
644
+ class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
645
+ class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
646
+ class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
647
+ class.define_method(
648
+ "set_with_mask_bool",
649
+ method!(RbSeries::set_with_mask_bool, 2),
650
+ )?;
651
+
652
+ // arithmetic
653
+ class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
654
+ class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
655
+ class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
656
+ class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
657
+ class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
658
+ class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
659
+ class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
660
+ class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
661
+ class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
662
+ class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
663
+ class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
664
+ class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
665
+ class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
666
+ class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
667
+ class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
668
+ class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
669
+ class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
670
+ class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
671
+ class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
672
+ class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
673
+ class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
674
+ class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
675
+ class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
676
+ class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
677
+ class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
678
+ class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
679
+ class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
680
+ class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
681
+ class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
682
+ class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
683
+ class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
684
+ class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
685
+ class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
686
+ class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
687
+ class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
688
+ class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
689
+ class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
690
+ class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
691
+ class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
692
+ class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
693
+ class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
694
+ class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
695
+ class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
696
+ class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
697
+ class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
698
+ class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
699
+ class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
700
+ class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
701
+ class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
702
+ class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
703
+ class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
704
+ class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
705
+ class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
706
+ class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
707
+
580
708
  // eq
581
709
  class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
582
710
  class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
@@ -664,6 +792,31 @@ fn init() -> RbResult<()> {
664
792
  Ok(())
665
793
  }
666
794
 
795
+ #[allow(clippy::too_many_arguments)]
796
+ fn rb_duration(
797
+ days: Option<&RbExpr>,
798
+ seconds: Option<&RbExpr>,
799
+ nanoseconds: Option<&RbExpr>,
800
+ microseconds: Option<&RbExpr>,
801
+ milliseconds: Option<&RbExpr>,
802
+ minutes: Option<&RbExpr>,
803
+ hours: Option<&RbExpr>,
804
+ weeks: Option<&RbExpr>,
805
+ ) -> RbExpr {
806
+ let args = DurationArgs {
807
+ days: days.map(|e| e.inner.clone()),
808
+ seconds: seconds.map(|e| e.inner.clone()),
809
+ nanoseconds: nanoseconds.map(|e| e.inner.clone()),
810
+ microseconds: microseconds.map(|e| e.inner.clone()),
811
+ milliseconds: milliseconds.map(|e| e.inner.clone()),
812
+ minutes: minutes.map(|e| e.inner.clone()),
813
+ hours: hours.map(|e| e.inner.clone()),
814
+ weeks: weeks.map(|e| e.inner.clone()),
815
+ };
816
+
817
+ polars::lazy::dsl::duration(args).into()
818
+ }
819
+
667
820
  fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
668
821
  let mut iter = seq.each();
669
822
  let first = iter.next().unwrap()?;
@@ -692,6 +845,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
692
845
  Ok(df.into())
693
846
  }
694
847
 
848
+ fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
849
+ let (seq, len) = get_rbseq(lfs)?;
850
+ let mut lfs = Vec::with_capacity(len);
851
+
852
+ for res in seq.each() {
853
+ let item = res?;
854
+ let lf = get_lf(item)?;
855
+ lfs.push(lf);
856
+ }
857
+
858
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
859
+ Ok(lf.into())
860
+ }
861
+
695
862
  fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
696
863
  let mut dfs = Vec::new();
697
864
  for item in seq.each() {
@@ -752,6 +919,27 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
752
919
  Ok(dict.into())
753
920
  }
754
921
 
922
+ fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
923
+ use polars_core::utils::rayon::prelude::*;
924
+
925
+ let lfs = lfs
926
+ .each()
927
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
928
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
929
+
930
+ let out = polars_core::POOL.install(|| {
931
+ lfs.par_iter()
932
+ .map(|lf| {
933
+ let df = lf.ldf.clone().collect()?;
934
+ Ok(RbDataFrame::new(df))
935
+ })
936
+ .collect::<polars_core::error::PolarsResult<Vec<_>>>()
937
+ .map_err(RbPolarsErr::from)
938
+ });
939
+
940
+ Ok(out?)
941
+ }
942
+
755
943
  fn rb_date_range(
756
944
  start: i64,
757
945
  stop: i64,
@@ -774,6 +962,16 @@ fn rb_date_range(
774
962
  .into()
775
963
  }
776
964
 
965
+ fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
966
+ let exprs = rb_exprs_to_exprs(exprs)?;
967
+ Ok(polars::lazy::dsl::coalesce(&exprs).into())
968
+ }
969
+
970
+ fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
971
+ let exprs = rb_exprs_to_exprs(exprs)?;
972
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
973
+ }
974
+
777
975
  fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
778
976
  let exprs = rb_exprs_to_exprs(exprs)?;
779
977
  Ok(polars::lazy::dsl::as_struct(&exprs).into())
@@ -0,0 +1,100 @@
1
+ use magnus::Value;
2
+ use polars::prelude::*;
3
+ use polars_core::utils::CustomIterTools;
4
+
5
+ use crate::conversion::get_rbseq;
6
+ use crate::{RbPolarsErr, RbResult};
7
+
8
+ pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
9
+ let (seq, len) = get_rbseq(seq)?;
10
+
11
+ let s = match dtype {
12
+ DataType::Int64 => {
13
+ let mut builder =
14
+ ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
15
+ for sub_seq in seq.each() {
16
+ let sub_seq = sub_seq?;
17
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
18
+
19
+ // safety: we know the iterators len
20
+ let iter = unsafe {
21
+ sub_seq
22
+ .each()
23
+ .map(|v| {
24
+ let v = v.unwrap();
25
+ if v.is_nil() {
26
+ None
27
+ } else {
28
+ Some(v.try_convert::<i64>().unwrap())
29
+ }
30
+ })
31
+ .trust_my_length(len)
32
+ };
33
+ builder.append_iter(iter)
34
+ }
35
+ builder.finish().into_series()
36
+ }
37
+ DataType::Float64 => {
38
+ let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
39
+ name,
40
+ len,
41
+ len * 5,
42
+ DataType::Float64,
43
+ );
44
+ for sub_seq in seq.each() {
45
+ let sub_seq = sub_seq?;
46
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
47
+ // safety: we know the iterators len
48
+ let iter = unsafe {
49
+ sub_seq
50
+ .each()
51
+ .map(|v| {
52
+ let v = v.unwrap();
53
+ if v.is_nil() {
54
+ None
55
+ } else {
56
+ Some(v.try_convert::<f64>().unwrap())
57
+ }
58
+ })
59
+ .trust_my_length(len)
60
+ };
61
+ builder.append_iter(iter)
62
+ }
63
+ builder.finish().into_series()
64
+ }
65
+ DataType::Boolean => {
66
+ let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
67
+ for sub_seq in seq.each() {
68
+ let sub_seq = sub_seq?;
69
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
70
+ // safety: we know the iterators len
71
+ let iter = unsafe {
72
+ sub_seq
73
+ .each()
74
+ .map(|v| {
75
+ let v = v.unwrap();
76
+ if v.is_nil() {
77
+ None
78
+ } else {
79
+ Some(v.try_convert::<bool>().unwrap())
80
+ }
81
+ })
82
+ .trust_my_length(len)
83
+ };
84
+ builder.append_iter(iter)
85
+ }
86
+ builder.finish().into_series()
87
+ }
88
+ DataType::Utf8 => {
89
+ return Err(RbPolarsErr::todo());
90
+ }
91
+ dt => {
92
+ return Err(RbPolarsErr::other(format!(
93
+ "cannot create list array from {:?}",
94
+ dt
95
+ )));
96
+ }
97
+ };
98
+
99
+ Ok(s)
100
+ }