polars-df 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
@@ -1,3 +1,4 @@
1
+ use magnus::block::Proc;
1
2
  use magnus::{class, RArray, RString, Value};
2
3
  use polars::chunked_array::ops::SortOptions;
3
4
  use polars::lazy::dsl;
@@ -659,6 +660,65 @@ impl RbExpr {
659
660
  self.inner.clone().str().starts_with(sub).into()
660
661
  }
661
662
 
663
+ pub fn str_hex_encode(&self) -> Self {
664
+ self.clone()
665
+ .inner
666
+ .map(
667
+ move |s| s.utf8().map(|s| s.hex_encode().into_series()),
668
+ GetOutput::same_type(),
669
+ )
670
+ .with_fmt("str.hex_encode")
671
+ .into()
672
+ }
673
+
674
+ pub fn str_hex_decode(&self, strict: Option<bool>) -> Self {
675
+ self.clone()
676
+ .inner
677
+ .map(
678
+ move |s| s.utf8()?.hex_decode(strict).map(|s| s.into_series()),
679
+ GetOutput::same_type(),
680
+ )
681
+ .with_fmt("str.hex_decode")
682
+ .into()
683
+ }
684
+
685
+ pub fn str_base64_encode(&self) -> Self {
686
+ self.clone()
687
+ .inner
688
+ .map(
689
+ move |s| s.utf8().map(|s| s.base64_encode().into_series()),
690
+ GetOutput::same_type(),
691
+ )
692
+ .with_fmt("str.base64_encode")
693
+ .into()
694
+ }
695
+
696
+ pub fn str_base64_decode(&self, strict: Option<bool>) -> Self {
697
+ self.clone()
698
+ .inner
699
+ .map(
700
+ move |s| s.utf8()?.base64_decode(strict).map(|s| s.into_series()),
701
+ GetOutput::same_type(),
702
+ )
703
+ .with_fmt("str.base64_decode")
704
+ .into()
705
+ }
706
+
707
+ pub fn str_json_path_match(&self, pat: String) -> Self {
708
+ let function = move |s: Series| {
709
+ let ca = s.utf8()?;
710
+ match ca.json_path_match(&pat) {
711
+ Ok(ca) => Ok(ca.into_series()),
712
+ Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
713
+ }
714
+ };
715
+ self.clone()
716
+ .inner
717
+ .map(function, GetOutput::from_type(DataType::Utf8))
718
+ .with_fmt("str.json_path_match")
719
+ .into()
720
+ }
721
+
662
722
  pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
663
723
  self.inner.clone().str().extract(&pat, group_index).into()
664
724
  }
@@ -887,6 +947,14 @@ impl RbExpr {
887
947
  self.inner.clone().dt().round(&every, &offset).into()
888
948
  }
889
949
 
950
+ pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
951
+ map_single(self, lambda, output_type, agg_list)
952
+ }
953
+
954
+ pub fn dot(&self, other: &RbExpr) -> Self {
955
+ self.inner.clone().dot(other.inner.clone()).into()
956
+ }
957
+
890
958
  pub fn reinterpret(&self, signed: bool) -> Self {
891
959
  let function = move |s: Series| reinterpret(&s, signed);
892
960
  let dt = if signed {
@@ -916,6 +984,23 @@ impl RbExpr {
916
984
  self.inner.clone().suffix(&suffix).into()
917
985
  }
918
986
 
987
+ pub fn map_alias(&self, lambda: Proc) -> Self {
988
+ self.inner
989
+ .clone()
990
+ .map_alias(move |name| {
991
+ let out = lambda.call::<_, String>((name,));
992
+ // TODO switch to match
993
+ out.unwrap()
994
+ // match out {
995
+ // Ok(out) => Ok(out.to_string()),
996
+ // Err(e) => Err(PolarsError::ComputeError(
997
+ // format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
998
+ // )),
999
+ // }
1000
+ })
1001
+ .into()
1002
+ }
1003
+
919
1004
  pub fn exclude(&self, columns: Vec<String>) -> Self {
920
1005
  self.inner.clone().exclude(columns).into()
921
1006
  }
@@ -1208,6 +1293,28 @@ impl RbExpr {
1208
1293
  .into()
1209
1294
  }
1210
1295
 
1296
+ pub fn lst_to_struct(
1297
+ &self,
1298
+ width_strat: Wrap<ListToStructWidthStrategy>,
1299
+ _name_gen: Option<Value>,
1300
+ ) -> RbResult<Self> {
1301
+ // TODO fix
1302
+ let name_gen = None;
1303
+ // let name_gen = name_gen.map(|lambda| {
1304
+ // Arc::new(move |idx: usize| {
1305
+ // let out: Value = lambda.funcall("call", (idx,)).unwrap();
1306
+ // out.try_convert::<String>().unwrap()
1307
+ // }) as NameGenerator
1308
+ // });
1309
+
1310
+ Ok(self
1311
+ .inner
1312
+ .clone()
1313
+ .arr()
1314
+ .to_struct(width_strat.0, name_gen)
1315
+ .into())
1316
+ }
1317
+
1211
1318
  pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
1212
1319
  let options = RankOptions {
1213
1320
  method: method.0,
@@ -1365,6 +1472,10 @@ impl RbExpr {
1365
1472
  pub fn entropy(&self, base: f64, normalize: bool) -> Self {
1366
1473
  self.inner.clone().entropy(base, normalize).into()
1367
1474
  }
1475
+
1476
+ pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
1477
+ self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
1478
+ }
1368
1479
  }
1369
1480
 
1370
1481
  pub fn col(name: String) -> RbExpr {
@@ -1394,6 +1505,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1394
1505
  Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
1395
1506
  }
1396
1507
 
1508
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
1509
+ let exprs = rb_exprs_to_exprs(exprs)?;
1510
+
1511
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1512
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
1513
+ }
1514
+
1397
1515
  // TODO improve
1398
1516
  pub fn lit(value: Value) -> RbResult<RbExpr> {
1399
1517
  if value.is_nil() {
@@ -1433,6 +1551,24 @@ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
1433
1551
  }
1434
1552
  }
1435
1553
 
1554
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
1555
+ polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
1556
+ }
1557
+
1558
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
1559
+ polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
1560
+ .into()
1561
+ }
1562
+
1563
+ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1564
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1565
+ }
1566
+
1567
+ pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1568
+ let by = rb_exprs_to_exprs(by)?;
1569
+ Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1570
+ }
1571
+
1436
1572
  #[magnus::wrap(class = "Polars::RbWhen")]
1437
1573
  #[derive(Clone)]
1438
1574
  pub struct RbWhen {
@@ -1,9 +1,11 @@
1
+ mod apply;
1
2
  mod batched_csv;
2
3
  mod conversion;
3
4
  mod dataframe;
4
5
  mod error;
5
6
  mod file;
6
7
  mod lazy;
8
+ mod list_construction;
7
9
  mod series;
8
10
  mod set;
9
11
  mod utils;
@@ -24,9 +26,23 @@ use polars::datatypes::{DataType, TimeUnit};
24
26
  use polars::error::PolarsResult;
25
27
  use polars::frame::DataFrame;
26
28
  use polars::functions::{diag_concat_df, hor_concat_df};
27
- use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
29
+ use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
28
30
  use series::RbSeries;
29
31
 
32
+ #[cfg(target_os = "linux")]
33
+ use jemallocator::Jemalloc;
34
+
35
+ #[cfg(not(target_os = "linux"))]
36
+ use mimalloc::MiMalloc;
37
+
38
+ #[global_allocator]
39
+ #[cfg(target_os = "linux")]
40
+ static GLOBAL: Jemalloc = Jemalloc;
41
+
42
+ #[global_allocator]
43
+ #[cfg(not(target_os = "linux"))]
44
+ static GLOBAL: MiMalloc = MiMalloc;
45
+
30
46
  type RbResult<T> = Result<T, Error>;
31
47
 
32
48
  fn module() -> RModule {
@@ -40,13 +56,18 @@ fn series() -> RClass {
40
56
  #[magnus::init]
41
57
  fn init() -> RbResult<()> {
42
58
  let module = module();
59
+ module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
43
60
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
61
+ module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
44
62
  module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
45
63
  module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
46
64
  module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
47
65
  module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
48
66
  module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
67
+ module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
49
68
  module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
69
+ module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
70
+ module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
50
71
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
51
72
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
52
73
 
@@ -59,10 +80,13 @@ fn init() -> RbResult<()> {
59
80
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
60
81
  class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
61
82
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
83
+ class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
84
+ class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
62
85
  class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
63
86
  class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
64
87
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
65
88
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
89
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
66
90
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
67
91
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
68
92
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
@@ -128,6 +152,7 @@ fn init() -> RbResult<()> {
128
152
  class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
129
153
  class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
130
154
  class.define_method("melt", method!(RbDataFrame::melt, 4))?;
155
+ class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
131
156
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
132
157
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
133
158
  class.define_method("unique", method!(RbDataFrame::unique, 3))?;
@@ -146,7 +171,9 @@ fn init() -> RbResult<()> {
146
171
  class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
147
172
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
148
173
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
174
+ class.define_method("apply", method!(RbDataFrame::apply, 3))?;
149
175
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
176
+ class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
150
177
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
151
178
  class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
152
179
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
@@ -281,6 +308,14 @@ fn init() -> RbResult<()> {
281
308
  class.define_method("str_contains", method!(RbExpr::str_contains, 2))?;
282
309
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
283
310
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
311
+ class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
312
+ class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
313
+ class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
314
+ class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
315
+ class.define_method(
316
+ "str_json_path_match",
317
+ method!(RbExpr::str_json_path_match, 1),
318
+ )?;
284
319
  class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
285
320
  class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
286
321
  class.define_method("count_match", method!(RbExpr::count_match, 1))?;
@@ -338,11 +373,14 @@ fn init() -> RbResult<()> {
338
373
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
339
374
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
340
375
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
376
+ class.define_method("map", method!(RbExpr::map, 3))?;
377
+ class.define_method("dot", method!(RbExpr::dot, 1))?;
341
378
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
342
379
  class.define_method("mode", method!(RbExpr::mode, 0))?;
343
380
  class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
344
381
  class.define_method("prefix", method!(RbExpr::prefix, 1))?;
345
382
  class.define_method("suffix", method!(RbExpr::suffix, 1))?;
383
+ class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
346
384
  class.define_method("exclude", method!(RbExpr::exclude, 1))?;
347
385
  class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
348
386
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
@@ -372,6 +410,7 @@ fn init() -> RbResult<()> {
372
410
  class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
373
411
  class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
374
412
  class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
413
+ class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
375
414
  class.define_method("rank", method!(RbExpr::rank, 2))?;
376
415
  class.define_method("diff", method!(RbExpr::diff, 2))?;
377
416
  class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
@@ -406,6 +445,7 @@ fn init() -> RbResult<()> {
406
445
  class.define_method("log", method!(RbExpr::log, 1))?;
407
446
  class.define_method("exp", method!(RbExpr::exp, 0))?;
408
447
  class.define_method("entropy", method!(RbExpr::entropy, 2))?;
448
+ class.define_method("_hash", method!(RbExpr::hash, 4))?;
409
449
 
410
450
  // meta
411
451
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
@@ -421,14 +461,23 @@ fn init() -> RbResult<()> {
421
461
  class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
422
462
  class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
423
463
  class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
464
+ class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
424
465
  class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
425
466
  class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
426
467
  class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
468
+ class.define_singleton_method("pearson_corr", function!(crate::lazy::dsl::pearson_corr, 3))?;
469
+ class.define_singleton_method(
470
+ "spearman_rank_corr",
471
+ function!(crate::lazy::dsl::spearman_rank_corr, 4),
472
+ )?;
473
+ class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
474
+ class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
427
475
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
428
476
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
429
477
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
430
478
 
431
479
  let class = module.define_class("RbLazyFrame", Default::default())?;
480
+ class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
432
481
  class.define_singleton_method(
433
482
  "new_from_ndjson",
434
483
  function!(RbLazyFrame::new_from_ndjson, 7),
@@ -459,6 +508,8 @@ fn init() -> RbResult<()> {
459
508
  class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
460
509
  class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
461
510
  class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
511
+ class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
512
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
462
513
  class.define_method("join", method!(RbLazyFrame::join, 7))?;
463
514
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
464
515
  class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
@@ -507,7 +558,10 @@ fn init() -> RbResult<()> {
507
558
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
508
559
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
509
560
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
561
+ class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
562
+ class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
510
563
  class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
564
+ class.define_singleton_method("new_opt_datetime", function!(RbSeries::new_opt_datetime, 3))?;
511
565
  class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
512
566
  class.define_method(
513
567
  "is_sorted_reverse_flag",
@@ -563,6 +617,7 @@ fn init() -> RbResult<()> {
563
617
  class.define_method("median", method!(RbSeries::median, 0))?;
564
618
  class.define_method("quantile", method!(RbSeries::quantile, 2))?;
565
619
  class.define_method("_clone", method!(RbSeries::clone, 0))?;
620
+ class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
566
621
  class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
567
622
  class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
568
623
  class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
@@ -577,6 +632,79 @@ fn init() -> RbResult<()> {
577
632
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
578
633
  class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
579
634
 
635
+ // set
636
+ // class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
637
+ class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
638
+ class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
639
+ class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
640
+ class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
641
+ class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
642
+ class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
643
+ class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
644
+ class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
645
+ class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
646
+ class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
647
+ class.define_method(
648
+ "set_with_mask_bool",
649
+ method!(RbSeries::set_with_mask_bool, 2),
650
+ )?;
651
+
652
+ // arithmetic
653
+ class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
654
+ class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
655
+ class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
656
+ class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
657
+ class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
658
+ class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
659
+ class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
660
+ class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
661
+ class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
662
+ class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
663
+ class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
664
+ class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
665
+ class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
666
+ class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
667
+ class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
668
+ class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
669
+ class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
670
+ class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
671
+ class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
672
+ class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
673
+ class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
674
+ class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
675
+ class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
676
+ class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
677
+ class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
678
+ class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
679
+ class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
680
+ class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
681
+ class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
682
+ class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
683
+ class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
684
+ class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
685
+ class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
686
+ class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
687
+ class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
688
+ class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
689
+ class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
690
+ class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
691
+ class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
692
+ class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
693
+ class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
694
+ class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
695
+ class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
696
+ class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
697
+ class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
698
+ class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
699
+ class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
700
+ class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
701
+ class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
702
+ class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
703
+ class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
704
+ class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
705
+ class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
706
+ class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
707
+
580
708
  // eq
581
709
  class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
582
710
  class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
@@ -664,6 +792,31 @@ fn init() -> RbResult<()> {
664
792
  Ok(())
665
793
  }
666
794
 
795
+ #[allow(clippy::too_many_arguments)]
796
+ fn rb_duration(
797
+ days: Option<&RbExpr>,
798
+ seconds: Option<&RbExpr>,
799
+ nanoseconds: Option<&RbExpr>,
800
+ microseconds: Option<&RbExpr>,
801
+ milliseconds: Option<&RbExpr>,
802
+ minutes: Option<&RbExpr>,
803
+ hours: Option<&RbExpr>,
804
+ weeks: Option<&RbExpr>,
805
+ ) -> RbExpr {
806
+ let args = DurationArgs {
807
+ days: days.map(|e| e.inner.clone()),
808
+ seconds: seconds.map(|e| e.inner.clone()),
809
+ nanoseconds: nanoseconds.map(|e| e.inner.clone()),
810
+ microseconds: microseconds.map(|e| e.inner.clone()),
811
+ milliseconds: milliseconds.map(|e| e.inner.clone()),
812
+ minutes: minutes.map(|e| e.inner.clone()),
813
+ hours: hours.map(|e| e.inner.clone()),
814
+ weeks: weeks.map(|e| e.inner.clone()),
815
+ };
816
+
817
+ polars::lazy::dsl::duration(args).into()
818
+ }
819
+
667
820
  fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
668
821
  let mut iter = seq.each();
669
822
  let first = iter.next().unwrap()?;
@@ -692,6 +845,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
692
845
  Ok(df.into())
693
846
  }
694
847
 
848
+ fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
849
+ let (seq, len) = get_rbseq(lfs)?;
850
+ let mut lfs = Vec::with_capacity(len);
851
+
852
+ for res in seq.each() {
853
+ let item = res?;
854
+ let lf = get_lf(item)?;
855
+ lfs.push(lf);
856
+ }
857
+
858
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
859
+ Ok(lf.into())
860
+ }
861
+
695
862
  fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
696
863
  let mut dfs = Vec::new();
697
864
  for item in seq.each() {
@@ -752,6 +919,27 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
752
919
  Ok(dict.into())
753
920
  }
754
921
 
922
+ fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
923
+ use polars_core::utils::rayon::prelude::*;
924
+
925
+ let lfs = lfs
926
+ .each()
927
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
928
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
929
+
930
+ let out = polars_core::POOL.install(|| {
931
+ lfs.par_iter()
932
+ .map(|lf| {
933
+ let df = lf.ldf.clone().collect()?;
934
+ Ok(RbDataFrame::new(df))
935
+ })
936
+ .collect::<polars_core::error::PolarsResult<Vec<_>>>()
937
+ .map_err(RbPolarsErr::from)
938
+ });
939
+
940
+ Ok(out?)
941
+ }
942
+
755
943
  fn rb_date_range(
756
944
  start: i64,
757
945
  stop: i64,
@@ -774,6 +962,16 @@ fn rb_date_range(
774
962
  .into()
775
963
  }
776
964
 
965
+ fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
966
+ let exprs = rb_exprs_to_exprs(exprs)?;
967
+ Ok(polars::lazy::dsl::coalesce(&exprs).into())
968
+ }
969
+
970
+ fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
971
+ let exprs = rb_exprs_to_exprs(exprs)?;
972
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
973
+ }
974
+
777
975
  fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
778
976
  let exprs = rb_exprs_to_exprs(exprs)?;
779
977
  Ok(polars::lazy::dsl::as_struct(&exprs).into())
@@ -0,0 +1,100 @@
1
+ use magnus::Value;
2
+ use polars::prelude::*;
3
+ use polars_core::utils::CustomIterTools;
4
+
5
+ use crate::conversion::get_rbseq;
6
+ use crate::{RbPolarsErr, RbResult};
7
+
8
+ pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
9
+ let (seq, len) = get_rbseq(seq)?;
10
+
11
+ let s = match dtype {
12
+ DataType::Int64 => {
13
+ let mut builder =
14
+ ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
15
+ for sub_seq in seq.each() {
16
+ let sub_seq = sub_seq?;
17
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
18
+
19
+ // safety: we know the iterators len
20
+ let iter = unsafe {
21
+ sub_seq
22
+ .each()
23
+ .map(|v| {
24
+ let v = v.unwrap();
25
+ if v.is_nil() {
26
+ None
27
+ } else {
28
+ Some(v.try_convert::<i64>().unwrap())
29
+ }
30
+ })
31
+ .trust_my_length(len)
32
+ };
33
+ builder.append_iter(iter)
34
+ }
35
+ builder.finish().into_series()
36
+ }
37
+ DataType::Float64 => {
38
+ let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
39
+ name,
40
+ len,
41
+ len * 5,
42
+ DataType::Float64,
43
+ );
44
+ for sub_seq in seq.each() {
45
+ let sub_seq = sub_seq?;
46
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
47
+ // safety: we know the iterators len
48
+ let iter = unsafe {
49
+ sub_seq
50
+ .each()
51
+ .map(|v| {
52
+ let v = v.unwrap();
53
+ if v.is_nil() {
54
+ None
55
+ } else {
56
+ Some(v.try_convert::<f64>().unwrap())
57
+ }
58
+ })
59
+ .trust_my_length(len)
60
+ };
61
+ builder.append_iter(iter)
62
+ }
63
+ builder.finish().into_series()
64
+ }
65
+ DataType::Boolean => {
66
+ let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
67
+ for sub_seq in seq.each() {
68
+ let sub_seq = sub_seq?;
69
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
70
+ // safety: we know the iterators len
71
+ let iter = unsafe {
72
+ sub_seq
73
+ .each()
74
+ .map(|v| {
75
+ let v = v.unwrap();
76
+ if v.is_nil() {
77
+ None
78
+ } else {
79
+ Some(v.try_convert::<bool>().unwrap())
80
+ }
81
+ })
82
+ .trust_my_length(len)
83
+ };
84
+ builder.append_iter(iter)
85
+ }
86
+ builder.finish().into_series()
87
+ }
88
+ DataType::Utf8 => {
89
+ return Err(RbPolarsErr::todo());
90
+ }
91
+ dt => {
92
+ return Err(RbPolarsErr::other(format!(
93
+ "cannot create list array from {:?}",
94
+ dt
95
+ )));
96
+ }
97
+ };
98
+
99
+ Ok(s)
100
+ }