polars-df 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
data/ext/polars/src/lib.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
mod apply;
|
1
2
|
mod batched_csv;
|
2
3
|
mod conversion;
|
3
4
|
mod dataframe;
|
@@ -25,7 +26,7 @@ use polars::datatypes::{DataType, TimeUnit};
|
|
25
26
|
use polars::error::PolarsResult;
|
26
27
|
use polars::frame::DataFrame;
|
27
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
28
|
-
use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
|
29
|
+
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
29
30
|
use series::RbSeries;
|
30
31
|
|
31
32
|
#[cfg(target_os = "linux")]
|
@@ -55,13 +56,18 @@ fn series() -> RClass {
|
|
55
56
|
#[magnus::init]
|
56
57
|
fn init() -> RbResult<()> {
|
57
58
|
let module = module();
|
59
|
+
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
58
60
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
61
|
+
module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
|
59
62
|
module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
|
60
63
|
module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
|
61
64
|
module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
|
62
65
|
module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
|
63
66
|
module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
|
67
|
+
module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
|
64
68
|
module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
|
69
|
+
module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
|
70
|
+
module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
|
65
71
|
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
66
72
|
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
67
73
|
|
@@ -74,10 +80,13 @@ fn init() -> RbResult<()> {
|
|
74
80
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
75
81
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
|
76
82
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
83
|
+
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
84
|
+
class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
|
77
85
|
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
78
86
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
79
87
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
80
88
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
89
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
81
90
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
82
91
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
83
92
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
@@ -143,6 +152,7 @@ fn init() -> RbResult<()> {
|
|
143
152
|
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
144
153
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
145
154
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
155
|
+
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
|
146
156
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
147
157
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
148
158
|
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
@@ -161,7 +171,9 @@ fn init() -> RbResult<()> {
|
|
161
171
|
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
162
172
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
|
163
173
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
174
|
+
class.define_method("apply", method!(RbDataFrame::apply, 3))?;
|
164
175
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
176
|
+
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
165
177
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
166
178
|
class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
|
167
179
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
@@ -361,12 +373,14 @@ fn init() -> RbResult<()> {
|
|
361
373
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
362
374
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
363
375
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
376
|
+
class.define_method("map", method!(RbExpr::map, 3))?;
|
364
377
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
365
378
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
366
379
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
367
380
|
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
368
381
|
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
369
382
|
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
383
|
+
class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
|
370
384
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
371
385
|
class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
|
372
386
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
|
@@ -431,6 +445,7 @@ fn init() -> RbResult<()> {
|
|
431
445
|
class.define_method("log", method!(RbExpr::log, 1))?;
|
432
446
|
class.define_method("exp", method!(RbExpr::exp, 0))?;
|
433
447
|
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
448
|
+
class.define_method("_hash", method!(RbExpr::hash, 4))?;
|
434
449
|
|
435
450
|
// meta
|
436
451
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -446,6 +461,7 @@ fn init() -> RbResult<()> {
|
|
446
461
|
class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
|
447
462
|
class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
|
448
463
|
class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
|
464
|
+
class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
|
449
465
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
450
466
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
451
467
|
class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
|
@@ -455,11 +471,13 @@ fn init() -> RbResult<()> {
|
|
455
471
|
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
456
472
|
)?;
|
457
473
|
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
474
|
+
class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
|
458
475
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
459
476
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
460
477
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
461
478
|
|
462
479
|
let class = module.define_class("RbLazyFrame", Default::default())?;
|
480
|
+
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
463
481
|
class.define_singleton_method(
|
464
482
|
"new_from_ndjson",
|
465
483
|
function!(RbLazyFrame::new_from_ndjson, 7),
|
@@ -490,6 +508,8 @@ fn init() -> RbResult<()> {
|
|
490
508
|
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
491
509
|
class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
|
492
510
|
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
|
511
|
+
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
512
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
493
513
|
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
494
514
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
495
515
|
class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
|
@@ -597,6 +617,7 @@ fn init() -> RbResult<()> {
|
|
597
617
|
class.define_method("median", method!(RbSeries::median, 0))?;
|
598
618
|
class.define_method("quantile", method!(RbSeries::quantile, 2))?;
|
599
619
|
class.define_method("_clone", method!(RbSeries::clone, 0))?;
|
620
|
+
class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
|
600
621
|
class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
|
601
622
|
class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
|
602
623
|
class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
|
@@ -611,6 +632,79 @@ fn init() -> RbResult<()> {
|
|
611
632
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
612
633
|
class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
|
613
634
|
|
635
|
+
// set
|
636
|
+
// class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
|
637
|
+
class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
|
638
|
+
class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
|
639
|
+
class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
|
640
|
+
class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
|
641
|
+
class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
|
642
|
+
class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
|
643
|
+
class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
|
644
|
+
class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
|
645
|
+
class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
|
646
|
+
class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
|
647
|
+
class.define_method(
|
648
|
+
"set_with_mask_bool",
|
649
|
+
method!(RbSeries::set_with_mask_bool, 2),
|
650
|
+
)?;
|
651
|
+
|
652
|
+
// arithmetic
|
653
|
+
class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
|
654
|
+
class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
|
655
|
+
class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
|
656
|
+
class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
|
657
|
+
class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
|
658
|
+
class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
|
659
|
+
class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
|
660
|
+
class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
|
661
|
+
class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
|
662
|
+
class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
|
663
|
+
class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
|
664
|
+
class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
|
665
|
+
class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
|
666
|
+
class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
|
667
|
+
class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
|
668
|
+
class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
|
669
|
+
class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
|
670
|
+
class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
|
671
|
+
class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
|
672
|
+
class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
|
673
|
+
class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
|
674
|
+
class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
|
675
|
+
class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
|
676
|
+
class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
|
677
|
+
class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
|
678
|
+
class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
|
679
|
+
class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
|
680
|
+
class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
|
681
|
+
class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
|
682
|
+
class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
|
683
|
+
class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
|
684
|
+
class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
|
685
|
+
class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
|
686
|
+
class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
|
687
|
+
class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
|
688
|
+
class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
|
689
|
+
class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
|
690
|
+
class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
|
691
|
+
class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
|
692
|
+
class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
|
693
|
+
class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
|
694
|
+
class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
|
695
|
+
class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
|
696
|
+
class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
|
697
|
+
class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
|
698
|
+
class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
|
699
|
+
class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
|
700
|
+
class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
|
701
|
+
class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
|
702
|
+
class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
|
703
|
+
class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
|
704
|
+
class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
|
705
|
+
class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
|
706
|
+
class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
|
707
|
+
|
614
708
|
// eq
|
615
709
|
class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
|
616
710
|
class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
|
@@ -698,6 +792,31 @@ fn init() -> RbResult<()> {
|
|
698
792
|
Ok(())
|
699
793
|
}
|
700
794
|
|
795
|
+
#[allow(clippy::too_many_arguments)]
|
796
|
+
fn rb_duration(
|
797
|
+
days: Option<&RbExpr>,
|
798
|
+
seconds: Option<&RbExpr>,
|
799
|
+
nanoseconds: Option<&RbExpr>,
|
800
|
+
microseconds: Option<&RbExpr>,
|
801
|
+
milliseconds: Option<&RbExpr>,
|
802
|
+
minutes: Option<&RbExpr>,
|
803
|
+
hours: Option<&RbExpr>,
|
804
|
+
weeks: Option<&RbExpr>,
|
805
|
+
) -> RbExpr {
|
806
|
+
let args = DurationArgs {
|
807
|
+
days: days.map(|e| e.inner.clone()),
|
808
|
+
seconds: seconds.map(|e| e.inner.clone()),
|
809
|
+
nanoseconds: nanoseconds.map(|e| e.inner.clone()),
|
810
|
+
microseconds: microseconds.map(|e| e.inner.clone()),
|
811
|
+
milliseconds: milliseconds.map(|e| e.inner.clone()),
|
812
|
+
minutes: minutes.map(|e| e.inner.clone()),
|
813
|
+
hours: hours.map(|e| e.inner.clone()),
|
814
|
+
weeks: weeks.map(|e| e.inner.clone()),
|
815
|
+
};
|
816
|
+
|
817
|
+
polars::lazy::dsl::duration(args).into()
|
818
|
+
}
|
819
|
+
|
701
820
|
fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
702
821
|
let mut iter = seq.each();
|
703
822
|
let first = iter.next().unwrap()?;
|
@@ -726,6 +845,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
726
845
|
Ok(df.into())
|
727
846
|
}
|
728
847
|
|
848
|
+
fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
849
|
+
let (seq, len) = get_rbseq(lfs)?;
|
850
|
+
let mut lfs = Vec::with_capacity(len);
|
851
|
+
|
852
|
+
for res in seq.each() {
|
853
|
+
let item = res?;
|
854
|
+
let lf = get_lf(item)?;
|
855
|
+
lfs.push(lf);
|
856
|
+
}
|
857
|
+
|
858
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
859
|
+
Ok(lf.into())
|
860
|
+
}
|
861
|
+
|
729
862
|
fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
730
863
|
let mut dfs = Vec::new();
|
731
864
|
for item in seq.each() {
|
@@ -786,6 +919,27 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
|
786
919
|
Ok(dict.into())
|
787
920
|
}
|
788
921
|
|
922
|
+
fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
|
923
|
+
use polars_core::utils::rayon::prelude::*;
|
924
|
+
|
925
|
+
let lfs = lfs
|
926
|
+
.each()
|
927
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
928
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
929
|
+
|
930
|
+
let out = polars_core::POOL.install(|| {
|
931
|
+
lfs.par_iter()
|
932
|
+
.map(|lf| {
|
933
|
+
let df = lf.ldf.clone().collect()?;
|
934
|
+
Ok(RbDataFrame::new(df))
|
935
|
+
})
|
936
|
+
.collect::<polars_core::error::PolarsResult<Vec<_>>>()
|
937
|
+
.map_err(RbPolarsErr::from)
|
938
|
+
});
|
939
|
+
|
940
|
+
Ok(out?)
|
941
|
+
}
|
942
|
+
|
789
943
|
fn rb_date_range(
|
790
944
|
start: i64,
|
791
945
|
stop: i64,
|
@@ -808,6 +962,16 @@ fn rb_date_range(
|
|
808
962
|
.into()
|
809
963
|
}
|
810
964
|
|
965
|
+
fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
966
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
967
|
+
Ok(polars::lazy::dsl::coalesce(&exprs).into())
|
968
|
+
}
|
969
|
+
|
970
|
+
fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
971
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
972
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
973
|
+
}
|
974
|
+
|
811
975
|
fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
812
976
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
813
977
|
Ok(polars::lazy::dsl::as_struct(&exprs).into())
|
data/ext/polars/src/series.rs
CHANGED
@@ -4,6 +4,8 @@ use polars::prelude::*;
|
|
4
4
|
use polars::series::IsSorted;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
|
+
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
8
|
+
use crate::apply_method_all_arrow_series2;
|
7
9
|
use crate::conversion::*;
|
8
10
|
use crate::list_construction::rb_seq_to_list;
|
9
11
|
use crate::set::set_at_idx;
|
@@ -529,6 +531,198 @@ impl RbSeries {
|
|
529
531
|
RbSeries::new(self.series.borrow().clone())
|
530
532
|
}
|
531
533
|
|
534
|
+
pub fn apply_lambda(
|
535
|
+
&self,
|
536
|
+
lambda: Value,
|
537
|
+
output_type: Option<Wrap<DataType>>,
|
538
|
+
skip_nulls: bool,
|
539
|
+
) -> RbResult<Self> {
|
540
|
+
let series = &self.series.borrow();
|
541
|
+
|
542
|
+
let output_type = output_type.map(|dt| dt.0);
|
543
|
+
|
544
|
+
macro_rules! dispatch_apply {
|
545
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
546
|
+
if matches!($self.dtype(), DataType::Object(_)) {
|
547
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
548
|
+
// ca.$method($($args),*)
|
549
|
+
todo!()
|
550
|
+
} else {
|
551
|
+
apply_method_all_arrow_series2!(
|
552
|
+
$self,
|
553
|
+
$method,
|
554
|
+
$($args),*
|
555
|
+
)
|
556
|
+
}
|
557
|
+
|
558
|
+
}
|
559
|
+
|
560
|
+
}
|
561
|
+
|
562
|
+
if matches!(
|
563
|
+
series.dtype(),
|
564
|
+
DataType::Datetime(_, _)
|
565
|
+
| DataType::Date
|
566
|
+
| DataType::Duration(_)
|
567
|
+
| DataType::Categorical(_)
|
568
|
+
| DataType::Time
|
569
|
+
) || !skip_nulls
|
570
|
+
{
|
571
|
+
let mut avs = Vec::with_capacity(series.len());
|
572
|
+
let iter = series.iter().map(|av| {
|
573
|
+
let input = Wrap(av);
|
574
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
575
|
+
.unwrap()
|
576
|
+
.0
|
577
|
+
});
|
578
|
+
avs.extend(iter);
|
579
|
+
return Ok(Series::new(&self.name(), &avs).into());
|
580
|
+
}
|
581
|
+
|
582
|
+
let out = match output_type {
|
583
|
+
Some(DataType::Int8) => {
|
584
|
+
let ca: Int8Chunked = dispatch_apply!(
|
585
|
+
series,
|
586
|
+
apply_lambda_with_primitive_out_type,
|
587
|
+
lambda,
|
588
|
+
0,
|
589
|
+
None
|
590
|
+
)?;
|
591
|
+
ca.into_series()
|
592
|
+
}
|
593
|
+
Some(DataType::Int16) => {
|
594
|
+
let ca: Int16Chunked = dispatch_apply!(
|
595
|
+
series,
|
596
|
+
apply_lambda_with_primitive_out_type,
|
597
|
+
lambda,
|
598
|
+
0,
|
599
|
+
None
|
600
|
+
)?;
|
601
|
+
ca.into_series()
|
602
|
+
}
|
603
|
+
Some(DataType::Int32) => {
|
604
|
+
let ca: Int32Chunked = dispatch_apply!(
|
605
|
+
series,
|
606
|
+
apply_lambda_with_primitive_out_type,
|
607
|
+
lambda,
|
608
|
+
0,
|
609
|
+
None
|
610
|
+
)?;
|
611
|
+
ca.into_series()
|
612
|
+
}
|
613
|
+
Some(DataType::Int64) => {
|
614
|
+
let ca: Int64Chunked = dispatch_apply!(
|
615
|
+
series,
|
616
|
+
apply_lambda_with_primitive_out_type,
|
617
|
+
lambda,
|
618
|
+
0,
|
619
|
+
None
|
620
|
+
)?;
|
621
|
+
ca.into_series()
|
622
|
+
}
|
623
|
+
Some(DataType::UInt8) => {
|
624
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
625
|
+
series,
|
626
|
+
apply_lambda_with_primitive_out_type,
|
627
|
+
lambda,
|
628
|
+
0,
|
629
|
+
None
|
630
|
+
)?;
|
631
|
+
ca.into_series()
|
632
|
+
}
|
633
|
+
Some(DataType::UInt16) => {
|
634
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
635
|
+
series,
|
636
|
+
apply_lambda_with_primitive_out_type,
|
637
|
+
lambda,
|
638
|
+
0,
|
639
|
+
None
|
640
|
+
)?;
|
641
|
+
ca.into_series()
|
642
|
+
}
|
643
|
+
Some(DataType::UInt32) => {
|
644
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
645
|
+
series,
|
646
|
+
apply_lambda_with_primitive_out_type,
|
647
|
+
lambda,
|
648
|
+
0,
|
649
|
+
None
|
650
|
+
)?;
|
651
|
+
ca.into_series()
|
652
|
+
}
|
653
|
+
Some(DataType::UInt64) => {
|
654
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
655
|
+
series,
|
656
|
+
apply_lambda_with_primitive_out_type,
|
657
|
+
lambda,
|
658
|
+
0,
|
659
|
+
None
|
660
|
+
)?;
|
661
|
+
ca.into_series()
|
662
|
+
}
|
663
|
+
Some(DataType::Float32) => {
|
664
|
+
let ca: Float32Chunked = dispatch_apply!(
|
665
|
+
series,
|
666
|
+
apply_lambda_with_primitive_out_type,
|
667
|
+
lambda,
|
668
|
+
0,
|
669
|
+
None
|
670
|
+
)?;
|
671
|
+
ca.into_series()
|
672
|
+
}
|
673
|
+
Some(DataType::Float64) => {
|
674
|
+
let ca: Float64Chunked = dispatch_apply!(
|
675
|
+
series,
|
676
|
+
apply_lambda_with_primitive_out_type,
|
677
|
+
lambda,
|
678
|
+
0,
|
679
|
+
None
|
680
|
+
)?;
|
681
|
+
ca.into_series()
|
682
|
+
}
|
683
|
+
Some(DataType::Boolean) => {
|
684
|
+
let ca: BooleanChunked =
|
685
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
686
|
+
ca.into_series()
|
687
|
+
}
|
688
|
+
Some(DataType::Date) => {
|
689
|
+
let ca: Int32Chunked = dispatch_apply!(
|
690
|
+
series,
|
691
|
+
apply_lambda_with_primitive_out_type,
|
692
|
+
lambda,
|
693
|
+
0,
|
694
|
+
None
|
695
|
+
)?;
|
696
|
+
ca.into_date().into_series()
|
697
|
+
}
|
698
|
+
Some(DataType::Datetime(tu, tz)) => {
|
699
|
+
let ca: Int64Chunked = dispatch_apply!(
|
700
|
+
series,
|
701
|
+
apply_lambda_with_primitive_out_type,
|
702
|
+
lambda,
|
703
|
+
0,
|
704
|
+
None
|
705
|
+
)?;
|
706
|
+
ca.into_datetime(tu, tz).into_series()
|
707
|
+
}
|
708
|
+
Some(DataType::Utf8) => {
|
709
|
+
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
710
|
+
|
711
|
+
ca.into_series()
|
712
|
+
}
|
713
|
+
Some(DataType::Object(_)) => {
|
714
|
+
let ca =
|
715
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
716
|
+
ca.into_series()
|
717
|
+
}
|
718
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
719
|
+
|
720
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
721
|
+
};
|
722
|
+
|
723
|
+
Ok(RbSeries::new(out))
|
724
|
+
}
|
725
|
+
|
532
726
|
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
533
727
|
let binding = mask.series.borrow();
|
534
728
|
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
@@ -627,6 +821,108 @@ impl RbSeries {
|
|
627
821
|
}
|
628
822
|
}
|
629
823
|
|
824
|
+
macro_rules! impl_set_with_mask {
|
825
|
+
($name:ident, $native:ty, $cast:ident, $variant:ident) => {
|
826
|
+
fn $name(
|
827
|
+
series: &Series,
|
828
|
+
filter: &RbSeries,
|
829
|
+
value: Option<$native>,
|
830
|
+
) -> PolarsResult<Series> {
|
831
|
+
let binding = filter.series.borrow();
|
832
|
+
let mask = binding.bool()?;
|
833
|
+
let ca = series.$cast()?;
|
834
|
+
let new = ca.set(mask, value)?;
|
835
|
+
Ok(new.into_series())
|
836
|
+
}
|
837
|
+
|
838
|
+
impl RbSeries {
|
839
|
+
pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
|
840
|
+
let series =
|
841
|
+
$name(&self.series.borrow(), filter, value).map_err(RbPolarsErr::from)?;
|
842
|
+
Ok(Self::new(series))
|
843
|
+
}
|
844
|
+
}
|
845
|
+
};
|
846
|
+
}
|
847
|
+
|
848
|
+
// impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
|
849
|
+
impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
|
850
|
+
impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
|
851
|
+
impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
|
852
|
+
impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
|
853
|
+
impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
|
854
|
+
impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
|
855
|
+
impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
|
856
|
+
impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
|
857
|
+
impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
|
858
|
+
impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
|
859
|
+
impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
|
860
|
+
|
861
|
+
macro_rules! impl_arithmetic {
|
862
|
+
($name:ident, $type:ty, $operand:tt) => {
|
863
|
+
impl RbSeries {
|
864
|
+
pub fn $name(&self, other: $type) -> RbResult<Self> {
|
865
|
+
Ok(RbSeries::new(&*self.series.borrow() $operand other))
|
866
|
+
}
|
867
|
+
}
|
868
|
+
};
|
869
|
+
}
|
870
|
+
|
871
|
+
impl_arithmetic!(add_u8, u8, +);
|
872
|
+
impl_arithmetic!(add_u16, u16, +);
|
873
|
+
impl_arithmetic!(add_u32, u32, +);
|
874
|
+
impl_arithmetic!(add_u64, u64, +);
|
875
|
+
impl_arithmetic!(add_i8, i8, +);
|
876
|
+
impl_arithmetic!(add_i16, i16, +);
|
877
|
+
impl_arithmetic!(add_i32, i32, +);
|
878
|
+
impl_arithmetic!(add_i64, i64, +);
|
879
|
+
impl_arithmetic!(add_datetime, i64, +);
|
880
|
+
impl_arithmetic!(add_duration, i64, +);
|
881
|
+
impl_arithmetic!(add_f32, f32, +);
|
882
|
+
impl_arithmetic!(add_f64, f64, +);
|
883
|
+
impl_arithmetic!(sub_u8, u8, -);
|
884
|
+
impl_arithmetic!(sub_u16, u16, -);
|
885
|
+
impl_arithmetic!(sub_u32, u32, -);
|
886
|
+
impl_arithmetic!(sub_u64, u64, -);
|
887
|
+
impl_arithmetic!(sub_i8, i8, -);
|
888
|
+
impl_arithmetic!(sub_i16, i16, -);
|
889
|
+
impl_arithmetic!(sub_i32, i32, -);
|
890
|
+
impl_arithmetic!(sub_i64, i64, -);
|
891
|
+
impl_arithmetic!(sub_datetime, i64, -);
|
892
|
+
impl_arithmetic!(sub_duration, i64, -);
|
893
|
+
impl_arithmetic!(sub_f32, f32, -);
|
894
|
+
impl_arithmetic!(sub_f64, f64, -);
|
895
|
+
impl_arithmetic!(div_u8, u8, /);
|
896
|
+
impl_arithmetic!(div_u16, u16, /);
|
897
|
+
impl_arithmetic!(div_u32, u32, /);
|
898
|
+
impl_arithmetic!(div_u64, u64, /);
|
899
|
+
impl_arithmetic!(div_i8, i8, /);
|
900
|
+
impl_arithmetic!(div_i16, i16, /);
|
901
|
+
impl_arithmetic!(div_i32, i32, /);
|
902
|
+
impl_arithmetic!(div_i64, i64, /);
|
903
|
+
impl_arithmetic!(div_f32, f32, /);
|
904
|
+
impl_arithmetic!(div_f64, f64, /);
|
905
|
+
impl_arithmetic!(mul_u8, u8, *);
|
906
|
+
impl_arithmetic!(mul_u16, u16, *);
|
907
|
+
impl_arithmetic!(mul_u32, u32, *);
|
908
|
+
impl_arithmetic!(mul_u64, u64, *);
|
909
|
+
impl_arithmetic!(mul_i8, i8, *);
|
910
|
+
impl_arithmetic!(mul_i16, i16, *);
|
911
|
+
impl_arithmetic!(mul_i32, i32, *);
|
912
|
+
impl_arithmetic!(mul_i64, i64, *);
|
913
|
+
impl_arithmetic!(mul_f32, f32, *);
|
914
|
+
impl_arithmetic!(mul_f64, f64, *);
|
915
|
+
impl_arithmetic!(rem_u8, u8, %);
|
916
|
+
impl_arithmetic!(rem_u16, u16, %);
|
917
|
+
impl_arithmetic!(rem_u32, u32, %);
|
918
|
+
impl_arithmetic!(rem_u64, u64, %);
|
919
|
+
impl_arithmetic!(rem_i8, i8, %);
|
920
|
+
impl_arithmetic!(rem_i16, i16, %);
|
921
|
+
impl_arithmetic!(rem_i32, i32, %);
|
922
|
+
impl_arithmetic!(rem_i64, i64, %);
|
923
|
+
impl_arithmetic!(rem_f32, f32, %);
|
924
|
+
impl_arithmetic!(rem_f64, f64, %);
|
925
|
+
|
630
926
|
macro_rules! impl_eq_num {
|
631
927
|
($name:ident, $type:ty) => {
|
632
928
|
impl RbSeries {
|
data/ext/polars/src/utils.rs
CHANGED
@@ -17,3 +17,28 @@ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Se
|
|
17
17
|
)),
|
18
18
|
}
|
19
19
|
}
|
20
|
+
|
21
|
+
/// Calls `$method($args...)` on the typed chunked array that matches the
/// dtype of `$series`.
///
/// `$series` is evaluated once for the dtype lookup and once more in the
/// selected arm. Each arm unwraps the typed accessor for the dtype it just
/// matched.
///
/// # Panics
/// Panics with `dtype {:?} not supported` for any dtype without an arm below.
/// `List` is among those; its arm is currently disabled.
#[macro_export]
macro_rules! apply_method_all_arrow_series2 {
    ($series:expr, $method:ident, $($arg:expr),*) => {
        match $series.dtype() {
            DataType::Boolean => $series.bool().unwrap().$method($($arg),*),
            DataType::Utf8 => $series.utf8().unwrap().$method($($arg),*),
            DataType::UInt8 => $series.u8().unwrap().$method($($arg),*),
            DataType::UInt16 => $series.u16().unwrap().$method($($arg),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($arg),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($arg),*),
            DataType::Int8 => $series.i8().unwrap().$method($($arg),*),
            DataType::Int16 => $series.i16().unwrap().$method($($arg),*),
            DataType::Int32 => $series.i32().unwrap().$method($($arg),*),
            DataType::Int64 => $series.i64().unwrap().$method($($arg),*),
            DataType::Float32 => $series.f32().unwrap().$method($($arg),*),
            DataType::Float64 => $series.f64().unwrap().$method($($arg),*),
            DataType::Date => $series.date().unwrap().$method($($arg),*),
            DataType::Datetime(_, _) => $series.datetime().unwrap().$method($($arg),*),
            // DataType::List(_) => $series.list().unwrap().$method($($arg),*),
            DataType::Struct(_) => $series.struct_().unwrap().$method($($arg),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
|