polars-df 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,8 @@ mod error;
5
5
  mod file;
6
6
  mod lazy;
7
7
  mod series;
8
+ mod set;
9
+ mod utils;
8
10
 
9
11
  use batched_csv::RbBatchedCsv;
10
12
  use conversion::*;
@@ -13,14 +15,16 @@ use error::{RbPolarsErr, RbValueError};
13
15
  use file::get_file_like;
14
16
  use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
15
17
  use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
18
+ use lazy::utils::rb_exprs_to_exprs;
16
19
  use magnus::{
17
20
  define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
18
21
  Value,
19
22
  };
20
- use polars::datatypes::DataType;
23
+ use polars::datatypes::{DataType, TimeUnit};
21
24
  use polars::error::PolarsResult;
22
25
  use polars::frame::DataFrame;
23
26
  use polars::functions::{diag_concat_df, hor_concat_df};
27
+ use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
24
28
  use series::RbSeries;
25
29
 
26
30
  type RbResult<T> = Result<T, Error>;
@@ -42,6 +46,9 @@ fn init() -> RbResult<()> {
42
46
  module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
43
47
  module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
44
48
  module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
49
+ module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
50
+ module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
51
+ module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
45
52
 
46
53
  let class = module.define_class("RbBatchedCsv", Default::default())?;
47
54
  class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
@@ -50,7 +57,7 @@ fn init() -> RbResult<()> {
50
57
  let class = module.define_class("RbDataFrame", Default::default())?;
51
58
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
52
59
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
53
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 1))?;
60
+ class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
54
61
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
55
62
  class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
56
63
  class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
@@ -60,7 +67,21 @@ fn init() -> RbResult<()> {
60
67
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
61
68
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
62
69
  class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
70
+ class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
71
+ class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
63
72
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
73
+ class.define_method("add", method!(RbDataFrame::add, 1))?;
74
+ class.define_method("sub", method!(RbDataFrame::sub, 1))?;
75
+ class.define_method("div", method!(RbDataFrame::div, 1))?;
76
+ class.define_method("mul", method!(RbDataFrame::mul, 1))?;
77
+ class.define_method("rem", method!(RbDataFrame::rem, 1))?;
78
+ class.define_method("add_df", method!(RbDataFrame::add_df, 1))?;
79
+ class.define_method("sub_df", method!(RbDataFrame::sub_df, 1))?;
80
+ class.define_method("div_df", method!(RbDataFrame::div_df, 1))?;
81
+ class.define_method("mul_df", method!(RbDataFrame::mul_df, 1))?;
82
+ class.define_method("rem_df", method!(RbDataFrame::rem_df, 1))?;
83
+ class.define_method("sample_n", method!(RbDataFrame::sample_n, 4))?;
84
+ class.define_method("sample_frac", method!(RbDataFrame::sample_frac, 4))?;
64
85
  class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
65
86
  class.define_method("to_s", method!(RbDataFrame::to_s, 0))?;
66
87
  class.define_method("get_columns", method!(RbDataFrame::get_columns, 0))?;
@@ -74,7 +95,19 @@ fn init() -> RbResult<()> {
74
95
  class.define_method("shape", method!(RbDataFrame::shape, 0))?;
75
96
  class.define_method("height", method!(RbDataFrame::height, 0))?;
76
97
  class.define_method("width", method!(RbDataFrame::width, 0))?;
98
+ class.define_method("hstack_mut", method!(RbDataFrame::hstack_mut, 1))?;
99
+ class.define_method("hstack", method!(RbDataFrame::hstack, 1))?;
100
+ class.define_method("extend", method!(RbDataFrame::extend, 1))?;
101
+ class.define_method("vstack_mut", method!(RbDataFrame::vstack_mut, 1))?;
102
+ class.define_method("vstack", method!(RbDataFrame::vstack, 1))?;
103
+ class.define_method("drop_in_place", method!(RbDataFrame::drop_in_place, 1))?;
104
+ class.define_method("drop_nulls", method!(RbDataFrame::drop_nulls, 1))?;
105
+ class.define_method("drop", method!(RbDataFrame::drop, 1))?;
77
106
  class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
107
+ class.define_method(
108
+ "find_idx_by_name",
109
+ method!(RbDataFrame::find_idx_by_name, 1),
110
+ )?;
78
111
  class.define_method("column", method!(RbDataFrame::column, 1))?;
79
112
  class.define_method("select", method!(RbDataFrame::select, 1))?;
80
113
  class.define_method("take", method!(RbDataFrame::take, 1))?;
@@ -116,6 +149,7 @@ fn init() -> RbResult<()> {
116
149
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
117
150
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
118
151
  class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
152
+ class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
119
153
  class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
120
154
 
121
155
  let class = module.define_class("RbExpr", Default::default())?;
@@ -304,6 +338,7 @@ fn init() -> RbResult<()> {
304
338
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
305
339
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
306
340
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
341
+ class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
307
342
  class.define_method("mode", method!(RbExpr::mode, 0))?;
308
343
  class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
309
344
  class.define_method("prefix", method!(RbExpr::prefix, 1))?;
@@ -353,6 +388,7 @@ fn init() -> RbResult<()> {
353
388
  class.define_method("ewm_mean", method!(RbExpr::ewm_mean, 3))?;
354
389
  class.define_method("ewm_std", method!(RbExpr::ewm_std, 4))?;
355
390
  class.define_method("ewm_var", method!(RbExpr::ewm_var, 4))?;
391
+ class.define_method("extend_constant", method!(RbExpr::extend_constant, 2))?;
356
392
  class.define_method("any", method!(RbExpr::any, 0))?;
357
393
  class.define_method("all", method!(RbExpr::all, 0))?;
358
394
  class.define_method(
@@ -369,6 +405,7 @@ fn init() -> RbResult<()> {
369
405
  )?;
370
406
  class.define_method("log", method!(RbExpr::log, 1))?;
371
407
  class.define_method("exp", method!(RbExpr::exp, 0))?;
408
+ class.define_method("entropy", method!(RbExpr::entropy, 2))?;
372
409
 
373
410
  // meta
374
411
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
@@ -386,6 +423,7 @@ fn init() -> RbResult<()> {
386
423
  class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
387
424
  class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
388
425
  class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
426
+ class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
389
427
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
390
428
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
391
429
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
@@ -469,6 +507,7 @@ fn init() -> RbResult<()> {
469
507
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
470
508
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
471
509
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
510
+ class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
472
511
  class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
473
512
  class.define_method(
474
513
  "is_sorted_reverse_flag",
@@ -536,14 +575,85 @@ fn init() -> RbResult<()> {
536
575
  class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
537
576
  class.define_method("cast", method!(RbSeries::cast, 2))?;
538
577
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
539
- // rest
540
- class.define_method("cumsum", method!(RbSeries::cumsum, 1))?;
541
- class.define_method("cummax", method!(RbSeries::cummax, 1))?;
542
- class.define_method("cummin", method!(RbSeries::cummin, 1))?;
543
- class.define_method("cumprod", method!(RbSeries::cumprod, 1))?;
544
- class.define_method("slice", method!(RbSeries::slice, 2))?;
545
- class.define_method("ceil", method!(RbSeries::ceil, 0))?;
546
- class.define_method("round", method!(RbSeries::round, 1))?;
578
+ class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
579
+
580
+ // eq
581
+ class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
582
+ class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
583
+ class.define_method("eq_u32", method!(RbSeries::eq_u32, 1))?;
584
+ class.define_method("eq_u64", method!(RbSeries::eq_u64, 1))?;
585
+ class.define_method("eq_i8", method!(RbSeries::eq_i8, 1))?;
586
+ class.define_method("eq_i16", method!(RbSeries::eq_i16, 1))?;
587
+ class.define_method("eq_i32", method!(RbSeries::eq_i32, 1))?;
588
+ class.define_method("eq_i64", method!(RbSeries::eq_i64, 1))?;
589
+ class.define_method("eq_f32", method!(RbSeries::eq_f32, 1))?;
590
+ class.define_method("eq_f64", method!(RbSeries::eq_f64, 1))?;
591
+ // class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
592
+
593
+ // neq
594
+ class.define_method("neq_u8", method!(RbSeries::neq_u8, 1))?;
595
+ class.define_method("neq_u16", method!(RbSeries::neq_u16, 1))?;
596
+ class.define_method("neq_u32", method!(RbSeries::neq_u32, 1))?;
597
+ class.define_method("neq_u64", method!(RbSeries::neq_u64, 1))?;
598
+ class.define_method("neq_i8", method!(RbSeries::neq_i8, 1))?;
599
+ class.define_method("neq_i16", method!(RbSeries::neq_i16, 1))?;
600
+ class.define_method("neq_i32", method!(RbSeries::neq_i32, 1))?;
601
+ class.define_method("neq_i64", method!(RbSeries::neq_i64, 1))?;
602
+ class.define_method("neq_f32", method!(RbSeries::neq_f32, 1))?;
603
+ class.define_method("neq_f64", method!(RbSeries::neq_f64, 1))?;
604
+ // class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
605
+
606
+ // gt
607
+ class.define_method("gt_u8", method!(RbSeries::gt_u8, 1))?;
608
+ class.define_method("gt_u16", method!(RbSeries::gt_u16, 1))?;
609
+ class.define_method("gt_u32", method!(RbSeries::gt_u32, 1))?;
610
+ class.define_method("gt_u64", method!(RbSeries::gt_u64, 1))?;
611
+ class.define_method("gt_i8", method!(RbSeries::gt_i8, 1))?;
612
+ class.define_method("gt_i16", method!(RbSeries::gt_i16, 1))?;
613
+ class.define_method("gt_i32", method!(RbSeries::gt_i32, 1))?;
614
+ class.define_method("gt_i64", method!(RbSeries::gt_i64, 1))?;
615
+ class.define_method("gt_f32", method!(RbSeries::gt_f32, 1))?;
616
+ class.define_method("gt_f64", method!(RbSeries::gt_f64, 1))?;
617
+ // class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
618
+
619
+ // gt_eq
620
+ class.define_method("gt_eq_u8", method!(RbSeries::gt_eq_u8, 1))?;
621
+ class.define_method("gt_eq_u16", method!(RbSeries::gt_eq_u16, 1))?;
622
+ class.define_method("gt_eq_u32", method!(RbSeries::gt_eq_u32, 1))?;
623
+ class.define_method("gt_eq_u64", method!(RbSeries::gt_eq_u64, 1))?;
624
+ class.define_method("gt_eq_i8", method!(RbSeries::gt_eq_i8, 1))?;
625
+ class.define_method("gt_eq_i16", method!(RbSeries::gt_eq_i16, 1))?;
626
+ class.define_method("gt_eq_i32", method!(RbSeries::gt_eq_i32, 1))?;
627
+ class.define_method("gt_eq_i64", method!(RbSeries::gt_eq_i64, 1))?;
628
+ class.define_method("gt_eq_f32", method!(RbSeries::gt_eq_f32, 1))?;
629
+ class.define_method("gt_eq_f64", method!(RbSeries::gt_eq_f64, 1))?;
630
+ // class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
631
+
632
+ // lt
633
+ class.define_method("lt_u8", method!(RbSeries::lt_u8, 1))?;
634
+ class.define_method("lt_u16", method!(RbSeries::lt_u16, 1))?;
635
+ class.define_method("lt_u32", method!(RbSeries::lt_u32, 1))?;
636
+ class.define_method("lt_u64", method!(RbSeries::lt_u64, 1))?;
637
+ class.define_method("lt_i8", method!(RbSeries::lt_i8, 1))?;
638
+ class.define_method("lt_i16", method!(RbSeries::lt_i16, 1))?;
639
+ class.define_method("lt_i32", method!(RbSeries::lt_i32, 1))?;
640
+ class.define_method("lt_i64", method!(RbSeries::lt_i64, 1))?;
641
+ class.define_method("lt_f32", method!(RbSeries::lt_f32, 1))?;
642
+ class.define_method("lt_f64", method!(RbSeries::lt_f64, 1))?;
643
+ // class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
644
+
645
+ // lt_eq
646
+ class.define_method("lt_eq_u8", method!(RbSeries::lt_eq_u8, 1))?;
647
+ class.define_method("lt_eq_u16", method!(RbSeries::lt_eq_u16, 1))?;
648
+ class.define_method("lt_eq_u32", method!(RbSeries::lt_eq_u32, 1))?;
649
+ class.define_method("lt_eq_u64", method!(RbSeries::lt_eq_u64, 1))?;
650
+ class.define_method("lt_eq_i8", method!(RbSeries::lt_eq_i8, 1))?;
651
+ class.define_method("lt_eq_i16", method!(RbSeries::lt_eq_i16, 1))?;
652
+ class.define_method("lt_eq_i32", method!(RbSeries::lt_eq_i32, 1))?;
653
+ class.define_method("lt_eq_i64", method!(RbSeries::lt_eq_i64, 1))?;
654
+ class.define_method("lt_eq_f32", method!(RbSeries::lt_eq_f32, 1))?;
655
+ class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
656
+ // class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
547
657
 
548
658
  let class = module.define_class("RbWhen", Default::default())?;
549
659
  class.define_method("_then", method!(RbWhen::then, 1))?;
@@ -641,3 +751,34 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
641
751
  }
642
752
  Ok(dict.into())
643
753
  }
754
+
755
+ fn rb_date_range(
756
+ start: i64,
757
+ stop: i64,
758
+ every: String,
759
+ closed: Wrap<ClosedWindow>,
760
+ name: String,
761
+ tu: Wrap<TimeUnit>,
762
+ tz: Option<TimeZone>,
763
+ ) -> RbSeries {
764
+ polars::time::date_range_impl(
765
+ &name,
766
+ start,
767
+ stop,
768
+ Duration::parse(&every),
769
+ closed.0,
770
+ tu.0,
771
+ tz,
772
+ )
773
+ .into_series()
774
+ .into()
775
+ }
776
+
777
+ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
778
+ let exprs = rb_exprs_to_exprs(exprs)?;
779
+ Ok(polars::lazy::dsl::as_struct(&exprs).into())
780
+ }
781
+
782
+ fn arg_where(condition: &RbExpr) -> RbExpr {
783
+ polars::lazy::dsl::arg_where(condition.inner.clone()).into()
784
+ }
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
5
5
  use std::cell::RefCell;
6
6
 
7
7
  use crate::conversion::*;
8
+ use crate::set::set_at_idx;
8
9
  use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
9
10
 
10
11
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -474,6 +475,12 @@ impl RbSeries {
474
475
  s.into_iter().collect()
475
476
  } else if let Ok(s) = series.utf8() {
476
477
  s.into_iter().collect()
478
+ } else if let Ok(_s) = series.date() {
479
+ let a = RArray::with_capacity(series.len());
480
+ for v in series.iter() {
481
+ a.push::<Value>(Wrap(v).into()).unwrap();
482
+ }
483
+ a
477
484
  } else {
478
485
  unimplemented!();
479
486
  }
@@ -594,44 +601,190 @@ impl RbSeries {
594
601
  }
595
602
  }
596
603
 
597
- // dispatch dynamically in future?
598
-
599
- pub fn cumsum(&self, reverse: bool) -> Self {
600
- self.series.borrow().cumsum(reverse).into()
604
+ pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
605
+ let mut s = self.series.borrow_mut();
606
+ match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
607
+ Ok(out) => {
608
+ *s = out;
609
+ Ok(())
610
+ }
611
+ Err(e) => Err(RbPolarsErr::from(e)),
612
+ }
601
613
  }
614
+ }
602
615
 
603
- pub fn cummax(&self, reverse: bool) -> Self {
604
- self.series.borrow().cummax(reverse).into()
605
- }
616
/// Generates an `RbSeries` method named `$method` that compares the wrapped
/// series with a scalar of type `$scalar` using `equal`, returning the
/// comparison result as a new `RbSeries`. Polars errors are converted with
/// `RbPolarsErr::from`.
macro_rules! impl_eq_num {
    ($method:ident, $scalar:ty) => {
        impl RbSeries {
            pub fn $method(&self, rhs: $scalar) -> RbResult<Self> {
                let outcome = self.series.borrow().equal(rhs);
                let mask = outcome.map_err(RbPolarsErr::from)?;
                Ok(RbSeries::new(mask.into_series()))
            }
        }
    };
}
606
626
 
607
- pub fn cummin(&self, reverse: bool) -> Self {
608
- self.series.borrow().cummin(reverse).into()
609
- }
627
+ impl_eq_num!(eq_u8, u8);
628
+ impl_eq_num!(eq_u16, u16);
629
+ impl_eq_num!(eq_u32, u32);
630
+ impl_eq_num!(eq_u64, u64);
631
+ impl_eq_num!(eq_i8, i8);
632
+ impl_eq_num!(eq_i16, i16);
633
+ impl_eq_num!(eq_i32, i32);
634
+ impl_eq_num!(eq_i64, i64);
635
+ impl_eq_num!(eq_f32, f32);
636
+ impl_eq_num!(eq_f64, f64);
637
+ // impl_eq_num!(eq_str, &str);
638
+
639
/// Generates an `RbSeries` method named `$method` that compares the wrapped
/// series with a scalar of type `$scalar` using `not_equal`, returning the
/// comparison result as a new `RbSeries`. Polars errors are converted with
/// `RbPolarsErr::from`.
macro_rules! impl_neq_num {
    ($method:ident, $scalar:ty) => {
        impl RbSeries {
            pub fn $method(&self, rhs: $scalar) -> RbResult<Self> {
                let outcome = self.series.borrow().not_equal(rhs);
                let mask = outcome.map_err(RbPolarsErr::from)?;
                Ok(RbSeries::new(mask.into_series()))
            }
        }
    };
}
610
653
 
611
- pub fn cumprod(&self, reverse: bool) -> Self {
612
- self.series.borrow().cumprod(reverse).into()
613
- }
654
+ impl_neq_num!(neq_u8, u8);
655
+ impl_neq_num!(neq_u16, u16);
656
+ impl_neq_num!(neq_u32, u32);
657
+ impl_neq_num!(neq_u64, u64);
658
+ impl_neq_num!(neq_i8, i8);
659
+ impl_neq_num!(neq_i16, i16);
660
+ impl_neq_num!(neq_i32, i32);
661
+ impl_neq_num!(neq_i64, i64);
662
+ impl_neq_num!(neq_f32, f32);
663
+ impl_neq_num!(neq_f64, f64);
664
+ // impl_neq_num!(neq_str, &str);
665
+
666
/// Generates an `RbSeries` method named `$method` that compares the wrapped
/// series with a scalar of type `$scalar` using `gt`, returning the
/// comparison result as a new `RbSeries`. Polars errors are converted with
/// `RbPolarsErr::from`.
macro_rules! impl_gt_num {
    ($method:ident, $scalar:ty) => {
        impl RbSeries {
            pub fn $method(&self, rhs: $scalar) -> RbResult<Self> {
                let outcome = self.series.borrow().gt(rhs);
                let mask = outcome.map_err(RbPolarsErr::from)?;
                Ok(RbSeries::new(mask.into_series()))
            }
        }
    };
}
614
676
 
615
- pub fn slice(&self, offset: i64, length: usize) -> Self {
616
- let series = self.series.borrow().slice(offset, length);
617
- series.into()
618
- }
677
+ impl_gt_num!(gt_u8, u8);
678
+ impl_gt_num!(gt_u16, u16);
679
+ impl_gt_num!(gt_u32, u32);
680
+ impl_gt_num!(gt_u64, u64);
681
+ impl_gt_num!(gt_i8, i8);
682
+ impl_gt_num!(gt_i16, i16);
683
+ impl_gt_num!(gt_i32, i32);
684
+ impl_gt_num!(gt_i64, i64);
685
+ impl_gt_num!(gt_f32, f32);
686
+ impl_gt_num!(gt_f64, f64);
687
+ // impl_gt_num!(gt_str, &str);
688
+
689
/// Generates an `RbSeries` method named `$method` that compares the wrapped
/// series with a scalar of type `$scalar` using `gt_eq`, returning the
/// comparison result as a new `RbSeries`. Polars errors are converted with
/// `RbPolarsErr::from`.
macro_rules! impl_gt_eq_num {
    ($method:ident, $scalar:ty) => {
        impl RbSeries {
            pub fn $method(&self, rhs: $scalar) -> RbResult<Self> {
                let outcome = self.series.borrow().gt_eq(rhs);
                let mask = outcome.map_err(RbPolarsErr::from)?;
                Ok(RbSeries::new(mask.into_series()))
            }
        }
    };
}
619
699
 
620
- pub fn ceil(&self) -> RbResult<Self> {
621
- let s = self.series.borrow().ceil().map_err(RbPolarsErr::from)?;
622
- Ok(s.into())
623
- }
700
+ impl_gt_eq_num!(gt_eq_u8, u8);
701
+ impl_gt_eq_num!(gt_eq_u16, u16);
702
+ impl_gt_eq_num!(gt_eq_u32, u32);
703
+ impl_gt_eq_num!(gt_eq_u64, u64);
704
+ impl_gt_eq_num!(gt_eq_i8, i8);
705
+ impl_gt_eq_num!(gt_eq_i16, i16);
706
+ impl_gt_eq_num!(gt_eq_i32, i32);
707
+ impl_gt_eq_num!(gt_eq_i64, i64);
708
+ impl_gt_eq_num!(gt_eq_f32, f32);
709
+ impl_gt_eq_num!(gt_eq_f64, f64);
710
+ // impl_gt_eq_num!(gt_eq_str, &str);
711
+
712
/// Generates an `RbSeries` method named `$method` that compares the wrapped
/// series with a scalar of type `$scalar` using `lt`, returning the
/// comparison result as a new `RbSeries`. Polars errors are converted with
/// `RbPolarsErr::from`.
macro_rules! impl_lt_num {
    ($method:ident, $scalar:ty) => {
        impl RbSeries {
            pub fn $method(&self, rhs: $scalar) -> RbResult<RbSeries> {
                let outcome = self.series.borrow().lt(rhs);
                let mask = outcome.map_err(RbPolarsErr::from)?;
                Ok(RbSeries::new(mask.into_series()))
            }
        }
    };
}
624
722
 
625
- pub fn round(&self, decimals: u32) -> RbResult<Self> {
626
- let s = self
627
- .series
628
- .borrow()
629
- .round(decimals)
630
- .map_err(RbPolarsErr::from)?;
631
- Ok(s.into())
632
- }
723
+ impl_lt_num!(lt_u8, u8);
724
+ impl_lt_num!(lt_u16, u16);
725
+ impl_lt_num!(lt_u32, u32);
726
+ impl_lt_num!(lt_u64, u64);
727
+ impl_lt_num!(lt_i8, i8);
728
+ impl_lt_num!(lt_i16, i16);
729
+ impl_lt_num!(lt_i32, i32);
730
+ impl_lt_num!(lt_i64, i64);
731
+ impl_lt_num!(lt_f32, f32);
732
+ impl_lt_num!(lt_f64, f64);
733
+ // impl_lt_num!(lt_str, &str);
734
+
735
/// Generates an `RbSeries` method named `$method` that compares the wrapped
/// series with a scalar of type `$scalar` using `lt_eq`, returning the
/// comparison result as a new `RbSeries`. Polars errors are converted with
/// `RbPolarsErr::from`.
macro_rules! impl_lt_eq_num {
    ($method:ident, $scalar:ty) => {
        impl RbSeries {
            pub fn $method(&self, rhs: $scalar) -> RbResult<Self> {
                let outcome = self.series.borrow().lt_eq(rhs);
                let mask = outcome.map_err(RbPolarsErr::from)?;
                Ok(RbSeries::new(mask.into_series()))
            }
        }
    };
}
745
+
746
+ impl_lt_eq_num!(lt_eq_u8, u8);
747
+ impl_lt_eq_num!(lt_eq_u16, u16);
748
+ impl_lt_eq_num!(lt_eq_u32, u32);
749
+ impl_lt_eq_num!(lt_eq_u64, u64);
750
+ impl_lt_eq_num!(lt_eq_i8, i8);
751
+ impl_lt_eq_num!(lt_eq_i16, i16);
752
+ impl_lt_eq_num!(lt_eq_i32, i32);
753
+ impl_lt_eq_num!(lt_eq_i64, i64);
754
+ impl_lt_eq_num!(lt_eq_f32, f32);
755
+ impl_lt_eq_num!(lt_eq_f64, f64);
756
+ // impl_lt_eq_num!(lt_eq_str, &str);
757
+
758
+ pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
759
+ let mut series = Vec::new();
760
+ for item in rs.each() {
761
+ series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
762
+ }
763
+ Ok(series)
633
764
  }
634
765
 
635
766
  pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
636
767
  s.into_iter().map(RbSeries::new).collect()
637
768
  }
769
+
770
+ impl RbSeries {
771
+ pub fn new_opt_date(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
772
+ let len = values.len();
773
+ let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(&name, len);
774
+ for item in values.each() {
775
+ let v = item?;
776
+ if v.is_nil() {
777
+ builder.append_null();
778
+ } else {
779
+ // convert to DateTime for UTC
780
+ let v: Value = v.funcall("to_datetime", ())?;
781
+ let v: Value = v.funcall("to_time", ())?;
782
+ let v: Value = v.funcall("to_i", ())?;
783
+ // TODO use strict
784
+ builder.append_value(v.try_convert::<i32>()? / 86400);
785
+ }
786
+ }
787
+ let ca: ChunkedArray<Int32Type> = builder.finish();
788
+ Ok(ca.into_date().into_series().into())
789
+ }
790
+ }
@@ -0,0 +1,91 @@
1
+ // use polars::export::arrow2::array::Array;
2
+ use polars::prelude::*;
3
+
4
+ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
5
+ let logical_dtype = s.dtype().clone();
6
+ let idx = idx.cast(&IDX_DTYPE)?;
7
+ let idx = idx.rechunk();
8
+ let idx = idx.idx().unwrap();
9
+ let idx = idx.downcast_iter().next().unwrap();
10
+
11
+ // if idx.null_count() > 0 {
12
+ // return Err(PolarsError::ComputeError(
13
+ // "index values should not be null".into(),
14
+ // ));
15
+ // }
16
+
17
+ let idx = idx.values().as_slice();
18
+
19
+ let values = values.to_physical_repr().cast(&s.dtype().to_physical())?;
20
+
21
+ // do not shadow, otherwise s is not dropped immediately
22
+ // and we want to have mutable access
23
+ s = s.to_physical_repr().into_owned();
24
+ let mutable_s = s._get_inner_mut();
25
+
26
+ let s = match logical_dtype.to_physical() {
27
+ DataType::Int8 => {
28
+ let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
29
+ let values = values.i8()?;
30
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
31
+ }
32
+ DataType::Int16 => {
33
+ let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
34
+ let values = values.i16()?;
35
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
36
+ }
37
+ DataType::Int32 => {
38
+ let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
39
+ let values = values.i32()?;
40
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
41
+ }
42
+ DataType::Int64 => {
43
+ let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
44
+ let values = values.i64()?;
45
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
46
+ }
47
+ DataType::UInt8 => {
48
+ let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
49
+ let values = values.u8()?;
50
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
51
+ }
52
+ DataType::UInt16 => {
53
+ let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
54
+ let values = values.u16()?;
55
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
56
+ }
57
+ DataType::UInt32 => {
58
+ let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
59
+ let values = values.u32()?;
60
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
61
+ }
62
+ DataType::UInt64 => {
63
+ let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
64
+ let values = values.u64()?;
65
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
66
+ }
67
+ DataType::Float32 => {
68
+ let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
69
+ let values = values.f32()?;
70
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
71
+ }
72
+ DataType::Float64 => {
73
+ let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
74
+ let values = values.f64()?;
75
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
76
+ }
77
+ DataType::Boolean => {
78
+ let ca = s.bool()?;
79
+ let values = values.bool()?;
80
+ ca.set_at_idx2(idx, values)
81
+ }
82
+ DataType::Utf8 => {
83
+ let ca = s.utf8()?;
84
+ let values = values.utf8()?;
85
+ ca.set_at_idx2(idx, values)
86
+ }
87
+ _ => panic!("not yet implemented for dtype: {}", logical_dtype),
88
+ };
89
+
90
+ s.and_then(|s| s.cast(&logical_dtype))
91
+ }
@@ -0,0 +1,19 @@
1
+ use polars::prelude::*;
2
+
3
+ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Series> {
4
+ match (s.dtype(), signed) {
5
+ (DataType::UInt64, true) => {
6
+ let ca = s.u64().unwrap();
7
+ Ok(ca.reinterpret_signed().into_series())
8
+ }
9
+ (DataType::UInt64, false) => Ok(s.clone()),
10
+ (DataType::Int64, false) => {
11
+ let ca = s.i64().unwrap();
12
+ Ok(ca.reinterpret_unsigned().into_series())
13
+ }
14
+ (DataType::Int64, true) => Ok(s.clone()),
15
+ _ => Err(PolarsError::ComputeError(
16
+ "reinterpret is only allowed for 64bit integers dtype, use cast otherwise".into(),
17
+ )),
18
+ }
19
+ }
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class BatchedCsvReader
3
4
  attr_accessor :_reader, :new_columns
4
5
 
@@ -1,11 +1,50 @@
1
1
  module Polars
2
+ # Namespace for categorical related expressions.
2
3
  class CatExpr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def initialize(expr)
6
9
  self._rbexpr = expr._rbexpr
7
10
  end
8
11
 
12
+ # Determine how this categorical series should be sorted.
13
+ #
14
+ # @param ordering ["physical", "lexical"]
15
+ # Ordering type:
16
+ #
17
+ # - 'physical' -> Use the physical representation of the categories to determine the order (default).
18
+ # - 'lexical' -> Use the string values to determine the ordering.
19
+ #
20
+ # @return [Expr]
21
+ #
22
+ # @example
23
+ # df = Polars::DataFrame.new(
24
+ # {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
25
+ # ).with_columns(
26
+ # [
27
+ # Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
28
+ # ]
29
+ # )
30
+ # df.sort(["cats", "vals"])
31
+ # # =>
32
+ # # shape: (5, 2)
33
+ # # ┌──────┬──────┐
34
+ # # │ cats ┆ vals │
35
+ # # │ --- ┆ --- │
36
+ # # │ cat ┆ i64 │
37
+ # # ╞══════╪══════╡
38
+ # # │ a ┆ 2 │
39
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
40
+ # # │ b ┆ 3 │
41
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
42
+ # # │ k ┆ 2 │
43
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
44
+ # # │ z ┆ 1 │
45
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
46
+ # # │ z ┆ 3 │
47
+ # # └──────┴──────┘
9
48
  def set_ordering(ordering)
10
49
  Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
11
50
  end