polars-df 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,6 +5,8 @@ mod error;
5
5
  mod file;
6
6
  mod lazy;
7
7
  mod series;
8
+ mod set;
9
+ mod utils;
8
10
 
9
11
  use batched_csv::RbBatchedCsv;
10
12
  use conversion::*;
@@ -13,14 +15,16 @@ use error::{RbPolarsErr, RbValueError};
13
15
  use file::get_file_like;
14
16
  use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
15
17
  use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
18
+ use lazy::utils::rb_exprs_to_exprs;
16
19
  use magnus::{
17
20
  define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
18
21
  Value,
19
22
  };
20
- use polars::datatypes::DataType;
23
+ use polars::datatypes::{DataType, TimeUnit};
21
24
  use polars::error::PolarsResult;
22
25
  use polars::frame::DataFrame;
23
26
  use polars::functions::{diag_concat_df, hor_concat_df};
27
+ use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
24
28
  use series::RbSeries;
25
29
 
26
30
  type RbResult<T> = Result<T, Error>;
@@ -42,6 +46,9 @@ fn init() -> RbResult<()> {
42
46
  module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
43
47
  module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
44
48
  module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
49
+ module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
50
+ module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
51
+ module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
45
52
 
46
53
  let class = module.define_class("RbBatchedCsv", Default::default())?;
47
54
  class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
@@ -50,7 +57,7 @@ fn init() -> RbResult<()> {
50
57
  let class = module.define_class("RbDataFrame", Default::default())?;
51
58
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
52
59
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
53
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 1))?;
60
+ class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
54
61
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
55
62
  class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
56
63
  class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
@@ -60,7 +67,21 @@ fn init() -> RbResult<()> {
60
67
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
61
68
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
62
69
  class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
70
+ class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
71
+ class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
63
72
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
73
+ class.define_method("add", method!(RbDataFrame::add, 1))?;
74
+ class.define_method("sub", method!(RbDataFrame::sub, 1))?;
75
+ class.define_method("div", method!(RbDataFrame::div, 1))?;
76
+ class.define_method("mul", method!(RbDataFrame::mul, 1))?;
77
+ class.define_method("rem", method!(RbDataFrame::rem, 1))?;
78
+ class.define_method("add_df", method!(RbDataFrame::add_df, 1))?;
79
+ class.define_method("sub_df", method!(RbDataFrame::sub_df, 1))?;
80
+ class.define_method("div_df", method!(RbDataFrame::div_df, 1))?;
81
+ class.define_method("mul_df", method!(RbDataFrame::mul_df, 1))?;
82
+ class.define_method("rem_df", method!(RbDataFrame::rem_df, 1))?;
83
+ class.define_method("sample_n", method!(RbDataFrame::sample_n, 4))?;
84
+ class.define_method("sample_frac", method!(RbDataFrame::sample_frac, 4))?;
64
85
  class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
65
86
  class.define_method("to_s", method!(RbDataFrame::to_s, 0))?;
66
87
  class.define_method("get_columns", method!(RbDataFrame::get_columns, 0))?;
@@ -74,7 +95,19 @@ fn init() -> RbResult<()> {
74
95
  class.define_method("shape", method!(RbDataFrame::shape, 0))?;
75
96
  class.define_method("height", method!(RbDataFrame::height, 0))?;
76
97
  class.define_method("width", method!(RbDataFrame::width, 0))?;
98
+ class.define_method("hstack_mut", method!(RbDataFrame::hstack_mut, 1))?;
99
+ class.define_method("hstack", method!(RbDataFrame::hstack, 1))?;
100
+ class.define_method("extend", method!(RbDataFrame::extend, 1))?;
101
+ class.define_method("vstack_mut", method!(RbDataFrame::vstack_mut, 1))?;
102
+ class.define_method("vstack", method!(RbDataFrame::vstack, 1))?;
103
+ class.define_method("drop_in_place", method!(RbDataFrame::drop_in_place, 1))?;
104
+ class.define_method("drop_nulls", method!(RbDataFrame::drop_nulls, 1))?;
105
+ class.define_method("drop", method!(RbDataFrame::drop, 1))?;
77
106
  class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
107
+ class.define_method(
108
+ "find_idx_by_name",
109
+ method!(RbDataFrame::find_idx_by_name, 1),
110
+ )?;
78
111
  class.define_method("column", method!(RbDataFrame::column, 1))?;
79
112
  class.define_method("select", method!(RbDataFrame::select, 1))?;
80
113
  class.define_method("take", method!(RbDataFrame::take, 1))?;
@@ -116,6 +149,7 @@ fn init() -> RbResult<()> {
116
149
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
117
150
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
118
151
  class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
152
+ class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
119
153
  class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
120
154
 
121
155
  let class = module.define_class("RbExpr", Default::default())?;
@@ -304,6 +338,7 @@ fn init() -> RbResult<()> {
304
338
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
305
339
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
306
340
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
341
+ class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
307
342
  class.define_method("mode", method!(RbExpr::mode, 0))?;
308
343
  class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
309
344
  class.define_method("prefix", method!(RbExpr::prefix, 1))?;
@@ -353,6 +388,7 @@ fn init() -> RbResult<()> {
353
388
  class.define_method("ewm_mean", method!(RbExpr::ewm_mean, 3))?;
354
389
  class.define_method("ewm_std", method!(RbExpr::ewm_std, 4))?;
355
390
  class.define_method("ewm_var", method!(RbExpr::ewm_var, 4))?;
391
+ class.define_method("extend_constant", method!(RbExpr::extend_constant, 2))?;
356
392
  class.define_method("any", method!(RbExpr::any, 0))?;
357
393
  class.define_method("all", method!(RbExpr::all, 0))?;
358
394
  class.define_method(
@@ -369,6 +405,7 @@ fn init() -> RbResult<()> {
369
405
  )?;
370
406
  class.define_method("log", method!(RbExpr::log, 1))?;
371
407
  class.define_method("exp", method!(RbExpr::exp, 0))?;
408
+ class.define_method("entropy", method!(RbExpr::entropy, 2))?;
372
409
 
373
410
  // meta
374
411
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
@@ -386,6 +423,7 @@ fn init() -> RbResult<()> {
386
423
  class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
387
424
  class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
388
425
  class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
426
+ class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
389
427
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
390
428
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
391
429
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
@@ -469,6 +507,7 @@ fn init() -> RbResult<()> {
469
507
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
470
508
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
471
509
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
510
+ class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
472
511
  class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
473
512
  class.define_method(
474
513
  "is_sorted_reverse_flag",
@@ -536,14 +575,85 @@ fn init() -> RbResult<()> {
536
575
  class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
537
576
  class.define_method("cast", method!(RbSeries::cast, 2))?;
538
577
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
539
- // rest
540
- class.define_method("cumsum", method!(RbSeries::cumsum, 1))?;
541
- class.define_method("cummax", method!(RbSeries::cummax, 1))?;
542
- class.define_method("cummin", method!(RbSeries::cummin, 1))?;
543
- class.define_method("cumprod", method!(RbSeries::cumprod, 1))?;
544
- class.define_method("slice", method!(RbSeries::slice, 2))?;
545
- class.define_method("ceil", method!(RbSeries::ceil, 0))?;
546
- class.define_method("round", method!(RbSeries::round, 1))?;
578
+ class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
579
+
580
+ // eq
581
+ class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
582
+ class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
583
+ class.define_method("eq_u32", method!(RbSeries::eq_u32, 1))?;
584
+ class.define_method("eq_u64", method!(RbSeries::eq_u64, 1))?;
585
+ class.define_method("eq_i8", method!(RbSeries::eq_i8, 1))?;
586
+ class.define_method("eq_i16", method!(RbSeries::eq_i16, 1))?;
587
+ class.define_method("eq_i32", method!(RbSeries::eq_i32, 1))?;
588
+ class.define_method("eq_i64", method!(RbSeries::eq_i64, 1))?;
589
+ class.define_method("eq_f32", method!(RbSeries::eq_f32, 1))?;
590
+ class.define_method("eq_f64", method!(RbSeries::eq_f64, 1))?;
591
+ // class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
592
+
593
+ // neq
594
+ class.define_method("neq_u8", method!(RbSeries::neq_u8, 1))?;
595
+ class.define_method("neq_u16", method!(RbSeries::neq_u16, 1))?;
596
+ class.define_method("neq_u32", method!(RbSeries::neq_u32, 1))?;
597
+ class.define_method("neq_u64", method!(RbSeries::neq_u64, 1))?;
598
+ class.define_method("neq_i8", method!(RbSeries::neq_i8, 1))?;
599
+ class.define_method("neq_i16", method!(RbSeries::neq_i16, 1))?;
600
+ class.define_method("neq_i32", method!(RbSeries::neq_i32, 1))?;
601
+ class.define_method("neq_i64", method!(RbSeries::neq_i64, 1))?;
602
+ class.define_method("neq_f32", method!(RbSeries::neq_f32, 1))?;
603
+ class.define_method("neq_f64", method!(RbSeries::neq_f64, 1))?;
604
+ // class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
605
+
606
+ // gt
607
+ class.define_method("gt_u8", method!(RbSeries::gt_u8, 1))?;
608
+ class.define_method("gt_u16", method!(RbSeries::gt_u16, 1))?;
609
+ class.define_method("gt_u32", method!(RbSeries::gt_u32, 1))?;
610
+ class.define_method("gt_u64", method!(RbSeries::gt_u64, 1))?;
611
+ class.define_method("gt_i8", method!(RbSeries::gt_i8, 1))?;
612
+ class.define_method("gt_i16", method!(RbSeries::gt_i16, 1))?;
613
+ class.define_method("gt_i32", method!(RbSeries::gt_i32, 1))?;
614
+ class.define_method("gt_i64", method!(RbSeries::gt_i64, 1))?;
615
+ class.define_method("gt_f32", method!(RbSeries::gt_f32, 1))?;
616
+ class.define_method("gt_f64", method!(RbSeries::gt_f64, 1))?;
617
+ // class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
618
+
619
+ // gt_eq
620
+ class.define_method("gt_eq_u8", method!(RbSeries::gt_eq_u8, 1))?;
621
+ class.define_method("gt_eq_u16", method!(RbSeries::gt_eq_u16, 1))?;
622
+ class.define_method("gt_eq_u32", method!(RbSeries::gt_eq_u32, 1))?;
623
+ class.define_method("gt_eq_u64", method!(RbSeries::gt_eq_u64, 1))?;
624
+ class.define_method("gt_eq_i8", method!(RbSeries::gt_eq_i8, 1))?;
625
+ class.define_method("gt_eq_i16", method!(RbSeries::gt_eq_i16, 1))?;
626
+ class.define_method("gt_eq_i32", method!(RbSeries::gt_eq_i32, 1))?;
627
+ class.define_method("gt_eq_i64", method!(RbSeries::gt_eq_i64, 1))?;
628
+ class.define_method("gt_eq_f32", method!(RbSeries::gt_eq_f32, 1))?;
629
+ class.define_method("gt_eq_f64", method!(RbSeries::gt_eq_f64, 1))?;
630
+ // class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
631
+
632
+ // lt
633
+ class.define_method("lt_u8", method!(RbSeries::lt_u8, 1))?;
634
+ class.define_method("lt_u16", method!(RbSeries::lt_u16, 1))?;
635
+ class.define_method("lt_u32", method!(RbSeries::lt_u32, 1))?;
636
+ class.define_method("lt_u64", method!(RbSeries::lt_u64, 1))?;
637
+ class.define_method("lt_i8", method!(RbSeries::lt_i8, 1))?;
638
+ class.define_method("lt_i16", method!(RbSeries::lt_i16, 1))?;
639
+ class.define_method("lt_i32", method!(RbSeries::lt_i32, 1))?;
640
+ class.define_method("lt_i64", method!(RbSeries::lt_i64, 1))?;
641
+ class.define_method("lt_f32", method!(RbSeries::lt_f32, 1))?;
642
+ class.define_method("lt_f64", method!(RbSeries::lt_f64, 1))?;
643
+ // class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
644
+
645
+ // lt_eq
646
+ class.define_method("lt_eq_u8", method!(RbSeries::lt_eq_u8, 1))?;
647
+ class.define_method("lt_eq_u16", method!(RbSeries::lt_eq_u16, 1))?;
648
+ class.define_method("lt_eq_u32", method!(RbSeries::lt_eq_u32, 1))?;
649
+ class.define_method("lt_eq_u64", method!(RbSeries::lt_eq_u64, 1))?;
650
+ class.define_method("lt_eq_i8", method!(RbSeries::lt_eq_i8, 1))?;
651
+ class.define_method("lt_eq_i16", method!(RbSeries::lt_eq_i16, 1))?;
652
+ class.define_method("lt_eq_i32", method!(RbSeries::lt_eq_i32, 1))?;
653
+ class.define_method("lt_eq_i64", method!(RbSeries::lt_eq_i64, 1))?;
654
+ class.define_method("lt_eq_f32", method!(RbSeries::lt_eq_f32, 1))?;
655
+ class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
656
+ // class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
547
657
 
548
658
  let class = module.define_class("RbWhen", Default::default())?;
549
659
  class.define_method("_then", method!(RbWhen::then, 1))?;
@@ -641,3 +751,34 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
641
751
  }
642
752
  Ok(dict.into())
643
753
  }
754
+
755
+ fn rb_date_range(
756
+ start: i64,
757
+ stop: i64,
758
+ every: String,
759
+ closed: Wrap<ClosedWindow>,
760
+ name: String,
761
+ tu: Wrap<TimeUnit>,
762
+ tz: Option<TimeZone>,
763
+ ) -> RbSeries {
764
+ polars::time::date_range_impl(
765
+ &name,
766
+ start,
767
+ stop,
768
+ Duration::parse(&every),
769
+ closed.0,
770
+ tu.0,
771
+ tz,
772
+ )
773
+ .into_series()
774
+ .into()
775
+ }
776
+
777
+ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
778
+ let exprs = rb_exprs_to_exprs(exprs)?;
779
+ Ok(polars::lazy::dsl::as_struct(&exprs).into())
780
+ }
781
+
782
+ fn arg_where(condition: &RbExpr) -> RbExpr {
783
+ polars::lazy::dsl::arg_where(condition.inner.clone()).into()
784
+ }
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
5
5
  use std::cell::RefCell;
6
6
 
7
7
  use crate::conversion::*;
8
+ use crate::set::set_at_idx;
8
9
  use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
9
10
 
10
11
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -474,6 +475,12 @@ impl RbSeries {
474
475
  s.into_iter().collect()
475
476
  } else if let Ok(s) = series.utf8() {
476
477
  s.into_iter().collect()
478
+ } else if let Ok(_s) = series.date() {
479
+ let a = RArray::with_capacity(series.len());
480
+ for v in series.iter() {
481
+ a.push::<Value>(Wrap(v).into()).unwrap();
482
+ }
483
+ a
477
484
  } else {
478
485
  unimplemented!();
479
486
  }
@@ -594,44 +601,190 @@ impl RbSeries {
594
601
  }
595
602
  }
596
603
 
597
- // dispatch dynamically in future?
598
-
599
- pub fn cumsum(&self, reverse: bool) -> Self {
600
- self.series.borrow().cumsum(reverse).into()
604
+ pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
605
+ let mut s = self.series.borrow_mut();
606
+ match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
607
+ Ok(out) => {
608
+ *s = out;
609
+ Ok(())
610
+ }
611
+ Err(e) => Err(RbPolarsErr::from(e)),
612
+ }
601
613
  }
614
+ }
602
615
 
603
- pub fn cummax(&self, reverse: bool) -> Self {
604
- self.series.borrow().cummax(reverse).into()
605
- }
616
/// Generates `RbSeries::$name`, which compares the wrapped series with a scalar
/// `rhs` of type `$type` using `equal` and wraps the resulting mask in a new
/// `RbSeries`. Comparison errors are converted with `RbPolarsErr::from`.
macro_rules! impl_eq_num {
    ($name:ident, $type:ty) => {
        impl RbSeries {
            pub fn $name(&self, rhs: $type) -> RbResult<Self> {
                self.series
                    .borrow()
                    .equal(rhs)
                    .map(|mask| RbSeries::new(mask.into_series()))
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
606
626
 
607
- pub fn cummin(&self, reverse: bool) -> Self {
608
- self.series.borrow().cummin(reverse).into()
609
- }
627
+ impl_eq_num!(eq_u8, u8);
628
+ impl_eq_num!(eq_u16, u16);
629
+ impl_eq_num!(eq_u32, u32);
630
+ impl_eq_num!(eq_u64, u64);
631
+ impl_eq_num!(eq_i8, i8);
632
+ impl_eq_num!(eq_i16, i16);
633
+ impl_eq_num!(eq_i32, i32);
634
+ impl_eq_num!(eq_i64, i64);
635
+ impl_eq_num!(eq_f32, f32);
636
+ impl_eq_num!(eq_f64, f64);
637
+ // impl_eq_num!(eq_str, &str);
638
+
639
/// Generates `RbSeries::$name`, which compares the wrapped series with a scalar
/// `rhs` of type `$type` using `not_equal` and wraps the resulting mask in a
/// new `RbSeries`. Comparison errors are converted with `RbPolarsErr::from`.
macro_rules! impl_neq_num {
    ($name:ident, $type:ty) => {
        impl RbSeries {
            pub fn $name(&self, rhs: $type) -> RbResult<Self> {
                self.series
                    .borrow()
                    .not_equal(rhs)
                    .map(|mask| RbSeries::new(mask.into_series()))
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
610
653
 
611
- pub fn cumprod(&self, reverse: bool) -> Self {
612
- self.series.borrow().cumprod(reverse).into()
613
- }
654
+ impl_neq_num!(neq_u8, u8);
655
+ impl_neq_num!(neq_u16, u16);
656
+ impl_neq_num!(neq_u32, u32);
657
+ impl_neq_num!(neq_u64, u64);
658
+ impl_neq_num!(neq_i8, i8);
659
+ impl_neq_num!(neq_i16, i16);
660
+ impl_neq_num!(neq_i32, i32);
661
+ impl_neq_num!(neq_i64, i64);
662
+ impl_neq_num!(neq_f32, f32);
663
+ impl_neq_num!(neq_f64, f64);
664
+ // impl_neq_num!(neq_str, &str);
665
+
666
/// Generates `RbSeries::$name`, which compares the wrapped series with a scalar
/// `rhs` of type `$type` using `gt` and wraps the resulting mask in a new
/// `RbSeries`. Comparison errors are converted with `RbPolarsErr::from`.
macro_rules! impl_gt_num {
    ($name:ident, $type:ty) => {
        impl RbSeries {
            pub fn $name(&self, rhs: $type) -> RbResult<Self> {
                self.series
                    .borrow()
                    .gt(rhs)
                    .map(|mask| RbSeries::new(mask.into_series()))
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
614
676
 
615
- pub fn slice(&self, offset: i64, length: usize) -> Self {
616
- let series = self.series.borrow().slice(offset, length);
617
- series.into()
618
- }
677
+ impl_gt_num!(gt_u8, u8);
678
+ impl_gt_num!(gt_u16, u16);
679
+ impl_gt_num!(gt_u32, u32);
680
+ impl_gt_num!(gt_u64, u64);
681
+ impl_gt_num!(gt_i8, i8);
682
+ impl_gt_num!(gt_i16, i16);
683
+ impl_gt_num!(gt_i32, i32);
684
+ impl_gt_num!(gt_i64, i64);
685
+ impl_gt_num!(gt_f32, f32);
686
+ impl_gt_num!(gt_f64, f64);
687
+ // impl_gt_num!(gt_str, &str);
688
+
689
/// Generates `RbSeries::$name`, which compares the wrapped series with a scalar
/// `rhs` of type `$type` using `gt_eq` and wraps the resulting mask in a new
/// `RbSeries`. Comparison errors are converted with `RbPolarsErr::from`.
macro_rules! impl_gt_eq_num {
    ($name:ident, $type:ty) => {
        impl RbSeries {
            pub fn $name(&self, rhs: $type) -> RbResult<Self> {
                self.series
                    .borrow()
                    .gt_eq(rhs)
                    .map(|mask| RbSeries::new(mask.into_series()))
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
619
699
 
620
- pub fn ceil(&self) -> RbResult<Self> {
621
- let s = self.series.borrow().ceil().map_err(RbPolarsErr::from)?;
622
- Ok(s.into())
623
- }
700
+ impl_gt_eq_num!(gt_eq_u8, u8);
701
+ impl_gt_eq_num!(gt_eq_u16, u16);
702
+ impl_gt_eq_num!(gt_eq_u32, u32);
703
+ impl_gt_eq_num!(gt_eq_u64, u64);
704
+ impl_gt_eq_num!(gt_eq_i8, i8);
705
+ impl_gt_eq_num!(gt_eq_i16, i16);
706
+ impl_gt_eq_num!(gt_eq_i32, i32);
707
+ impl_gt_eq_num!(gt_eq_i64, i64);
708
+ impl_gt_eq_num!(gt_eq_f32, f32);
709
+ impl_gt_eq_num!(gt_eq_f64, f64);
710
+ // impl_gt_eq_num!(gt_eq_str, &str);
711
+
712
/// Generates `RbSeries::$name`, which compares the wrapped series with a scalar
/// `rhs` of type `$type` using `lt` and wraps the resulting mask in a new
/// `RbSeries`. Comparison errors are converted with `RbPolarsErr::from`.
macro_rules! impl_lt_num {
    ($name:ident, $type:ty) => {
        impl RbSeries {
            pub fn $name(&self, rhs: $type) -> RbResult<RbSeries> {
                self.series
                    .borrow()
                    .lt(rhs)
                    .map(|mask| RbSeries::new(mask.into_series()))
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
624
722
 
625
- pub fn round(&self, decimals: u32) -> RbResult<Self> {
626
- let s = self
627
- .series
628
- .borrow()
629
- .round(decimals)
630
- .map_err(RbPolarsErr::from)?;
631
- Ok(s.into())
632
- }
723
+ impl_lt_num!(lt_u8, u8);
724
+ impl_lt_num!(lt_u16, u16);
725
+ impl_lt_num!(lt_u32, u32);
726
+ impl_lt_num!(lt_u64, u64);
727
+ impl_lt_num!(lt_i8, i8);
728
+ impl_lt_num!(lt_i16, i16);
729
+ impl_lt_num!(lt_i32, i32);
730
+ impl_lt_num!(lt_i64, i64);
731
+ impl_lt_num!(lt_f32, f32);
732
+ impl_lt_num!(lt_f64, f64);
733
+ // impl_lt_num!(lt_str, &str);
734
+
735
/// Generates `RbSeries::$name`, which compares the wrapped series with a scalar
/// `rhs` of type `$type` using `lt_eq` and wraps the resulting mask in a new
/// `RbSeries`. Comparison errors are converted with `RbPolarsErr::from`.
macro_rules! impl_lt_eq_num {
    ($name:ident, $type:ty) => {
        impl RbSeries {
            pub fn $name(&self, rhs: $type) -> RbResult<Self> {
                self.series
                    .borrow()
                    .lt_eq(rhs)
                    .map(|mask| RbSeries::new(mask.into_series()))
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
745
+
746
+ impl_lt_eq_num!(lt_eq_u8, u8);
747
+ impl_lt_eq_num!(lt_eq_u16, u16);
748
+ impl_lt_eq_num!(lt_eq_u32, u32);
749
+ impl_lt_eq_num!(lt_eq_u64, u64);
750
+ impl_lt_eq_num!(lt_eq_i8, i8);
751
+ impl_lt_eq_num!(lt_eq_i16, i16);
752
+ impl_lt_eq_num!(lt_eq_i32, i32);
753
+ impl_lt_eq_num!(lt_eq_i64, i64);
754
+ impl_lt_eq_num!(lt_eq_f32, f32);
755
+ impl_lt_eq_num!(lt_eq_f64, f64);
756
+ // impl_lt_eq_num!(lt_eq_str, &str);
757
+
758
+ pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
759
+ let mut series = Vec::new();
760
+ for item in rs.each() {
761
+ series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
762
+ }
763
+ Ok(series)
633
764
  }
634
765
 
635
766
  pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
636
767
  s.into_iter().map(RbSeries::new).collect()
637
768
  }
769
+
770
+ impl RbSeries {
771
+ pub fn new_opt_date(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
772
+ let len = values.len();
773
+ let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(&name, len);
774
+ for item in values.each() {
775
+ let v = item?;
776
+ if v.is_nil() {
777
+ builder.append_null();
778
+ } else {
779
+ // convert to DateTime for UTC
780
+ let v: Value = v.funcall("to_datetime", ())?;
781
+ let v: Value = v.funcall("to_time", ())?;
782
+ let v: Value = v.funcall("to_i", ())?;
783
+ // TODO use strict
784
+ builder.append_value(v.try_convert::<i32>()? / 86400);
785
+ }
786
+ }
787
+ let ca: ChunkedArray<Int32Type> = builder.finish();
788
+ Ok(ca.into_date().into_series().into())
789
+ }
790
+ }
@@ -0,0 +1,91 @@
1
+ // use polars::export::arrow2::array::Array;
2
+ use polars::prelude::*;
3
+
4
+ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
5
+ let logical_dtype = s.dtype().clone();
6
+ let idx = idx.cast(&IDX_DTYPE)?;
7
+ let idx = idx.rechunk();
8
+ let idx = idx.idx().unwrap();
9
+ let idx = idx.downcast_iter().next().unwrap();
10
+
11
+ // if idx.null_count() > 0 {
12
+ // return Err(PolarsError::ComputeError(
13
+ // "index values should not be null".into(),
14
+ // ));
15
+ // }
16
+
17
+ let idx = idx.values().as_slice();
18
+
19
+ let values = values.to_physical_repr().cast(&s.dtype().to_physical())?;
20
+
21
+ // do not shadow, otherwise s is not dropped immediately
22
+ // and we want to have mutable access
23
+ s = s.to_physical_repr().into_owned();
24
+ let mutable_s = s._get_inner_mut();
25
+
26
+ let s = match logical_dtype.to_physical() {
27
+ DataType::Int8 => {
28
+ let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
29
+ let values = values.i8()?;
30
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
31
+ }
32
+ DataType::Int16 => {
33
+ let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
34
+ let values = values.i16()?;
35
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
36
+ }
37
+ DataType::Int32 => {
38
+ let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
39
+ let values = values.i32()?;
40
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
41
+ }
42
+ DataType::Int64 => {
43
+ let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
44
+ let values = values.i64()?;
45
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
46
+ }
47
+ DataType::UInt8 => {
48
+ let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
49
+ let values = values.u8()?;
50
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
51
+ }
52
+ DataType::UInt16 => {
53
+ let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
54
+ let values = values.u16()?;
55
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
56
+ }
57
+ DataType::UInt32 => {
58
+ let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
59
+ let values = values.u32()?;
60
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
61
+ }
62
+ DataType::UInt64 => {
63
+ let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
64
+ let values = values.u64()?;
65
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
66
+ }
67
+ DataType::Float32 => {
68
+ let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
69
+ let values = values.f32()?;
70
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
71
+ }
72
+ DataType::Float64 => {
73
+ let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
74
+ let values = values.f64()?;
75
+ std::mem::take(ca).set_at_idx2(idx, values.into_iter())
76
+ }
77
+ DataType::Boolean => {
78
+ let ca = s.bool()?;
79
+ let values = values.bool()?;
80
+ ca.set_at_idx2(idx, values)
81
+ }
82
+ DataType::Utf8 => {
83
+ let ca = s.utf8()?;
84
+ let values = values.utf8()?;
85
+ ca.set_at_idx2(idx, values)
86
+ }
87
+ _ => panic!("not yet implemented for dtype: {}", logical_dtype),
88
+ };
89
+
90
+ s.and_then(|s| s.cast(&logical_dtype))
91
+ }
@@ -0,0 +1,19 @@
1
+ use polars::prelude::*;
2
+
3
+ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Series> {
4
+ match (s.dtype(), signed) {
5
+ (DataType::UInt64, true) => {
6
+ let ca = s.u64().unwrap();
7
+ Ok(ca.reinterpret_signed().into_series())
8
+ }
9
+ (DataType::UInt64, false) => Ok(s.clone()),
10
+ (DataType::Int64, false) => {
11
+ let ca = s.i64().unwrap();
12
+ Ok(ca.reinterpret_unsigned().into_series())
13
+ }
14
+ (DataType::Int64, true) => Ok(s.clone()),
15
+ _ => Err(PolarsError::ComputeError(
16
+ "reinterpret is only allowed for 64bit integers dtype, use cast otherwise".into(),
17
+ )),
18
+ }
19
+ }
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class BatchedCsvReader
3
4
  attr_accessor :_reader, :new_columns
4
5
 
@@ -1,11 +1,50 @@
1
1
  module Polars
2
+ # Namespace for categorical related expressions.
2
3
  class CatExpr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def initialize(expr)
6
9
  self._rbexpr = expr._rbexpr
7
10
  end
8
11
 
12
+ # Determine how this categorical series should be sorted.
13
+ #
14
+ # @param ordering ["physical", "lexical"]
15
+ # Ordering type:
16
+ #
17
+ # - 'physical' -> Use the physical representation of the categories to determine the order (default).
18
+ # - 'lexical' -> Use the string values to determine the ordering.
19
+ #
20
+ # @return [Expr]
21
+ #
22
+ # @example
23
+ # df = Polars::DataFrame.new(
24
+ # {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
25
+ # ).with_columns(
26
+ # [
27
+ # Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
28
+ # ]
29
+ # )
30
+ # df.sort(["cats", "vals"])
31
+ # # =>
32
+ # # shape: (5, 2)
33
+ # # ┌──────┬──────┐
34
+ # # │ cats ┆ vals │
35
+ # # │ --- ┆ --- │
36
+ # # │ cat ┆ i64 │
37
+ # # ╞══════╪══════╡
38
+ # # │ a ┆ 2 │
39
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
40
+ # # │ b ┆ 3 │
41
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
42
+ # # │ k ┆ 2 │
43
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
44
+ # # │ z ┆ 1 │
45
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
46
+ # # │ z ┆ 3 │
47
+ # # └──────┴──────┘
9
48
  def set_ordering(ordering)
10
49
  Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
11
50
  end