polars-df 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/ext/polars/src/series.rs
CHANGED
@@ -5,6 +5,8 @@ use polars::series::IsSorted;
|
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
7
|
use crate::conversion::*;
|
8
|
+
use crate::list_construction::rb_seq_to_list;
|
9
|
+
use crate::set::set_at_idx;
|
8
10
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
9
11
|
|
10
12
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -122,6 +124,19 @@ impl RbSeries {
|
|
122
124
|
RbSeries::new(s)
|
123
125
|
}
|
124
126
|
|
127
|
+
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
128
|
+
let val = val
|
129
|
+
.each()
|
130
|
+
.map(|v| v.map(ObjectValue::from))
|
131
|
+
.collect::<RbResult<Vec<ObjectValue>>>()?;
|
132
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
133
|
+
Ok(s.into())
|
134
|
+
}
|
135
|
+
|
136
|
+
pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
|
137
|
+
rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
|
138
|
+
}
|
139
|
+
|
125
140
|
pub fn estimated_size(&self) -> usize {
|
126
141
|
self.series.borrow().estimated_size()
|
127
142
|
}
|
@@ -474,6 +489,12 @@ impl RbSeries {
|
|
474
489
|
s.into_iter().collect()
|
475
490
|
} else if let Ok(s) = series.utf8() {
|
476
491
|
s.into_iter().collect()
|
492
|
+
} else if let Ok(_s) = series.date() {
|
493
|
+
let a = RArray::with_capacity(series.len());
|
494
|
+
for v in series.iter() {
|
495
|
+
a.push::<Value>(Wrap(v).into()).unwrap();
|
496
|
+
}
|
497
|
+
a
|
477
498
|
} else {
|
478
499
|
unimplemented!();
|
479
500
|
}
|
@@ -594,44 +615,211 @@ impl RbSeries {
|
|
594
615
|
}
|
595
616
|
}
|
596
617
|
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
618
|
+
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
619
|
+
let mut s = self.series.borrow_mut();
|
620
|
+
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
621
|
+
Ok(out) => {
|
622
|
+
*s = out;
|
623
|
+
Ok(())
|
624
|
+
}
|
625
|
+
Err(e) => Err(RbPolarsErr::from(e)),
|
626
|
+
}
|
601
627
|
}
|
628
|
+
}
|
602
629
|
|
603
|
-
|
604
|
-
|
605
|
-
|
630
|
+
macro_rules! impl_eq_num {
|
631
|
+
($name:ident, $type:ty) => {
|
632
|
+
impl RbSeries {
|
633
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
634
|
+
let s = self.series.borrow().equal(rhs).map_err(RbPolarsErr::from)?;
|
635
|
+
Ok(RbSeries::new(s.into_series()))
|
636
|
+
}
|
637
|
+
}
|
638
|
+
};
|
639
|
+
}
|
606
640
|
|
607
|
-
|
608
|
-
|
609
|
-
|
641
|
+
impl_eq_num!(eq_u8, u8);
|
642
|
+
impl_eq_num!(eq_u16, u16);
|
643
|
+
impl_eq_num!(eq_u32, u32);
|
644
|
+
impl_eq_num!(eq_u64, u64);
|
645
|
+
impl_eq_num!(eq_i8, i8);
|
646
|
+
impl_eq_num!(eq_i16, i16);
|
647
|
+
impl_eq_num!(eq_i32, i32);
|
648
|
+
impl_eq_num!(eq_i64, i64);
|
649
|
+
impl_eq_num!(eq_f32, f32);
|
650
|
+
impl_eq_num!(eq_f64, f64);
|
651
|
+
// impl_eq_num!(eq_str, &str);
|
652
|
+
|
653
|
+
macro_rules! impl_neq_num {
|
654
|
+
($name:ident, $type:ty) => {
|
655
|
+
impl RbSeries {
|
656
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
657
|
+
let s = self
|
658
|
+
.series
|
659
|
+
.borrow()
|
660
|
+
.not_equal(rhs)
|
661
|
+
.map_err(RbPolarsErr::from)?;
|
662
|
+
Ok(RbSeries::new(s.into_series()))
|
663
|
+
}
|
664
|
+
}
|
665
|
+
};
|
666
|
+
}
|
610
667
|
|
611
|
-
|
612
|
-
|
613
|
-
|
668
|
+
impl_neq_num!(neq_u8, u8);
|
669
|
+
impl_neq_num!(neq_u16, u16);
|
670
|
+
impl_neq_num!(neq_u32, u32);
|
671
|
+
impl_neq_num!(neq_u64, u64);
|
672
|
+
impl_neq_num!(neq_i8, i8);
|
673
|
+
impl_neq_num!(neq_i16, i16);
|
674
|
+
impl_neq_num!(neq_i32, i32);
|
675
|
+
impl_neq_num!(neq_i64, i64);
|
676
|
+
impl_neq_num!(neq_f32, f32);
|
677
|
+
impl_neq_num!(neq_f64, f64);
|
678
|
+
// impl_neq_num!(neq_str, &str);
|
679
|
+
|
680
|
+
macro_rules! impl_gt_num {
|
681
|
+
($name:ident, $type:ty) => {
|
682
|
+
impl RbSeries {
|
683
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
684
|
+
let s = self.series.borrow().gt(rhs).map_err(RbPolarsErr::from)?;
|
685
|
+
Ok(RbSeries::new(s.into_series()))
|
686
|
+
}
|
687
|
+
}
|
688
|
+
};
|
689
|
+
}
|
614
690
|
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
691
|
+
impl_gt_num!(gt_u8, u8);
|
692
|
+
impl_gt_num!(gt_u16, u16);
|
693
|
+
impl_gt_num!(gt_u32, u32);
|
694
|
+
impl_gt_num!(gt_u64, u64);
|
695
|
+
impl_gt_num!(gt_i8, i8);
|
696
|
+
impl_gt_num!(gt_i16, i16);
|
697
|
+
impl_gt_num!(gt_i32, i32);
|
698
|
+
impl_gt_num!(gt_i64, i64);
|
699
|
+
impl_gt_num!(gt_f32, f32);
|
700
|
+
impl_gt_num!(gt_f64, f64);
|
701
|
+
// impl_gt_num!(gt_str, &str);
|
702
|
+
|
703
|
+
macro_rules! impl_gt_eq_num {
|
704
|
+
($name:ident, $type:ty) => {
|
705
|
+
impl RbSeries {
|
706
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
707
|
+
let s = self.series.borrow().gt_eq(rhs).map_err(RbPolarsErr::from)?;
|
708
|
+
Ok(RbSeries::new(s.into_series()))
|
709
|
+
}
|
710
|
+
}
|
711
|
+
};
|
712
|
+
}
|
619
713
|
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
714
|
+
impl_gt_eq_num!(gt_eq_u8, u8);
|
715
|
+
impl_gt_eq_num!(gt_eq_u16, u16);
|
716
|
+
impl_gt_eq_num!(gt_eq_u32, u32);
|
717
|
+
impl_gt_eq_num!(gt_eq_u64, u64);
|
718
|
+
impl_gt_eq_num!(gt_eq_i8, i8);
|
719
|
+
impl_gt_eq_num!(gt_eq_i16, i16);
|
720
|
+
impl_gt_eq_num!(gt_eq_i32, i32);
|
721
|
+
impl_gt_eq_num!(gt_eq_i64, i64);
|
722
|
+
impl_gt_eq_num!(gt_eq_f32, f32);
|
723
|
+
impl_gt_eq_num!(gt_eq_f64, f64);
|
724
|
+
// impl_gt_eq_num!(gt_eq_str, &str);
|
725
|
+
|
726
|
+
macro_rules! impl_lt_num {
|
727
|
+
($name:ident, $type:ty) => {
|
728
|
+
impl RbSeries {
|
729
|
+
pub fn $name(&self, rhs: $type) -> RbResult<RbSeries> {
|
730
|
+
let s = self.series.borrow().lt(rhs).map_err(RbPolarsErr::from)?;
|
731
|
+
Ok(RbSeries::new(s.into_series()))
|
732
|
+
}
|
733
|
+
}
|
734
|
+
};
|
735
|
+
}
|
624
736
|
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
737
|
+
impl_lt_num!(lt_u8, u8);
|
738
|
+
impl_lt_num!(lt_u16, u16);
|
739
|
+
impl_lt_num!(lt_u32, u32);
|
740
|
+
impl_lt_num!(lt_u64, u64);
|
741
|
+
impl_lt_num!(lt_i8, i8);
|
742
|
+
impl_lt_num!(lt_i16, i16);
|
743
|
+
impl_lt_num!(lt_i32, i32);
|
744
|
+
impl_lt_num!(lt_i64, i64);
|
745
|
+
impl_lt_num!(lt_f32, f32);
|
746
|
+
impl_lt_num!(lt_f64, f64);
|
747
|
+
// impl_lt_num!(lt_str, &str);
|
748
|
+
|
749
|
+
macro_rules! impl_lt_eq_num {
|
750
|
+
($name:ident, $type:ty) => {
|
751
|
+
impl RbSeries {
|
752
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
753
|
+
let s = self.series.borrow().lt_eq(rhs).map_err(RbPolarsErr::from)?;
|
754
|
+
Ok(RbSeries::new(s.into_series()))
|
755
|
+
}
|
756
|
+
}
|
757
|
+
};
|
758
|
+
}
|
759
|
+
|
760
|
+
impl_lt_eq_num!(lt_eq_u8, u8);
|
761
|
+
impl_lt_eq_num!(lt_eq_u16, u16);
|
762
|
+
impl_lt_eq_num!(lt_eq_u32, u32);
|
763
|
+
impl_lt_eq_num!(lt_eq_u64, u64);
|
764
|
+
impl_lt_eq_num!(lt_eq_i8, i8);
|
765
|
+
impl_lt_eq_num!(lt_eq_i16, i16);
|
766
|
+
impl_lt_eq_num!(lt_eq_i32, i32);
|
767
|
+
impl_lt_eq_num!(lt_eq_i64, i64);
|
768
|
+
impl_lt_eq_num!(lt_eq_f32, f32);
|
769
|
+
impl_lt_eq_num!(lt_eq_f64, f64);
|
770
|
+
// impl_lt_eq_num!(lt_eq_str, &str);
|
771
|
+
|
772
|
+
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
773
|
+
let mut series = Vec::new();
|
774
|
+
for item in rs.each() {
|
775
|
+
series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
|
776
|
+
}
|
777
|
+
Ok(series)
|
633
778
|
}
|
634
779
|
|
635
780
|
pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
|
636
781
|
s.into_iter().map(RbSeries::new).collect()
|
637
782
|
}
|
783
|
+
|
784
|
+
impl RbSeries {
|
785
|
+
pub fn new_opt_date(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
786
|
+
let len = values.len();
|
787
|
+
let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(&name, len);
|
788
|
+
for item in values.each() {
|
789
|
+
let v = item?;
|
790
|
+
if v.is_nil() {
|
791
|
+
builder.append_null();
|
792
|
+
} else {
|
793
|
+
// convert to DateTime for UTC
|
794
|
+
let v: Value = v.funcall("to_datetime", ())?;
|
795
|
+
let v: Value = v.funcall("to_time", ())?;
|
796
|
+
let v: Value = v.funcall("to_i", ())?;
|
797
|
+
// TODO use strict
|
798
|
+
builder.append_value(v.try_convert::<i32>()? / 86400);
|
799
|
+
}
|
800
|
+
}
|
801
|
+
let ca: ChunkedArray<Int32Type> = builder.finish();
|
802
|
+
Ok(ca.into_date().into_series().into())
|
803
|
+
}
|
804
|
+
|
805
|
+
pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
806
|
+
let len = values.len();
|
807
|
+
let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
|
808
|
+
for item in values.each() {
|
809
|
+
let v = item?;
|
810
|
+
if v.is_nil() {
|
811
|
+
builder.append_null();
|
812
|
+
} else {
|
813
|
+
let sec: i64 = v.funcall("to_i", ())?;
|
814
|
+
let nsec: i64 = v.funcall("nsec", ())?;
|
815
|
+
// TODO use strict
|
816
|
+
builder.append_value(sec * 1_000_000_000 + nsec);
|
817
|
+
}
|
818
|
+
}
|
819
|
+
let ca: ChunkedArray<Int64Type> = builder.finish();
|
820
|
+
Ok(ca
|
821
|
+
.into_datetime(TimeUnit::Nanoseconds, None)
|
822
|
+
.into_series()
|
823
|
+
.into())
|
824
|
+
}
|
825
|
+
}
|
@@ -0,0 +1,91 @@
|
|
1
|
+
// use polars::export::arrow2::array::Array;
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
5
|
+
let logical_dtype = s.dtype().clone();
|
6
|
+
let idx = idx.cast(&IDX_DTYPE)?;
|
7
|
+
let idx = idx.rechunk();
|
8
|
+
let idx = idx.idx().unwrap();
|
9
|
+
let idx = idx.downcast_iter().next().unwrap();
|
10
|
+
|
11
|
+
// if idx.null_count() > 0 {
|
12
|
+
// return Err(PolarsError::ComputeError(
|
13
|
+
// "index values should not be null".into(),
|
14
|
+
// ));
|
15
|
+
// }
|
16
|
+
|
17
|
+
let idx = idx.values().as_slice();
|
18
|
+
|
19
|
+
let values = values.to_physical_repr().cast(&s.dtype().to_physical())?;
|
20
|
+
|
21
|
+
// do not shadow, otherwise s is not dropped immediately
|
22
|
+
// and we want to have mutable access
|
23
|
+
s = s.to_physical_repr().into_owned();
|
24
|
+
let mutable_s = s._get_inner_mut();
|
25
|
+
|
26
|
+
let s = match logical_dtype.to_physical() {
|
27
|
+
DataType::Int8 => {
|
28
|
+
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
29
|
+
let values = values.i8()?;
|
30
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
31
|
+
}
|
32
|
+
DataType::Int16 => {
|
33
|
+
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
34
|
+
let values = values.i16()?;
|
35
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
36
|
+
}
|
37
|
+
DataType::Int32 => {
|
38
|
+
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
39
|
+
let values = values.i32()?;
|
40
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
41
|
+
}
|
42
|
+
DataType::Int64 => {
|
43
|
+
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
44
|
+
let values = values.i64()?;
|
45
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
46
|
+
}
|
47
|
+
DataType::UInt8 => {
|
48
|
+
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
49
|
+
let values = values.u8()?;
|
50
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
51
|
+
}
|
52
|
+
DataType::UInt16 => {
|
53
|
+
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
54
|
+
let values = values.u16()?;
|
55
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
56
|
+
}
|
57
|
+
DataType::UInt32 => {
|
58
|
+
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
59
|
+
let values = values.u32()?;
|
60
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
61
|
+
}
|
62
|
+
DataType::UInt64 => {
|
63
|
+
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
64
|
+
let values = values.u64()?;
|
65
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
66
|
+
}
|
67
|
+
DataType::Float32 => {
|
68
|
+
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
69
|
+
let values = values.f32()?;
|
70
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
71
|
+
}
|
72
|
+
DataType::Float64 => {
|
73
|
+
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
74
|
+
let values = values.f64()?;
|
75
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
76
|
+
}
|
77
|
+
DataType::Boolean => {
|
78
|
+
let ca = s.bool()?;
|
79
|
+
let values = values.bool()?;
|
80
|
+
ca.set_at_idx2(idx, values)
|
81
|
+
}
|
82
|
+
DataType::Utf8 => {
|
83
|
+
let ca = s.utf8()?;
|
84
|
+
let values = values.utf8()?;
|
85
|
+
ca.set_at_idx2(idx, values)
|
86
|
+
}
|
87
|
+
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
88
|
+
};
|
89
|
+
|
90
|
+
s.and_then(|s| s.cast(&logical_dtype))
|
91
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
3
|
+
pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Series> {
|
4
|
+
match (s.dtype(), signed) {
|
5
|
+
(DataType::UInt64, true) => {
|
6
|
+
let ca = s.u64().unwrap();
|
7
|
+
Ok(ca.reinterpret_signed().into_series())
|
8
|
+
}
|
9
|
+
(DataType::UInt64, false) => Ok(s.clone()),
|
10
|
+
(DataType::Int64, false) => {
|
11
|
+
let ca = s.i64().unwrap();
|
12
|
+
Ok(ca.reinterpret_unsigned().into_series())
|
13
|
+
}
|
14
|
+
(DataType::Int64, true) => Ok(s.clone()),
|
15
|
+
_ => Err(PolarsError::ComputeError(
|
16
|
+
"reinterpret is only allowed for 64bit integers dtype, use cast otherwise".into(),
|
17
|
+
)),
|
18
|
+
}
|
19
|
+
}
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -1,11 +1,50 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for categorical related expressions.
|
2
3
|
class CatExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
  # Store the underlying expression handle in the ivar backing `_rbexpr`.
  @_rbexpr = expr._rbexpr
end
|
8
11
|
|
12
|
+
# Determine how this categorical series should be sorted.
|
13
|
+
#
|
14
|
+
# @param ordering ["physical", "lexical"]
|
15
|
+
# Ordering type:
|
16
|
+
#
|
17
|
+
# - 'physical' -> Use the physical representation of the categories to determine the order (default).
|
18
|
+
# - 'lexical' -> Use the string values to determine the ordering.
|
19
|
+
#
|
20
|
+
# @return [Expr]
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# df = Polars::DataFrame.new(
|
24
|
+
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
25
|
+
# ).with_columns(
|
26
|
+
# [
|
27
|
+
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
28
|
+
# ]
|
29
|
+
# )
|
30
|
+
# df.sort(["cats", "vals"])
|
31
|
+
# # =>
|
32
|
+
# # shape: (5, 2)
|
33
|
+
# # ┌──────┬──────┐
|
34
|
+
# # │ cats ┆ vals │
|
35
|
+
# # │ --- ┆ --- │
|
36
|
+
# # │ cat ┆ i64 │
|
37
|
+
# # ╞══════╪══════╡
|
38
|
+
# # │ a ┆ 2 │
|
39
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
40
|
+
# # │ b ┆ 3 │
|
41
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
42
|
+
# # │ k ┆ 2 │
|
43
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
44
|
+
# # │ z ┆ 1 │
|
45
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
46
|
+
# # │ z ┆ 3 │
|
47
|
+
# # └──────┴──────┘
|
9
48
|
def set_ordering(ordering)
  # Apply the ordering on the underlying expression, then wrap the result.
  rbexpr = _rbexpr.cat_set_ordering(ordering)
  Utils.wrap_expr(rbexpr)
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
  # Series.cat namespace.
  class CatNameSpace
    include ExprDispatch

    self._accessor = "cat"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Set the ordering used when sorting this categorical series.
    #
    # @param ordering ["physical", "lexical"]
    #   Ordering type:
    #
    #   - 'physical' -> Order by the physical representation of the
    #     categories (default).
    #   - 'lexical' -> Order by the string values.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
    #   ).with_columns(
    #     [
    #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
    #     ]
    #   )
    #   df.sort(["cats", "vals"])
    #   # =>
    #   # shape: (5, 2)
    #   # ┌──────┬──────┐
    #   # │ cats ┆ vals │
    #   # │ ---  ┆ ---  │
    #   # │ cat  ┆ i64  │
    #   # ╞══════╪══════╡
    #   # │ a    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ b    ┆ 3    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ k    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 1    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 3    │
    #   # └──────┴──────┘
    def set_ordering(ordering)
      # Forward to the next implementation up the lookup chain
      # (presumably supplied via ExprDispatch — TODO confirm).
      super(ordering)
    end
  end
end
|