polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/ext/polars/src/series.rs
CHANGED
@@ -5,6 +5,8 @@ use polars::series::IsSorted;
|
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
7
|
use crate::conversion::*;
|
8
|
+
use crate::list_construction::rb_seq_to_list;
|
9
|
+
use crate::set::set_at_idx;
|
8
10
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
9
11
|
|
10
12
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -122,6 +124,19 @@ impl RbSeries {
|
|
122
124
|
RbSeries::new(s)
|
123
125
|
}
|
124
126
|
|
127
|
+
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
128
|
+
let val = val
|
129
|
+
.each()
|
130
|
+
.map(|v| v.map(ObjectValue::from))
|
131
|
+
.collect::<RbResult<Vec<ObjectValue>>>()?;
|
132
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
133
|
+
Ok(s.into())
|
134
|
+
}
|
135
|
+
|
136
|
+
pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
|
137
|
+
rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
|
138
|
+
}
|
139
|
+
|
125
140
|
pub fn estimated_size(&self) -> usize {
|
126
141
|
self.series.borrow().estimated_size()
|
127
142
|
}
|
@@ -474,6 +489,12 @@ impl RbSeries {
|
|
474
489
|
s.into_iter().collect()
|
475
490
|
} else if let Ok(s) = series.utf8() {
|
476
491
|
s.into_iter().collect()
|
492
|
+
} else if let Ok(_s) = series.date() {
|
493
|
+
let a = RArray::with_capacity(series.len());
|
494
|
+
for v in series.iter() {
|
495
|
+
a.push::<Value>(Wrap(v).into()).unwrap();
|
496
|
+
}
|
497
|
+
a
|
477
498
|
} else {
|
478
499
|
unimplemented!();
|
479
500
|
}
|
@@ -594,44 +615,211 @@ impl RbSeries {
|
|
594
615
|
}
|
595
616
|
}
|
596
617
|
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
618
|
+
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
619
|
+
let mut s = self.series.borrow_mut();
|
620
|
+
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
621
|
+
Ok(out) => {
|
622
|
+
*s = out;
|
623
|
+
Ok(())
|
624
|
+
}
|
625
|
+
Err(e) => Err(RbPolarsErr::from(e)),
|
626
|
+
}
|
601
627
|
}
|
628
|
+
}
|
602
629
|
|
603
|
-
|
604
|
-
|
605
|
-
|
630
|
+
macro_rules! impl_eq_num {
|
631
|
+
($name:ident, $type:ty) => {
|
632
|
+
impl RbSeries {
|
633
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
634
|
+
let s = self.series.borrow().equal(rhs).map_err(RbPolarsErr::from)?;
|
635
|
+
Ok(RbSeries::new(s.into_series()))
|
636
|
+
}
|
637
|
+
}
|
638
|
+
};
|
639
|
+
}
|
606
640
|
|
607
|
-
|
608
|
-
|
609
|
-
|
641
|
+
impl_eq_num!(eq_u8, u8);
|
642
|
+
impl_eq_num!(eq_u16, u16);
|
643
|
+
impl_eq_num!(eq_u32, u32);
|
644
|
+
impl_eq_num!(eq_u64, u64);
|
645
|
+
impl_eq_num!(eq_i8, i8);
|
646
|
+
impl_eq_num!(eq_i16, i16);
|
647
|
+
impl_eq_num!(eq_i32, i32);
|
648
|
+
impl_eq_num!(eq_i64, i64);
|
649
|
+
impl_eq_num!(eq_f32, f32);
|
650
|
+
impl_eq_num!(eq_f64, f64);
|
651
|
+
// impl_eq_num!(eq_str, &str);
|
652
|
+
|
653
|
+
macro_rules! impl_neq_num {
|
654
|
+
($name:ident, $type:ty) => {
|
655
|
+
impl RbSeries {
|
656
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
657
|
+
let s = self
|
658
|
+
.series
|
659
|
+
.borrow()
|
660
|
+
.not_equal(rhs)
|
661
|
+
.map_err(RbPolarsErr::from)?;
|
662
|
+
Ok(RbSeries::new(s.into_series()))
|
663
|
+
}
|
664
|
+
}
|
665
|
+
};
|
666
|
+
}
|
610
667
|
|
611
|
-
|
612
|
-
|
613
|
-
|
668
|
+
impl_neq_num!(neq_u8, u8);
|
669
|
+
impl_neq_num!(neq_u16, u16);
|
670
|
+
impl_neq_num!(neq_u32, u32);
|
671
|
+
impl_neq_num!(neq_u64, u64);
|
672
|
+
impl_neq_num!(neq_i8, i8);
|
673
|
+
impl_neq_num!(neq_i16, i16);
|
674
|
+
impl_neq_num!(neq_i32, i32);
|
675
|
+
impl_neq_num!(neq_i64, i64);
|
676
|
+
impl_neq_num!(neq_f32, f32);
|
677
|
+
impl_neq_num!(neq_f64, f64);
|
678
|
+
// impl_neq_num!(neq_str, &str);
|
679
|
+
|
680
|
+
macro_rules! impl_gt_num {
|
681
|
+
($name:ident, $type:ty) => {
|
682
|
+
impl RbSeries {
|
683
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
684
|
+
let s = self.series.borrow().gt(rhs).map_err(RbPolarsErr::from)?;
|
685
|
+
Ok(RbSeries::new(s.into_series()))
|
686
|
+
}
|
687
|
+
}
|
688
|
+
};
|
689
|
+
}
|
614
690
|
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
691
|
+
impl_gt_num!(gt_u8, u8);
|
692
|
+
impl_gt_num!(gt_u16, u16);
|
693
|
+
impl_gt_num!(gt_u32, u32);
|
694
|
+
impl_gt_num!(gt_u64, u64);
|
695
|
+
impl_gt_num!(gt_i8, i8);
|
696
|
+
impl_gt_num!(gt_i16, i16);
|
697
|
+
impl_gt_num!(gt_i32, i32);
|
698
|
+
impl_gt_num!(gt_i64, i64);
|
699
|
+
impl_gt_num!(gt_f32, f32);
|
700
|
+
impl_gt_num!(gt_f64, f64);
|
701
|
+
// impl_gt_num!(gt_str, &str);
|
702
|
+
|
703
|
+
macro_rules! impl_gt_eq_num {
|
704
|
+
($name:ident, $type:ty) => {
|
705
|
+
impl RbSeries {
|
706
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
707
|
+
let s = self.series.borrow().gt_eq(rhs).map_err(RbPolarsErr::from)?;
|
708
|
+
Ok(RbSeries::new(s.into_series()))
|
709
|
+
}
|
710
|
+
}
|
711
|
+
};
|
712
|
+
}
|
619
713
|
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
714
|
+
impl_gt_eq_num!(gt_eq_u8, u8);
|
715
|
+
impl_gt_eq_num!(gt_eq_u16, u16);
|
716
|
+
impl_gt_eq_num!(gt_eq_u32, u32);
|
717
|
+
impl_gt_eq_num!(gt_eq_u64, u64);
|
718
|
+
impl_gt_eq_num!(gt_eq_i8, i8);
|
719
|
+
impl_gt_eq_num!(gt_eq_i16, i16);
|
720
|
+
impl_gt_eq_num!(gt_eq_i32, i32);
|
721
|
+
impl_gt_eq_num!(gt_eq_i64, i64);
|
722
|
+
impl_gt_eq_num!(gt_eq_f32, f32);
|
723
|
+
impl_gt_eq_num!(gt_eq_f64, f64);
|
724
|
+
// impl_gt_eq_num!(gt_eq_str, &str);
|
725
|
+
|
726
|
+
macro_rules! impl_lt_num {
|
727
|
+
($name:ident, $type:ty) => {
|
728
|
+
impl RbSeries {
|
729
|
+
pub fn $name(&self, rhs: $type) -> RbResult<RbSeries> {
|
730
|
+
let s = self.series.borrow().lt(rhs).map_err(RbPolarsErr::from)?;
|
731
|
+
Ok(RbSeries::new(s.into_series()))
|
732
|
+
}
|
733
|
+
}
|
734
|
+
};
|
735
|
+
}
|
624
736
|
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
737
|
+
impl_lt_num!(lt_u8, u8);
|
738
|
+
impl_lt_num!(lt_u16, u16);
|
739
|
+
impl_lt_num!(lt_u32, u32);
|
740
|
+
impl_lt_num!(lt_u64, u64);
|
741
|
+
impl_lt_num!(lt_i8, i8);
|
742
|
+
impl_lt_num!(lt_i16, i16);
|
743
|
+
impl_lt_num!(lt_i32, i32);
|
744
|
+
impl_lt_num!(lt_i64, i64);
|
745
|
+
impl_lt_num!(lt_f32, f32);
|
746
|
+
impl_lt_num!(lt_f64, f64);
|
747
|
+
// impl_lt_num!(lt_str, &str);
|
748
|
+
|
749
|
+
macro_rules! impl_lt_eq_num {
|
750
|
+
($name:ident, $type:ty) => {
|
751
|
+
impl RbSeries {
|
752
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
753
|
+
let s = self.series.borrow().lt_eq(rhs).map_err(RbPolarsErr::from)?;
|
754
|
+
Ok(RbSeries::new(s.into_series()))
|
755
|
+
}
|
756
|
+
}
|
757
|
+
};
|
758
|
+
}
|
759
|
+
|
760
|
+
impl_lt_eq_num!(lt_eq_u8, u8);
|
761
|
+
impl_lt_eq_num!(lt_eq_u16, u16);
|
762
|
+
impl_lt_eq_num!(lt_eq_u32, u32);
|
763
|
+
impl_lt_eq_num!(lt_eq_u64, u64);
|
764
|
+
impl_lt_eq_num!(lt_eq_i8, i8);
|
765
|
+
impl_lt_eq_num!(lt_eq_i16, i16);
|
766
|
+
impl_lt_eq_num!(lt_eq_i32, i32);
|
767
|
+
impl_lt_eq_num!(lt_eq_i64, i64);
|
768
|
+
impl_lt_eq_num!(lt_eq_f32, f32);
|
769
|
+
impl_lt_eq_num!(lt_eq_f64, f64);
|
770
|
+
// impl_lt_eq_num!(lt_eq_str, &str);
|
771
|
+
|
772
|
+
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
773
|
+
let mut series = Vec::new();
|
774
|
+
for item in rs.each() {
|
775
|
+
series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
|
776
|
+
}
|
777
|
+
Ok(series)
|
633
778
|
}
|
634
779
|
|
635
780
|
pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
|
636
781
|
s.into_iter().map(RbSeries::new).collect()
|
637
782
|
}
|
783
|
+
|
784
|
+
impl RbSeries {
|
785
|
+
pub fn new_opt_date(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
786
|
+
let len = values.len();
|
787
|
+
let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(&name, len);
|
788
|
+
for item in values.each() {
|
789
|
+
let v = item?;
|
790
|
+
if v.is_nil() {
|
791
|
+
builder.append_null();
|
792
|
+
} else {
|
793
|
+
// convert to DateTime for UTC
|
794
|
+
let v: Value = v.funcall("to_datetime", ())?;
|
795
|
+
let v: Value = v.funcall("to_time", ())?;
|
796
|
+
let v: Value = v.funcall("to_i", ())?;
|
797
|
+
// TODO use strict
|
798
|
+
builder.append_value(v.try_convert::<i32>()? / 86400);
|
799
|
+
}
|
800
|
+
}
|
801
|
+
let ca: ChunkedArray<Int32Type> = builder.finish();
|
802
|
+
Ok(ca.into_date().into_series().into())
|
803
|
+
}
|
804
|
+
|
805
|
+
pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
806
|
+
let len = values.len();
|
807
|
+
let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
|
808
|
+
for item in values.each() {
|
809
|
+
let v = item?;
|
810
|
+
if v.is_nil() {
|
811
|
+
builder.append_null();
|
812
|
+
} else {
|
813
|
+
let sec: i64 = v.funcall("to_i", ())?;
|
814
|
+
let nsec: i64 = v.funcall("nsec", ())?;
|
815
|
+
// TODO use strict
|
816
|
+
builder.append_value(sec * 1_000_000_000 + nsec);
|
817
|
+
}
|
818
|
+
}
|
819
|
+
let ca: ChunkedArray<Int64Type> = builder.finish();
|
820
|
+
Ok(ca
|
821
|
+
.into_datetime(TimeUnit::Nanoseconds, None)
|
822
|
+
.into_series()
|
823
|
+
.into())
|
824
|
+
}
|
825
|
+
}
|
@@ -0,0 +1,91 @@
|
|
1
|
+
// use polars::export::arrow2::array::Array;
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
5
|
+
let logical_dtype = s.dtype().clone();
|
6
|
+
let idx = idx.cast(&IDX_DTYPE)?;
|
7
|
+
let idx = idx.rechunk();
|
8
|
+
let idx = idx.idx().unwrap();
|
9
|
+
let idx = idx.downcast_iter().next().unwrap();
|
10
|
+
|
11
|
+
// if idx.null_count() > 0 {
|
12
|
+
// return Err(PolarsError::ComputeError(
|
13
|
+
// "index values should not be null".into(),
|
14
|
+
// ));
|
15
|
+
// }
|
16
|
+
|
17
|
+
let idx = idx.values().as_slice();
|
18
|
+
|
19
|
+
let values = values.to_physical_repr().cast(&s.dtype().to_physical())?;
|
20
|
+
|
21
|
+
// do not shadow, otherwise s is not dropped immediately
|
22
|
+
// and we want to have mutable access
|
23
|
+
s = s.to_physical_repr().into_owned();
|
24
|
+
let mutable_s = s._get_inner_mut();
|
25
|
+
|
26
|
+
let s = match logical_dtype.to_physical() {
|
27
|
+
DataType::Int8 => {
|
28
|
+
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
29
|
+
let values = values.i8()?;
|
30
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
31
|
+
}
|
32
|
+
DataType::Int16 => {
|
33
|
+
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
34
|
+
let values = values.i16()?;
|
35
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
36
|
+
}
|
37
|
+
DataType::Int32 => {
|
38
|
+
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
39
|
+
let values = values.i32()?;
|
40
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
41
|
+
}
|
42
|
+
DataType::Int64 => {
|
43
|
+
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
44
|
+
let values = values.i64()?;
|
45
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
46
|
+
}
|
47
|
+
DataType::UInt8 => {
|
48
|
+
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
49
|
+
let values = values.u8()?;
|
50
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
51
|
+
}
|
52
|
+
DataType::UInt16 => {
|
53
|
+
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
54
|
+
let values = values.u16()?;
|
55
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
56
|
+
}
|
57
|
+
DataType::UInt32 => {
|
58
|
+
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
59
|
+
let values = values.u32()?;
|
60
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
61
|
+
}
|
62
|
+
DataType::UInt64 => {
|
63
|
+
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
64
|
+
let values = values.u64()?;
|
65
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
66
|
+
}
|
67
|
+
DataType::Float32 => {
|
68
|
+
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
69
|
+
let values = values.f32()?;
|
70
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
71
|
+
}
|
72
|
+
DataType::Float64 => {
|
73
|
+
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
74
|
+
let values = values.f64()?;
|
75
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
76
|
+
}
|
77
|
+
DataType::Boolean => {
|
78
|
+
let ca = s.bool()?;
|
79
|
+
let values = values.bool()?;
|
80
|
+
ca.set_at_idx2(idx, values)
|
81
|
+
}
|
82
|
+
DataType::Utf8 => {
|
83
|
+
let ca = s.utf8()?;
|
84
|
+
let values = values.utf8()?;
|
85
|
+
ca.set_at_idx2(idx, values)
|
86
|
+
}
|
87
|
+
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
88
|
+
};
|
89
|
+
|
90
|
+
s.and_then(|s| s.cast(&logical_dtype))
|
91
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
3
|
+
pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Series> {
|
4
|
+
match (s.dtype(), signed) {
|
5
|
+
(DataType::UInt64, true) => {
|
6
|
+
let ca = s.u64().unwrap();
|
7
|
+
Ok(ca.reinterpret_signed().into_series())
|
8
|
+
}
|
9
|
+
(DataType::UInt64, false) => Ok(s.clone()),
|
10
|
+
(DataType::Int64, false) => {
|
11
|
+
let ca = s.i64().unwrap();
|
12
|
+
Ok(ca.reinterpret_unsigned().into_series())
|
13
|
+
}
|
14
|
+
(DataType::Int64, true) => Ok(s.clone()),
|
15
|
+
_ => Err(PolarsError::ComputeError(
|
16
|
+
"reinterpret is only allowed for 64bit integers dtype, use cast otherwise".into(),
|
17
|
+
)),
|
18
|
+
}
|
19
|
+
}
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -1,11 +1,50 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for categorical related expressions.
|
2
3
|
class CatExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
|
6
9
|
self._rbexpr = expr._rbexpr
|
7
10
|
end
|
8
11
|
|
12
|
+
# Determine how this categorical series should be sorted.
|
13
|
+
#
|
14
|
+
# @param ordering ["physical", "lexical"]
|
15
|
+
# Ordering type:
|
16
|
+
#
|
17
|
+
# - 'physical' -> Use the physical representation of the categories to determine the order (default).
|
18
|
+
# - 'lexical' -> Use the string values to determine the ordering.
|
19
|
+
#
|
20
|
+
# @return [Expr]
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# df = Polars::DataFrame.new(
|
24
|
+
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
25
|
+
# ).with_columns(
|
26
|
+
# [
|
27
|
+
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
28
|
+
# ]
|
29
|
+
# )
|
30
|
+
# df.sort(["cats", "vals"])
|
31
|
+
# # =>
|
32
|
+
# # shape: (5, 2)
|
33
|
+
# # ┌──────┬──────┐
|
34
|
+
# # │ cats ┆ vals │
|
35
|
+
# # │ --- ┆ --- │
|
36
|
+
# # │ cat ┆ i64 │
|
37
|
+
# # ╞══════╪══════╡
|
38
|
+
# # │ a ┆ 2 │
|
39
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
40
|
+
# # │ b ┆ 3 │
|
41
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
42
|
+
# # │ k ┆ 2 │
|
43
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
44
|
+
# # │ z ┆ 1 │
|
45
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
46
|
+
# # │ z ┆ 3 │
|
47
|
+
# # └──────┴──────┘
|
9
48
|
def set_ordering(ordering)
|
10
49
|
Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
|
11
50
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
|
2
|
+
# Series.cat namespace.
|
3
|
+
class CatNameSpace
|
4
|
+
include ExprDispatch
|
5
|
+
|
6
|
+
self._accessor = "cat"
|
7
|
+
|
8
|
+
# @private
|
9
|
+
def initialize(series)
|
10
|
+
self._s = series._s
|
11
|
+
end
|
12
|
+
|
13
|
+
# Determine how this categorical series should be sorted.
|
14
|
+
#
|
15
|
+
# @param ordering ["physical", "lexical"]
|
16
|
+
# Ordering type:
|
17
|
+
#
|
18
|
+
# - 'physical' -> Use the physical representation of the categories to
|
19
|
+
# determine the order (default).
|
20
|
+
# - 'lexical' -> Use the string values to determine the ordering.
|
21
|
+
#
|
22
|
+
# @return [Series]
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
# df = Polars::DataFrame.new(
|
26
|
+
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
27
|
+
# ).with_columns(
|
28
|
+
# [
|
29
|
+
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
30
|
+
# ]
|
31
|
+
# )
|
32
|
+
# df.sort(["cats", "vals"])
|
33
|
+
# # =>
|
34
|
+
# # shape: (5, 2)
|
35
|
+
# # ┌──────┬──────┐
|
36
|
+
# # │ cats ┆ vals │
|
37
|
+
# # │ --- ┆ --- │
|
38
|
+
# # │ cat ┆ i64 │
|
39
|
+
# # ╞══════╪══════╡
|
40
|
+
# # │ a ┆ 2 │
|
41
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
42
|
+
# # │ b ┆ 3 │
|
43
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
44
|
+
# # │ k ┆ 2 │
|
45
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
46
|
+
# # │ z ┆ 1 │
|
47
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
48
|
+
# # │ z ┆ 3 │
|
49
|
+
# # └──────┴──────┘
|
50
|
+
def set_ordering(ordering)
|
51
|
+
super
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|