polars-df 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
data/ext/polars/src/series.rs
CHANGED
@@ -4,7 +4,10 @@ use polars::prelude::*;
|
|
4
4
|
use polars::series::IsSorted;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
|
+
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
8
|
+
use crate::apply_method_all_arrow_series2;
|
7
9
|
use crate::conversion::*;
|
10
|
+
use crate::list_construction::rb_seq_to_list;
|
8
11
|
use crate::set::set_at_idx;
|
9
12
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
10
13
|
|
@@ -123,6 +126,19 @@ impl RbSeries {
|
|
123
126
|
RbSeries::new(s)
|
124
127
|
}
|
125
128
|
|
129
|
+
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
130
|
+
let val = val
|
131
|
+
.each()
|
132
|
+
.map(|v| v.map(ObjectValue::from))
|
133
|
+
.collect::<RbResult<Vec<ObjectValue>>>()?;
|
134
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
135
|
+
Ok(s.into())
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
|
139
|
+
rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
|
140
|
+
}
|
141
|
+
|
126
142
|
pub fn estimated_size(&self) -> usize {
|
127
143
|
self.series.borrow().estimated_size()
|
128
144
|
}
|
@@ -515,6 +531,198 @@ impl RbSeries {
|
|
515
531
|
RbSeries::new(self.series.borrow().clone())
|
516
532
|
}
|
517
533
|
|
534
|
+
pub fn apply_lambda(
|
535
|
+
&self,
|
536
|
+
lambda: Value,
|
537
|
+
output_type: Option<Wrap<DataType>>,
|
538
|
+
skip_nulls: bool,
|
539
|
+
) -> RbResult<Self> {
|
540
|
+
let series = &self.series.borrow();
|
541
|
+
|
542
|
+
let output_type = output_type.map(|dt| dt.0);
|
543
|
+
|
544
|
+
macro_rules! dispatch_apply {
|
545
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
546
|
+
if matches!($self.dtype(), DataType::Object(_)) {
|
547
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
548
|
+
// ca.$method($($args),*)
|
549
|
+
todo!()
|
550
|
+
} else {
|
551
|
+
apply_method_all_arrow_series2!(
|
552
|
+
$self,
|
553
|
+
$method,
|
554
|
+
$($args),*
|
555
|
+
)
|
556
|
+
}
|
557
|
+
|
558
|
+
}
|
559
|
+
|
560
|
+
}
|
561
|
+
|
562
|
+
if matches!(
|
563
|
+
series.dtype(),
|
564
|
+
DataType::Datetime(_, _)
|
565
|
+
| DataType::Date
|
566
|
+
| DataType::Duration(_)
|
567
|
+
| DataType::Categorical(_)
|
568
|
+
| DataType::Time
|
569
|
+
) || !skip_nulls
|
570
|
+
{
|
571
|
+
let mut avs = Vec::with_capacity(series.len());
|
572
|
+
let iter = series.iter().map(|av| {
|
573
|
+
let input = Wrap(av);
|
574
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
575
|
+
.unwrap()
|
576
|
+
.0
|
577
|
+
});
|
578
|
+
avs.extend(iter);
|
579
|
+
return Ok(Series::new(&self.name(), &avs).into());
|
580
|
+
}
|
581
|
+
|
582
|
+
let out = match output_type {
|
583
|
+
Some(DataType::Int8) => {
|
584
|
+
let ca: Int8Chunked = dispatch_apply!(
|
585
|
+
series,
|
586
|
+
apply_lambda_with_primitive_out_type,
|
587
|
+
lambda,
|
588
|
+
0,
|
589
|
+
None
|
590
|
+
)?;
|
591
|
+
ca.into_series()
|
592
|
+
}
|
593
|
+
Some(DataType::Int16) => {
|
594
|
+
let ca: Int16Chunked = dispatch_apply!(
|
595
|
+
series,
|
596
|
+
apply_lambda_with_primitive_out_type,
|
597
|
+
lambda,
|
598
|
+
0,
|
599
|
+
None
|
600
|
+
)?;
|
601
|
+
ca.into_series()
|
602
|
+
}
|
603
|
+
Some(DataType::Int32) => {
|
604
|
+
let ca: Int32Chunked = dispatch_apply!(
|
605
|
+
series,
|
606
|
+
apply_lambda_with_primitive_out_type,
|
607
|
+
lambda,
|
608
|
+
0,
|
609
|
+
None
|
610
|
+
)?;
|
611
|
+
ca.into_series()
|
612
|
+
}
|
613
|
+
Some(DataType::Int64) => {
|
614
|
+
let ca: Int64Chunked = dispatch_apply!(
|
615
|
+
series,
|
616
|
+
apply_lambda_with_primitive_out_type,
|
617
|
+
lambda,
|
618
|
+
0,
|
619
|
+
None
|
620
|
+
)?;
|
621
|
+
ca.into_series()
|
622
|
+
}
|
623
|
+
Some(DataType::UInt8) => {
|
624
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
625
|
+
series,
|
626
|
+
apply_lambda_with_primitive_out_type,
|
627
|
+
lambda,
|
628
|
+
0,
|
629
|
+
None
|
630
|
+
)?;
|
631
|
+
ca.into_series()
|
632
|
+
}
|
633
|
+
Some(DataType::UInt16) => {
|
634
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
635
|
+
series,
|
636
|
+
apply_lambda_with_primitive_out_type,
|
637
|
+
lambda,
|
638
|
+
0,
|
639
|
+
None
|
640
|
+
)?;
|
641
|
+
ca.into_series()
|
642
|
+
}
|
643
|
+
Some(DataType::UInt32) => {
|
644
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
645
|
+
series,
|
646
|
+
apply_lambda_with_primitive_out_type,
|
647
|
+
lambda,
|
648
|
+
0,
|
649
|
+
None
|
650
|
+
)?;
|
651
|
+
ca.into_series()
|
652
|
+
}
|
653
|
+
Some(DataType::UInt64) => {
|
654
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
655
|
+
series,
|
656
|
+
apply_lambda_with_primitive_out_type,
|
657
|
+
lambda,
|
658
|
+
0,
|
659
|
+
None
|
660
|
+
)?;
|
661
|
+
ca.into_series()
|
662
|
+
}
|
663
|
+
Some(DataType::Float32) => {
|
664
|
+
let ca: Float32Chunked = dispatch_apply!(
|
665
|
+
series,
|
666
|
+
apply_lambda_with_primitive_out_type,
|
667
|
+
lambda,
|
668
|
+
0,
|
669
|
+
None
|
670
|
+
)?;
|
671
|
+
ca.into_series()
|
672
|
+
}
|
673
|
+
Some(DataType::Float64) => {
|
674
|
+
let ca: Float64Chunked = dispatch_apply!(
|
675
|
+
series,
|
676
|
+
apply_lambda_with_primitive_out_type,
|
677
|
+
lambda,
|
678
|
+
0,
|
679
|
+
None
|
680
|
+
)?;
|
681
|
+
ca.into_series()
|
682
|
+
}
|
683
|
+
Some(DataType::Boolean) => {
|
684
|
+
let ca: BooleanChunked =
|
685
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
686
|
+
ca.into_series()
|
687
|
+
}
|
688
|
+
Some(DataType::Date) => {
|
689
|
+
let ca: Int32Chunked = dispatch_apply!(
|
690
|
+
series,
|
691
|
+
apply_lambda_with_primitive_out_type,
|
692
|
+
lambda,
|
693
|
+
0,
|
694
|
+
None
|
695
|
+
)?;
|
696
|
+
ca.into_date().into_series()
|
697
|
+
}
|
698
|
+
Some(DataType::Datetime(tu, tz)) => {
|
699
|
+
let ca: Int64Chunked = dispatch_apply!(
|
700
|
+
series,
|
701
|
+
apply_lambda_with_primitive_out_type,
|
702
|
+
lambda,
|
703
|
+
0,
|
704
|
+
None
|
705
|
+
)?;
|
706
|
+
ca.into_datetime(tu, tz).into_series()
|
707
|
+
}
|
708
|
+
Some(DataType::Utf8) => {
|
709
|
+
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
710
|
+
|
711
|
+
ca.into_series()
|
712
|
+
}
|
713
|
+
Some(DataType::Object(_)) => {
|
714
|
+
let ca =
|
715
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
716
|
+
ca.into_series()
|
717
|
+
}
|
718
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
719
|
+
|
720
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
721
|
+
};
|
722
|
+
|
723
|
+
Ok(RbSeries::new(out))
|
724
|
+
}
|
725
|
+
|
518
726
|
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
519
727
|
let binding = mask.series.borrow();
|
520
728
|
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
@@ -613,6 +821,108 @@ impl RbSeries {
|
|
613
821
|
}
|
614
822
|
}
|
615
823
|
|
824
|
+
macro_rules! impl_set_with_mask {
|
825
|
+
($name:ident, $native:ty, $cast:ident, $variant:ident) => {
|
826
|
+
fn $name(
|
827
|
+
series: &Series,
|
828
|
+
filter: &RbSeries,
|
829
|
+
value: Option<$native>,
|
830
|
+
) -> PolarsResult<Series> {
|
831
|
+
let binding = filter.series.borrow();
|
832
|
+
let mask = binding.bool()?;
|
833
|
+
let ca = series.$cast()?;
|
834
|
+
let new = ca.set(mask, value)?;
|
835
|
+
Ok(new.into_series())
|
836
|
+
}
|
837
|
+
|
838
|
+
impl RbSeries {
|
839
|
+
pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
|
840
|
+
let series =
|
841
|
+
$name(&self.series.borrow(), filter, value).map_err(RbPolarsErr::from)?;
|
842
|
+
Ok(Self::new(series))
|
843
|
+
}
|
844
|
+
}
|
845
|
+
};
|
846
|
+
}
|
847
|
+
|
848
|
+
// impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
|
849
|
+
impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
|
850
|
+
impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
|
851
|
+
impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
|
852
|
+
impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
|
853
|
+
impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
|
854
|
+
impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
|
855
|
+
impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
|
856
|
+
impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
|
857
|
+
impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
|
858
|
+
impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
|
859
|
+
impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
|
860
|
+
|
861
|
+
macro_rules! impl_arithmetic {
|
862
|
+
($name:ident, $type:ty, $operand:tt) => {
|
863
|
+
impl RbSeries {
|
864
|
+
pub fn $name(&self, other: $type) -> RbResult<Self> {
|
865
|
+
Ok(RbSeries::new(&*self.series.borrow() $operand other))
|
866
|
+
}
|
867
|
+
}
|
868
|
+
};
|
869
|
+
}
|
870
|
+
|
871
|
+
impl_arithmetic!(add_u8, u8, +);
|
872
|
+
impl_arithmetic!(add_u16, u16, +);
|
873
|
+
impl_arithmetic!(add_u32, u32, +);
|
874
|
+
impl_arithmetic!(add_u64, u64, +);
|
875
|
+
impl_arithmetic!(add_i8, i8, +);
|
876
|
+
impl_arithmetic!(add_i16, i16, +);
|
877
|
+
impl_arithmetic!(add_i32, i32, +);
|
878
|
+
impl_arithmetic!(add_i64, i64, +);
|
879
|
+
impl_arithmetic!(add_datetime, i64, +);
|
880
|
+
impl_arithmetic!(add_duration, i64, +);
|
881
|
+
impl_arithmetic!(add_f32, f32, +);
|
882
|
+
impl_arithmetic!(add_f64, f64, +);
|
883
|
+
impl_arithmetic!(sub_u8, u8, -);
|
884
|
+
impl_arithmetic!(sub_u16, u16, -);
|
885
|
+
impl_arithmetic!(sub_u32, u32, -);
|
886
|
+
impl_arithmetic!(sub_u64, u64, -);
|
887
|
+
impl_arithmetic!(sub_i8, i8, -);
|
888
|
+
impl_arithmetic!(sub_i16, i16, -);
|
889
|
+
impl_arithmetic!(sub_i32, i32, -);
|
890
|
+
impl_arithmetic!(sub_i64, i64, -);
|
891
|
+
impl_arithmetic!(sub_datetime, i64, -);
|
892
|
+
impl_arithmetic!(sub_duration, i64, -);
|
893
|
+
impl_arithmetic!(sub_f32, f32, -);
|
894
|
+
impl_arithmetic!(sub_f64, f64, -);
|
895
|
+
impl_arithmetic!(div_u8, u8, /);
|
896
|
+
impl_arithmetic!(div_u16, u16, /);
|
897
|
+
impl_arithmetic!(div_u32, u32, /);
|
898
|
+
impl_arithmetic!(div_u64, u64, /);
|
899
|
+
impl_arithmetic!(div_i8, i8, /);
|
900
|
+
impl_arithmetic!(div_i16, i16, /);
|
901
|
+
impl_arithmetic!(div_i32, i32, /);
|
902
|
+
impl_arithmetic!(div_i64, i64, /);
|
903
|
+
impl_arithmetic!(div_f32, f32, /);
|
904
|
+
impl_arithmetic!(div_f64, f64, /);
|
905
|
+
impl_arithmetic!(mul_u8, u8, *);
|
906
|
+
impl_arithmetic!(mul_u16, u16, *);
|
907
|
+
impl_arithmetic!(mul_u32, u32, *);
|
908
|
+
impl_arithmetic!(mul_u64, u64, *);
|
909
|
+
impl_arithmetic!(mul_i8, i8, *);
|
910
|
+
impl_arithmetic!(mul_i16, i16, *);
|
911
|
+
impl_arithmetic!(mul_i32, i32, *);
|
912
|
+
impl_arithmetic!(mul_i64, i64, *);
|
913
|
+
impl_arithmetic!(mul_f32, f32, *);
|
914
|
+
impl_arithmetic!(mul_f64, f64, *);
|
915
|
+
impl_arithmetic!(rem_u8, u8, %);
|
916
|
+
impl_arithmetic!(rem_u16, u16, %);
|
917
|
+
impl_arithmetic!(rem_u32, u32, %);
|
918
|
+
impl_arithmetic!(rem_u64, u64, %);
|
919
|
+
impl_arithmetic!(rem_i8, i8, %);
|
920
|
+
impl_arithmetic!(rem_i16, i16, %);
|
921
|
+
impl_arithmetic!(rem_i32, i32, %);
|
922
|
+
impl_arithmetic!(rem_i64, i64, %);
|
923
|
+
impl_arithmetic!(rem_f32, f32, %);
|
924
|
+
impl_arithmetic!(rem_f64, f64, %);
|
925
|
+
|
616
926
|
macro_rules! impl_eq_num {
|
617
927
|
($name:ident, $type:ty) => {
|
618
928
|
impl RbSeries {
|
@@ -787,4 +1097,25 @@ impl RbSeries {
|
|
787
1097
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
788
1098
|
Ok(ca.into_date().into_series().into())
|
789
1099
|
}
|
1100
|
+
|
1101
|
+
pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
1102
|
+
let len = values.len();
|
1103
|
+
let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
|
1104
|
+
for item in values.each() {
|
1105
|
+
let v = item?;
|
1106
|
+
if v.is_nil() {
|
1107
|
+
builder.append_null();
|
1108
|
+
} else {
|
1109
|
+
let sec: i64 = v.funcall("to_i", ())?;
|
1110
|
+
let nsec: i64 = v.funcall("nsec", ())?;
|
1111
|
+
// TODO use strict
|
1112
|
+
builder.append_value(sec * 1_000_000_000 + nsec);
|
1113
|
+
}
|
1114
|
+
}
|
1115
|
+
let ca: ChunkedArray<Int64Type> = builder.finish();
|
1116
|
+
Ok(ca
|
1117
|
+
.into_datetime(TimeUnit::Nanoseconds, None)
|
1118
|
+
.into_series()
|
1119
|
+
.into())
|
1120
|
+
}
|
790
1121
|
}
|
data/ext/polars/src/utils.rs
CHANGED
@@ -17,3 +17,28 @@ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Se
|
|
17
17
|
)),
|
18
18
|
}
|
19
19
|
}
|
20
|
+
|
21
|
+
/// Calls `$method($args...)` on the typed chunked array behind `$self`,
/// choosing the accessor from `$self.dtype()`.
///
/// `DataType` must be in scope at the call site. The accessor results are
/// unwrapped, which relies on each accessor matching the dtype just checked.
///
/// # Panics
/// Panics with "dtype ... not supported" for any dtype without an arm below.
#[macro_export]
macro_rules! apply_method_all_arrow_series2 {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            // Signed integers
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            // Unsigned integers
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            // Floats
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // Booleans and strings
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
            // Temporal
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            // Nested. List is not dispatched:
            // DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            unsupported => panic!("dtype {:?} not supported", unsupported)
        }
    }
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
|
2
|
+
# Series.cat namespace.
|
3
|
+
class CatNameSpace
|
4
|
+
include ExprDispatch
|
5
|
+
|
6
|
+
self._accessor = "cat"
|
7
|
+
|
8
|
+
# @private
|
9
|
+
def initialize(series)
|
10
|
+
# Copy the `_s` value held by the given series onto this namespace object.
self._s = series._s
|
11
|
+
end
|
12
|
+
|
13
|
+
# Determine how this categorical series should be sorted.
|
14
|
+
#
|
15
|
+
# @param ordering ["physical", "lexical"]
|
16
|
+
# Ordering type:
|
17
|
+
#
|
18
|
+
# - 'physical' -> Use the physical representation of the categories to
|
19
|
+
# determine the order (default).
|
20
|
+
# - 'lexical' -> Use the string values to determine the ordering.
|
21
|
+
#
|
22
|
+
# @return [Series]
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
# df = Polars::DataFrame.new(
|
26
|
+
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
27
|
+
# ).with_columns(
|
28
|
+
# [
|
29
|
+
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
30
|
+
# ]
|
31
|
+
# )
|
32
|
+
# df.sort(["cats", "vals"])
|
33
|
+
# # =>
|
34
|
+
# # shape: (5, 2)
|
35
|
+
# # ┌──────┬──────┐
|
36
|
+
# # │ cats ┆ vals │
|
37
|
+
# # │ --- ┆ --- │
|
38
|
+
# # │ cat ┆ i64 │
|
39
|
+
# # ╞══════╪══════╡
|
40
|
+
# # │ a ┆ 2 │
|
41
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
42
|
+
# # │ b ┆ 3 │
|
43
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
44
|
+
# # │ k ┆ 2 │
|
45
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
46
|
+
# # │ z ┆ 1 │
|
47
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
48
|
+
# # │ z ┆ 3 │
|
49
|
+
# # └──────┴──────┘
|
50
|
+
def set_ordering(ordering)
|
51
|
+
# Bare `super` re-sends this call with the same `ordering` argument.
# NOTE(review): presumably handled by the included ExprDispatch - confirm.
super
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Polars
|
2
|
+
module Convert
|
3
|
+
# Construct a DataFrame from a hash of arrays.
|
4
|
+
#
|
5
|
+
# This operation clones data, unless you pass in a `Hash<String, Series>`.
|
6
|
+
#
|
7
|
+
# @param data [Hash]
|
8
|
+
# Two-dimensional data represented as a hash. Hash must contain
|
9
|
+
# arrays.
|
10
|
+
# @param columns [Array]
|
11
|
+
# Column labels to use for resulting DataFrame. If specified, overrides any
|
12
|
+
# labels already present in the data. Must match data dimensions.
|
13
|
+
#
|
14
|
+
# @return [DataFrame]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# data = {"a" => [1, 2], "b" => [3, 4]}
|
18
|
+
# Polars.from_hash(data)
|
19
|
+
# # =>
|
20
|
+
# # shape: (2, 2)
|
21
|
+
# # ┌─────┬─────┐
|
22
|
+
# # │ a ┆ b │
|
23
|
+
# # │ --- ┆ --- │
|
24
|
+
# # │ i64 ┆ i64 │
|
25
|
+
# # ╞═════╪═════╡
|
26
|
+
# # │ 1 ┆ 3 │
|
27
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
28
|
+
# # │ 2 ┆ 4 │
|
29
|
+
# # └─────┴─────┘
|
30
|
+
def from_hash(data, columns: nil)
|
31
|
+
# Delegate to DataFrame._from_hash, forwarding `data` and `columns` unchanged.
DataFrame._from_hash(data, columns: columns)
|
32
|
+
end
|
33
|
+
|
34
|
+
# Construct a DataFrame from an array of hashes. This operation clones data.
|
35
|
+
#
|
36
|
+
# @param hashes [Array]
|
37
|
+
# Array with hashes mapping column name to value.
|
38
|
+
# @param infer_schema_length [Integer]
|
39
|
+
# How many hashes/rows to scan to determine the data types
|
40
|
+
# if set to `nil` all rows are scanned. This will be slow.
|
41
|
+
# @param schema [Object]
|
42
|
+
# Schema that (partially) overwrites the inferred schema.
|
43
|
+
#
|
44
|
+
# @return [DataFrame]
|
45
|
+
#
|
46
|
+
# @example
|
47
|
+
# data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
|
48
|
+
# Polars.from_hashes(data)
|
49
|
+
# # =>
|
50
|
+
# # shape: (3, 2)
|
51
|
+
# # ┌─────┬─────┐
|
52
|
+
# # │ a ┆ b │
|
53
|
+
# # │ --- ┆ --- │
|
54
|
+
# # │ i64 ┆ i64 │
|
55
|
+
# # ╞═════╪═════╡
|
56
|
+
# # │ 1 ┆ 4 │
|
57
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
58
|
+
# # │ 2 ┆ 5 │
|
59
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
60
|
+
# # │ 3 ┆ 6 │
|
61
|
+
# # └─────┴─────┘
|
62
|
+
#
|
63
|
+
# @example Overwrite first column name and dtype
|
64
|
+
# Polars.from_hashes(data, schema: {"c" => :i32})
|
65
|
+
# # =>
|
66
|
+
# # shape: (3, 2)
|
67
|
+
# # ┌─────┬─────┐
|
68
|
+
# # │ c ┆ b │
|
69
|
+
# # │ --- ┆ --- │
|
70
|
+
# # │ i32 ┆ i64 │
|
71
|
+
# # ╞═════╪═════╡
|
72
|
+
# # │ 1 ┆ 4 │
|
73
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
74
|
+
# # │ 2 ┆ 5 │
|
75
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
76
|
+
# # │ 3 ┆ 6 │
|
77
|
+
# # └─────┴─────┘
|
78
|
+
#
|
79
|
+
# @example Let polars infer the dtypes but inform about a 3rd column
|
80
|
+
# Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
|
81
|
+
# # shape: (3, 3)
|
82
|
+
# # ┌─────┬─────┬──────┐
|
83
|
+
# # │ a ┆ b ┆ c │
|
84
|
+
# # │ --- ┆ --- ┆ --- │
|
85
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
86
|
+
# # ╞═════╪═════╪══════╡
|
87
|
+
# # │ 1 ┆ 4 ┆ null │
|
88
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
89
|
+
# # │ 2 ┆ 5 ┆ null │
|
90
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
91
|
+
# # │ 3 ┆ 6 ┆ null │
|
92
|
+
# # └─────┴─────┴──────┘
|
93
|
+
# def from_hashes(hashes, infer_schema_length: 50, schema: nil)
|
94
|
+
# DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
|
95
|
+
# end
|
96
|
+
|
97
|
+
# def from_records
|
98
|
+
# end
|
99
|
+
end
|
100
|
+
end
|