polars-df 0.1.3 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
data/ext/polars/src/series.rs
CHANGED
@@ -4,7 +4,10 @@ use polars::prelude::*;
|
|
4
4
|
use polars::series::IsSorted;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
|
+
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
8
|
+
use crate::apply_method_all_arrow_series2;
|
7
9
|
use crate::conversion::*;
|
10
|
+
use crate::list_construction::rb_seq_to_list;
|
8
11
|
use crate::set::set_at_idx;
|
9
12
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
10
13
|
|
@@ -123,6 +126,19 @@ impl RbSeries {
|
|
123
126
|
RbSeries::new(s)
|
124
127
|
}
|
125
128
|
|
129
|
+
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
130
|
+
let val = val
|
131
|
+
.each()
|
132
|
+
.map(|v| v.map(ObjectValue::from))
|
133
|
+
.collect::<RbResult<Vec<ObjectValue>>>()?;
|
134
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
135
|
+
Ok(s.into())
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
|
139
|
+
rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
|
140
|
+
}
|
141
|
+
|
126
142
|
pub fn estimated_size(&self) -> usize {
|
127
143
|
self.series.borrow().estimated_size()
|
128
144
|
}
|
@@ -515,6 +531,198 @@ impl RbSeries {
|
|
515
531
|
RbSeries::new(self.series.borrow().clone())
|
516
532
|
}
|
517
533
|
|
534
|
+
pub fn apply_lambda(
|
535
|
+
&self,
|
536
|
+
lambda: Value,
|
537
|
+
output_type: Option<Wrap<DataType>>,
|
538
|
+
skip_nulls: bool,
|
539
|
+
) -> RbResult<Self> {
|
540
|
+
let series = &self.series.borrow();
|
541
|
+
|
542
|
+
let output_type = output_type.map(|dt| dt.0);
|
543
|
+
|
544
|
+
macro_rules! dispatch_apply {
|
545
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
546
|
+
if matches!($self.dtype(), DataType::Object(_)) {
|
547
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
548
|
+
// ca.$method($($args),*)
|
549
|
+
todo!()
|
550
|
+
} else {
|
551
|
+
apply_method_all_arrow_series2!(
|
552
|
+
$self,
|
553
|
+
$method,
|
554
|
+
$($args),*
|
555
|
+
)
|
556
|
+
}
|
557
|
+
|
558
|
+
}
|
559
|
+
|
560
|
+
}
|
561
|
+
|
562
|
+
if matches!(
|
563
|
+
series.dtype(),
|
564
|
+
DataType::Datetime(_, _)
|
565
|
+
| DataType::Date
|
566
|
+
| DataType::Duration(_)
|
567
|
+
| DataType::Categorical(_)
|
568
|
+
| DataType::Time
|
569
|
+
) || !skip_nulls
|
570
|
+
{
|
571
|
+
let mut avs = Vec::with_capacity(series.len());
|
572
|
+
let iter = series.iter().map(|av| {
|
573
|
+
let input = Wrap(av);
|
574
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
575
|
+
.unwrap()
|
576
|
+
.0
|
577
|
+
});
|
578
|
+
avs.extend(iter);
|
579
|
+
return Ok(Series::new(&self.name(), &avs).into());
|
580
|
+
}
|
581
|
+
|
582
|
+
let out = match output_type {
|
583
|
+
Some(DataType::Int8) => {
|
584
|
+
let ca: Int8Chunked = dispatch_apply!(
|
585
|
+
series,
|
586
|
+
apply_lambda_with_primitive_out_type,
|
587
|
+
lambda,
|
588
|
+
0,
|
589
|
+
None
|
590
|
+
)?;
|
591
|
+
ca.into_series()
|
592
|
+
}
|
593
|
+
Some(DataType::Int16) => {
|
594
|
+
let ca: Int16Chunked = dispatch_apply!(
|
595
|
+
series,
|
596
|
+
apply_lambda_with_primitive_out_type,
|
597
|
+
lambda,
|
598
|
+
0,
|
599
|
+
None
|
600
|
+
)?;
|
601
|
+
ca.into_series()
|
602
|
+
}
|
603
|
+
Some(DataType::Int32) => {
|
604
|
+
let ca: Int32Chunked = dispatch_apply!(
|
605
|
+
series,
|
606
|
+
apply_lambda_with_primitive_out_type,
|
607
|
+
lambda,
|
608
|
+
0,
|
609
|
+
None
|
610
|
+
)?;
|
611
|
+
ca.into_series()
|
612
|
+
}
|
613
|
+
Some(DataType::Int64) => {
|
614
|
+
let ca: Int64Chunked = dispatch_apply!(
|
615
|
+
series,
|
616
|
+
apply_lambda_with_primitive_out_type,
|
617
|
+
lambda,
|
618
|
+
0,
|
619
|
+
None
|
620
|
+
)?;
|
621
|
+
ca.into_series()
|
622
|
+
}
|
623
|
+
Some(DataType::UInt8) => {
|
624
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
625
|
+
series,
|
626
|
+
apply_lambda_with_primitive_out_type,
|
627
|
+
lambda,
|
628
|
+
0,
|
629
|
+
None
|
630
|
+
)?;
|
631
|
+
ca.into_series()
|
632
|
+
}
|
633
|
+
Some(DataType::UInt16) => {
|
634
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
635
|
+
series,
|
636
|
+
apply_lambda_with_primitive_out_type,
|
637
|
+
lambda,
|
638
|
+
0,
|
639
|
+
None
|
640
|
+
)?;
|
641
|
+
ca.into_series()
|
642
|
+
}
|
643
|
+
Some(DataType::UInt32) => {
|
644
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
645
|
+
series,
|
646
|
+
apply_lambda_with_primitive_out_type,
|
647
|
+
lambda,
|
648
|
+
0,
|
649
|
+
None
|
650
|
+
)?;
|
651
|
+
ca.into_series()
|
652
|
+
}
|
653
|
+
Some(DataType::UInt64) => {
|
654
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
655
|
+
series,
|
656
|
+
apply_lambda_with_primitive_out_type,
|
657
|
+
lambda,
|
658
|
+
0,
|
659
|
+
None
|
660
|
+
)?;
|
661
|
+
ca.into_series()
|
662
|
+
}
|
663
|
+
Some(DataType::Float32) => {
|
664
|
+
let ca: Float32Chunked = dispatch_apply!(
|
665
|
+
series,
|
666
|
+
apply_lambda_with_primitive_out_type,
|
667
|
+
lambda,
|
668
|
+
0,
|
669
|
+
None
|
670
|
+
)?;
|
671
|
+
ca.into_series()
|
672
|
+
}
|
673
|
+
Some(DataType::Float64) => {
|
674
|
+
let ca: Float64Chunked = dispatch_apply!(
|
675
|
+
series,
|
676
|
+
apply_lambda_with_primitive_out_type,
|
677
|
+
lambda,
|
678
|
+
0,
|
679
|
+
None
|
680
|
+
)?;
|
681
|
+
ca.into_series()
|
682
|
+
}
|
683
|
+
Some(DataType::Boolean) => {
|
684
|
+
let ca: BooleanChunked =
|
685
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
686
|
+
ca.into_series()
|
687
|
+
}
|
688
|
+
Some(DataType::Date) => {
|
689
|
+
let ca: Int32Chunked = dispatch_apply!(
|
690
|
+
series,
|
691
|
+
apply_lambda_with_primitive_out_type,
|
692
|
+
lambda,
|
693
|
+
0,
|
694
|
+
None
|
695
|
+
)?;
|
696
|
+
ca.into_date().into_series()
|
697
|
+
}
|
698
|
+
Some(DataType::Datetime(tu, tz)) => {
|
699
|
+
let ca: Int64Chunked = dispatch_apply!(
|
700
|
+
series,
|
701
|
+
apply_lambda_with_primitive_out_type,
|
702
|
+
lambda,
|
703
|
+
0,
|
704
|
+
None
|
705
|
+
)?;
|
706
|
+
ca.into_datetime(tu, tz).into_series()
|
707
|
+
}
|
708
|
+
Some(DataType::Utf8) => {
|
709
|
+
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
710
|
+
|
711
|
+
ca.into_series()
|
712
|
+
}
|
713
|
+
Some(DataType::Object(_)) => {
|
714
|
+
let ca =
|
715
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
716
|
+
ca.into_series()
|
717
|
+
}
|
718
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
719
|
+
|
720
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
721
|
+
};
|
722
|
+
|
723
|
+
Ok(RbSeries::new(out))
|
724
|
+
}
|
725
|
+
|
518
726
|
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
519
727
|
let binding = mask.series.borrow();
|
520
728
|
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
@@ -613,6 +821,108 @@ impl RbSeries {
|
|
613
821
|
}
|
614
822
|
}
|
615
823
|
|
824
|
+
// Generates a private free function `$name` and a public `RbSeries::$name`
// method that overwrite the positions selected by a boolean mask with `value`.
//
// `$native` is the Rust value type, `$cast` the `Series` accessor used to reach
// the typed chunked array. `$variant` is accepted but not referenced in the
// expansion.
macro_rules! impl_set_with_mask {
    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
        fn $name(
            series: &Series,
            filter: &RbSeries,
            value: Option<$native>,
        ) -> PolarsResult<Series> {
            // The filter must be a boolean series; this is checked before the
            // target series is downcast.
            let filter_guard = filter.series.borrow();
            let mask = filter_guard.bool()?;
            let typed = series.$cast()?;
            typed.set(mask, value).map(|updated| updated.into_series())
        }

        impl RbSeries {
            pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
                $name(&self.series.borrow(), filter, value)
                    .map(Self::new)
                    .map_err(RbPolarsErr::from)
            }
        }
    };
}
|
847
|
+
|
848
|
+
// impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
|
849
|
+
impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
|
850
|
+
impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
|
851
|
+
impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
|
852
|
+
impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
|
853
|
+
impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
|
854
|
+
impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
|
855
|
+
impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
|
856
|
+
impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
|
857
|
+
impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
|
858
|
+
impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
|
859
|
+
impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
|
860
|
+
|
861
|
+
// Generates `RbSeries::$name`, which applies the binary operator `$operand`
// between the wrapped series and a scalar of type `$type` and wraps the
// result in a new `RbSeries`.
macro_rules! impl_arithmetic {
    ($name:ident, $type:ty, $operand:tt) => {
        impl RbSeries {
            pub fn $name(&self, other: $type) -> RbResult<Self> {
                let guard = self.series.borrow();
                let lhs: &Series = &guard;
                Ok(RbSeries::new(lhs $operand other))
            }
        }
    };
}
|
870
|
+
|
871
|
+
impl_arithmetic!(add_u8, u8, +);
|
872
|
+
impl_arithmetic!(add_u16, u16, +);
|
873
|
+
impl_arithmetic!(add_u32, u32, +);
|
874
|
+
impl_arithmetic!(add_u64, u64, +);
|
875
|
+
impl_arithmetic!(add_i8, i8, +);
|
876
|
+
impl_arithmetic!(add_i16, i16, +);
|
877
|
+
impl_arithmetic!(add_i32, i32, +);
|
878
|
+
impl_arithmetic!(add_i64, i64, +);
|
879
|
+
impl_arithmetic!(add_datetime, i64, +);
|
880
|
+
impl_arithmetic!(add_duration, i64, +);
|
881
|
+
impl_arithmetic!(add_f32, f32, +);
|
882
|
+
impl_arithmetic!(add_f64, f64, +);
|
883
|
+
impl_arithmetic!(sub_u8, u8, -);
|
884
|
+
impl_arithmetic!(sub_u16, u16, -);
|
885
|
+
impl_arithmetic!(sub_u32, u32, -);
|
886
|
+
impl_arithmetic!(sub_u64, u64, -);
|
887
|
+
impl_arithmetic!(sub_i8, i8, -);
|
888
|
+
impl_arithmetic!(sub_i16, i16, -);
|
889
|
+
impl_arithmetic!(sub_i32, i32, -);
|
890
|
+
impl_arithmetic!(sub_i64, i64, -);
|
891
|
+
impl_arithmetic!(sub_datetime, i64, -);
|
892
|
+
impl_arithmetic!(sub_duration, i64, -);
|
893
|
+
impl_arithmetic!(sub_f32, f32, -);
|
894
|
+
impl_arithmetic!(sub_f64, f64, -);
|
895
|
+
impl_arithmetic!(div_u8, u8, /);
|
896
|
+
impl_arithmetic!(div_u16, u16, /);
|
897
|
+
impl_arithmetic!(div_u32, u32, /);
|
898
|
+
impl_arithmetic!(div_u64, u64, /);
|
899
|
+
impl_arithmetic!(div_i8, i8, /);
|
900
|
+
impl_arithmetic!(div_i16, i16, /);
|
901
|
+
impl_arithmetic!(div_i32, i32, /);
|
902
|
+
impl_arithmetic!(div_i64, i64, /);
|
903
|
+
impl_arithmetic!(div_f32, f32, /);
|
904
|
+
impl_arithmetic!(div_f64, f64, /);
|
905
|
+
impl_arithmetic!(mul_u8, u8, *);
|
906
|
+
impl_arithmetic!(mul_u16, u16, *);
|
907
|
+
impl_arithmetic!(mul_u32, u32, *);
|
908
|
+
impl_arithmetic!(mul_u64, u64, *);
|
909
|
+
impl_arithmetic!(mul_i8, i8, *);
|
910
|
+
impl_arithmetic!(mul_i16, i16, *);
|
911
|
+
impl_arithmetic!(mul_i32, i32, *);
|
912
|
+
impl_arithmetic!(mul_i64, i64, *);
|
913
|
+
impl_arithmetic!(mul_f32, f32, *);
|
914
|
+
impl_arithmetic!(mul_f64, f64, *);
|
915
|
+
impl_arithmetic!(rem_u8, u8, %);
|
916
|
+
impl_arithmetic!(rem_u16, u16, %);
|
917
|
+
impl_arithmetic!(rem_u32, u32, %);
|
918
|
+
impl_arithmetic!(rem_u64, u64, %);
|
919
|
+
impl_arithmetic!(rem_i8, i8, %);
|
920
|
+
impl_arithmetic!(rem_i16, i16, %);
|
921
|
+
impl_arithmetic!(rem_i32, i32, %);
|
922
|
+
impl_arithmetic!(rem_i64, i64, %);
|
923
|
+
impl_arithmetic!(rem_f32, f32, %);
|
924
|
+
impl_arithmetic!(rem_f64, f64, %);
|
925
|
+
|
616
926
|
macro_rules! impl_eq_num {
|
617
927
|
($name:ident, $type:ty) => {
|
618
928
|
impl RbSeries {
|
@@ -787,4 +1097,25 @@ impl RbSeries {
|
|
787
1097
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
788
1098
|
Ok(ca.into_date().into_series().into())
|
789
1099
|
}
|
1100
|
+
|
1101
|
+
pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
1102
|
+
let len = values.len();
|
1103
|
+
let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
|
1104
|
+
for item in values.each() {
|
1105
|
+
let v = item?;
|
1106
|
+
if v.is_nil() {
|
1107
|
+
builder.append_null();
|
1108
|
+
} else {
|
1109
|
+
let sec: i64 = v.funcall("to_i", ())?;
|
1110
|
+
let nsec: i64 = v.funcall("nsec", ())?;
|
1111
|
+
// TODO use strict
|
1112
|
+
builder.append_value(sec * 1_000_000_000 + nsec);
|
1113
|
+
}
|
1114
|
+
}
|
1115
|
+
let ca: ChunkedArray<Int64Type> = builder.finish();
|
1116
|
+
Ok(ca
|
1117
|
+
.into_datetime(TimeUnit::Nanoseconds, None)
|
1118
|
+
.into_series()
|
1119
|
+
.into())
|
1120
|
+
}
|
790
1121
|
}
|
data/ext/polars/src/utils.rs
CHANGED
@@ -17,3 +17,28 @@ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Se
|
|
17
17
|
)),
|
18
18
|
}
|
19
19
|
}
|
20
|
+
|
21
|
+
/// Calls `$method($args...)` on the typed chunked array that matches the
/// dtype of the given series expression.
///
/// The downcast is `unwrap()`ed after the dtype has been matched. List is
/// currently disabled, and any dtype without an arm panics with
/// `dtype ... not supported`.
#[macro_export]
macro_rules! apply_method_all_arrow_series2 {
    ($series:expr, $method:ident, $($arg:expr),*) => {
        match $series.dtype() {
            DataType::Boolean => $series.bool().unwrap().$method($($arg),*),
            DataType::Utf8 => $series.utf8().unwrap().$method($($arg),*),
            DataType::UInt8 => $series.u8().unwrap().$method($($arg),*),
            DataType::UInt16 => $series.u16().unwrap().$method($($arg),*),
            DataType::UInt32 => $series.u32().unwrap().$method($($arg),*),
            DataType::UInt64 => $series.u64().unwrap().$method($($arg),*),
            DataType::Int8 => $series.i8().unwrap().$method($($arg),*),
            DataType::Int16 => $series.i16().unwrap().$method($($arg),*),
            DataType::Int32 => $series.i32().unwrap().$method($($arg),*),
            DataType::Int64 => $series.i64().unwrap().$method($($arg),*),
            DataType::Float32 => $series.f32().unwrap().$method($($arg),*),
            DataType::Float64 => $series.f64().unwrap().$method($($arg),*),
            DataType::Date => $series.date().unwrap().$method($($arg),*),
            DataType::Datetime(_, _) => $series.datetime().unwrap().$method($($arg),*),
            // DataType::List(_) => $series.list().unwrap().$method($($arg),*),
            DataType::Struct(_) => $series.struct_().unwrap().$method($($arg),*),
            unsupported => panic!("dtype {:?} not supported", unsupported)
        }
    }
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
  # Series.cat namespace.
  class CatNameSpace
    include ExprDispatch

    self._accessor = "cat"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Determine how this categorical series should be sorted.
    #
    # @param ordering ["physical", "lexical"]
    #   Ordering type:
    #
    #   - 'physical' -> Use the physical representation of the categories to
    #     determine the order (default).
    #   - 'lexical' -> Use the string values to determine the ordering.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
    #   ).with_columns(
    #     [
    #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
    #     ]
    #   )
    #   df.sort(["cats", "vals"])
    #   # =>
    #   # shape: (5, 2)
    #   # ┌──────┬──────┐
    #   # │ cats ┆ vals │
    #   # │ ---  ┆ ---  │
    #   # │ cat  ┆ i64  │
    #   # ╞══════╪══════╡
    #   # │ a    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ b    ┆ 3    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ k    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 1    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 3    │
    #   # └──────┴──────┘
    def set_ordering(ordering)
      # Hands the call, with the same argument, up the method lookup chain.
      super(ordering)
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Polars
  module Convert
    # Construct a DataFrame from a dictionary of sequences.
    #
    # This operation clones data, unless you pass in a `Hash<String, Series>`.
    #
    # @param data [Hash]
    #   Two-dimensional data represented as a hash. Hash must contain
    #   arrays.
    # @param columns [Array]
    #   Column labels to use for resulting DataFrame. If specified, overrides any
    #   labels already present in the data. Must match data dimensions.
    #
    # @return [DataFrame]
    #
    # @example
    #   data = {"a" => [1, 2], "b" => [3, 4]}
    #   Polars.from_hash(data)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 3   │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
    #   # │ 2   ┆ 4   │
    #   # └─────┴─────┘
    def from_hash(data, columns: nil)
      DataFrame._from_hash(data, columns: columns)
    end

    # Construct a DataFrame from a sequence of dictionaries. This operation clones data.
    #
    # @param hashes [Array]
    #   Array with hashes mapping column name to value.
    # @param infer_schema_length [Integer]
    #   How many hashes/rows to scan to determine the data types
    #   if set to `nil` all rows are scanned. This will be slow.
    # @param schema [Object]
    #   Schema that (partially) overwrites the inferred schema.
    #
    # @return [DataFrame]
    #
    # @example
    #   data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
    #   Polars.from_hashes(data)
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 4   │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
    #   # │ 2   ┆ 5   │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
    #   # │ 3   ┆ 6   │
    #   # └─────┴─────┘
    #
    # @example Overwrite first column name and dtype
    #   Polars.from_hashes(data, schema: {"c" => :i32})
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬─────┐
    #   # │ c   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i32 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 4   │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
    #   # │ 2   ┆ 5   │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
    #   # │ 3   ┆ 6   │
    #   # └─────┴─────┘
    #
    # @example Let polars infer the dtypes but inform about a 3rd column
    #   Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬──────┐
    #   # │ a   ┆ b   ┆ c    │
    #   # │ --- ┆ --- ┆ ---  │
    #   # │ i64 ┆ i64 ┆ i32  │
    #   # ╞═════╪═════╪══════╡
    #   # │ 1   ┆ 4   ┆ null │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ 2   ┆ 5   ┆ null │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ 3   ┆ 6   ┆ null │
    #   # └─────┴─────┴──────┘
    # def from_hashes(hashes, infer_schema_length: 50, schema: nil)
    #   DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
    # end

    # def from_records
    # end
  end
end
|