polars-df 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
data/ext/polars/src/series.rs
CHANGED
@@ -4,6 +4,8 @@ use polars::prelude::*;
|
|
4
4
|
use polars::series::IsSorted;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
|
+
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
8
|
+
use crate::apply_method_all_arrow_series2;
|
7
9
|
use crate::conversion::*;
|
8
10
|
use crate::list_construction::rb_seq_to_list;
|
9
11
|
use crate::set::set_at_idx;
|
@@ -142,7 +144,7 @@ impl RbSeries {
|
|
142
144
|
}
|
143
145
|
|
144
146
|
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
145
|
-
let val = format!("{}", self.series.borrow().get(index));
|
147
|
+
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
146
148
|
if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
|
147
149
|
let v_trunc = &val[..val
|
148
150
|
.char_indices()
|
@@ -170,8 +172,8 @@ impl RbSeries {
|
|
170
172
|
}
|
171
173
|
}
|
172
174
|
|
173
|
-
pub fn get_idx(&self, idx: usize) -> Value {
|
174
|
-
Wrap(self.series.borrow().get(idx)).into()
|
175
|
+
pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
|
176
|
+
Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into())
|
175
177
|
}
|
176
178
|
|
177
179
|
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
@@ -245,16 +247,37 @@ impl RbSeries {
|
|
245
247
|
}
|
246
248
|
}
|
247
249
|
|
248
|
-
pub fn max(&self) -> Value {
|
249
|
-
Wrap(
|
250
|
+
pub fn max(&self) -> RbResult<Value> {
|
251
|
+
Ok(Wrap(
|
252
|
+
self.series
|
253
|
+
.borrow()
|
254
|
+
.max_as_series()
|
255
|
+
.get(0)
|
256
|
+
.map_err(RbPolarsErr::from)?,
|
257
|
+
)
|
258
|
+
.into())
|
250
259
|
}
|
251
260
|
|
252
|
-
pub fn min(&self) -> Value {
|
253
|
-
Wrap(
|
261
|
+
pub fn min(&self) -> RbResult<Value> {
|
262
|
+
Ok(Wrap(
|
263
|
+
self.series
|
264
|
+
.borrow()
|
265
|
+
.min_as_series()
|
266
|
+
.get(0)
|
267
|
+
.map_err(RbPolarsErr::from)?,
|
268
|
+
)
|
269
|
+
.into())
|
254
270
|
}
|
255
271
|
|
256
|
-
pub fn sum(&self) -> Value {
|
257
|
-
Wrap(
|
272
|
+
pub fn sum(&self) -> RbResult<Value> {
|
273
|
+
Ok(Wrap(
|
274
|
+
self.series
|
275
|
+
.borrow()
|
276
|
+
.sum_as_series()
|
277
|
+
.get(0)
|
278
|
+
.map_err(RbPolarsErr::from)?,
|
279
|
+
)
|
280
|
+
.into())
|
258
281
|
}
|
259
282
|
|
260
283
|
pub fn n_chunks(&self) -> usize {
|
@@ -520,7 +543,8 @@ impl RbSeries {
|
|
520
543
|
.borrow()
|
521
544
|
.quantile_as_series(quantile, interpolation.0)
|
522
545
|
.map_err(|_| RbValueError::new_err("invalid quantile".into()))?
|
523
|
-
.get(0)
|
546
|
+
.get(0)
|
547
|
+
.unwrap_or(AnyValue::Null),
|
524
548
|
)
|
525
549
|
.into())
|
526
550
|
}
|
@@ -529,6 +553,198 @@ impl RbSeries {
|
|
529
553
|
RbSeries::new(self.series.borrow().clone())
|
530
554
|
}
|
531
555
|
|
556
|
+
pub fn apply_lambda(
|
557
|
+
&self,
|
558
|
+
lambda: Value,
|
559
|
+
output_type: Option<Wrap<DataType>>,
|
560
|
+
skip_nulls: bool,
|
561
|
+
) -> RbResult<Self> {
|
562
|
+
let series = &self.series.borrow();
|
563
|
+
|
564
|
+
let output_type = output_type.map(|dt| dt.0);
|
565
|
+
|
566
|
+
macro_rules! dispatch_apply {
|
567
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
568
|
+
if matches!($self.dtype(), DataType::Object(_)) {
|
569
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
570
|
+
// ca.$method($($args),*)
|
571
|
+
todo!()
|
572
|
+
} else {
|
573
|
+
apply_method_all_arrow_series2!(
|
574
|
+
$self,
|
575
|
+
$method,
|
576
|
+
$($args),*
|
577
|
+
)
|
578
|
+
}
|
579
|
+
|
580
|
+
}
|
581
|
+
|
582
|
+
}
|
583
|
+
|
584
|
+
if matches!(
|
585
|
+
series.dtype(),
|
586
|
+
DataType::Datetime(_, _)
|
587
|
+
| DataType::Date
|
588
|
+
| DataType::Duration(_)
|
589
|
+
| DataType::Categorical(_)
|
590
|
+
| DataType::Time
|
591
|
+
) || !skip_nulls
|
592
|
+
{
|
593
|
+
let mut avs = Vec::with_capacity(series.len());
|
594
|
+
let iter = series.iter().map(|av| {
|
595
|
+
let input = Wrap(av);
|
596
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
597
|
+
.unwrap()
|
598
|
+
.0
|
599
|
+
});
|
600
|
+
avs.extend(iter);
|
601
|
+
return Ok(Series::new(&self.name(), &avs).into());
|
602
|
+
}
|
603
|
+
|
604
|
+
let out = match output_type {
|
605
|
+
Some(DataType::Int8) => {
|
606
|
+
let ca: Int8Chunked = dispatch_apply!(
|
607
|
+
series,
|
608
|
+
apply_lambda_with_primitive_out_type,
|
609
|
+
lambda,
|
610
|
+
0,
|
611
|
+
None
|
612
|
+
)?;
|
613
|
+
ca.into_series()
|
614
|
+
}
|
615
|
+
Some(DataType::Int16) => {
|
616
|
+
let ca: Int16Chunked = dispatch_apply!(
|
617
|
+
series,
|
618
|
+
apply_lambda_with_primitive_out_type,
|
619
|
+
lambda,
|
620
|
+
0,
|
621
|
+
None
|
622
|
+
)?;
|
623
|
+
ca.into_series()
|
624
|
+
}
|
625
|
+
Some(DataType::Int32) => {
|
626
|
+
let ca: Int32Chunked = dispatch_apply!(
|
627
|
+
series,
|
628
|
+
apply_lambda_with_primitive_out_type,
|
629
|
+
lambda,
|
630
|
+
0,
|
631
|
+
None
|
632
|
+
)?;
|
633
|
+
ca.into_series()
|
634
|
+
}
|
635
|
+
Some(DataType::Int64) => {
|
636
|
+
let ca: Int64Chunked = dispatch_apply!(
|
637
|
+
series,
|
638
|
+
apply_lambda_with_primitive_out_type,
|
639
|
+
lambda,
|
640
|
+
0,
|
641
|
+
None
|
642
|
+
)?;
|
643
|
+
ca.into_series()
|
644
|
+
}
|
645
|
+
Some(DataType::UInt8) => {
|
646
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
647
|
+
series,
|
648
|
+
apply_lambda_with_primitive_out_type,
|
649
|
+
lambda,
|
650
|
+
0,
|
651
|
+
None
|
652
|
+
)?;
|
653
|
+
ca.into_series()
|
654
|
+
}
|
655
|
+
Some(DataType::UInt16) => {
|
656
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
657
|
+
series,
|
658
|
+
apply_lambda_with_primitive_out_type,
|
659
|
+
lambda,
|
660
|
+
0,
|
661
|
+
None
|
662
|
+
)?;
|
663
|
+
ca.into_series()
|
664
|
+
}
|
665
|
+
Some(DataType::UInt32) => {
|
666
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
667
|
+
series,
|
668
|
+
apply_lambda_with_primitive_out_type,
|
669
|
+
lambda,
|
670
|
+
0,
|
671
|
+
None
|
672
|
+
)?;
|
673
|
+
ca.into_series()
|
674
|
+
}
|
675
|
+
Some(DataType::UInt64) => {
|
676
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
677
|
+
series,
|
678
|
+
apply_lambda_with_primitive_out_type,
|
679
|
+
lambda,
|
680
|
+
0,
|
681
|
+
None
|
682
|
+
)?;
|
683
|
+
ca.into_series()
|
684
|
+
}
|
685
|
+
Some(DataType::Float32) => {
|
686
|
+
let ca: Float32Chunked = dispatch_apply!(
|
687
|
+
series,
|
688
|
+
apply_lambda_with_primitive_out_type,
|
689
|
+
lambda,
|
690
|
+
0,
|
691
|
+
None
|
692
|
+
)?;
|
693
|
+
ca.into_series()
|
694
|
+
}
|
695
|
+
Some(DataType::Float64) => {
|
696
|
+
let ca: Float64Chunked = dispatch_apply!(
|
697
|
+
series,
|
698
|
+
apply_lambda_with_primitive_out_type,
|
699
|
+
lambda,
|
700
|
+
0,
|
701
|
+
None
|
702
|
+
)?;
|
703
|
+
ca.into_series()
|
704
|
+
}
|
705
|
+
Some(DataType::Boolean) => {
|
706
|
+
let ca: BooleanChunked =
|
707
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
708
|
+
ca.into_series()
|
709
|
+
}
|
710
|
+
Some(DataType::Date) => {
|
711
|
+
let ca: Int32Chunked = dispatch_apply!(
|
712
|
+
series,
|
713
|
+
apply_lambda_with_primitive_out_type,
|
714
|
+
lambda,
|
715
|
+
0,
|
716
|
+
None
|
717
|
+
)?;
|
718
|
+
ca.into_date().into_series()
|
719
|
+
}
|
720
|
+
Some(DataType::Datetime(tu, tz)) => {
|
721
|
+
let ca: Int64Chunked = dispatch_apply!(
|
722
|
+
series,
|
723
|
+
apply_lambda_with_primitive_out_type,
|
724
|
+
lambda,
|
725
|
+
0,
|
726
|
+
None
|
727
|
+
)?;
|
728
|
+
ca.into_datetime(tu, tz).into_series()
|
729
|
+
}
|
730
|
+
Some(DataType::Utf8) => {
|
731
|
+
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
732
|
+
|
733
|
+
ca.into_series()
|
734
|
+
}
|
735
|
+
Some(DataType::Object(_)) => {
|
736
|
+
let ca =
|
737
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
738
|
+
ca.into_series()
|
739
|
+
}
|
740
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
741
|
+
|
742
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
743
|
+
};
|
744
|
+
|
745
|
+
Ok(RbSeries::new(out))
|
746
|
+
}
|
747
|
+
|
532
748
|
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
533
749
|
let binding = mask.series.borrow();
|
534
750
|
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
@@ -627,6 +843,108 @@ impl RbSeries {
|
|
627
843
|
}
|
628
844
|
}
|
629
845
|
|
846
|
+
macro_rules! impl_set_with_mask {
|
847
|
+
($name:ident, $native:ty, $cast:ident, $variant:ident) => {
|
848
|
+
fn $name(
|
849
|
+
series: &Series,
|
850
|
+
filter: &RbSeries,
|
851
|
+
value: Option<$native>,
|
852
|
+
) -> PolarsResult<Series> {
|
853
|
+
let binding = filter.series.borrow();
|
854
|
+
let mask = binding.bool()?;
|
855
|
+
let ca = series.$cast()?;
|
856
|
+
let new = ca.set(mask, value)?;
|
857
|
+
Ok(new.into_series())
|
858
|
+
}
|
859
|
+
|
860
|
+
impl RbSeries {
|
861
|
+
pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
|
862
|
+
let series =
|
863
|
+
$name(&self.series.borrow(), filter, value).map_err(RbPolarsErr::from)?;
|
864
|
+
Ok(Self::new(series))
|
865
|
+
}
|
866
|
+
}
|
867
|
+
};
|
868
|
+
}
|
869
|
+
|
870
|
+
// impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
|
871
|
+
impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
|
872
|
+
impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
|
873
|
+
impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
|
874
|
+
impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
|
875
|
+
impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
|
876
|
+
impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
|
877
|
+
impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
|
878
|
+
impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
|
879
|
+
impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
|
880
|
+
impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
|
881
|
+
impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
|
882
|
+
|
883
|
+
macro_rules! impl_arithmetic {
|
884
|
+
($name:ident, $type:ty, $operand:tt) => {
|
885
|
+
impl RbSeries {
|
886
|
+
pub fn $name(&self, other: $type) -> RbResult<Self> {
|
887
|
+
Ok(RbSeries::new(&*self.series.borrow() $operand other))
|
888
|
+
}
|
889
|
+
}
|
890
|
+
};
|
891
|
+
}
|
892
|
+
|
893
|
+
impl_arithmetic!(add_u8, u8, +);
|
894
|
+
impl_arithmetic!(add_u16, u16, +);
|
895
|
+
impl_arithmetic!(add_u32, u32, +);
|
896
|
+
impl_arithmetic!(add_u64, u64, +);
|
897
|
+
impl_arithmetic!(add_i8, i8, +);
|
898
|
+
impl_arithmetic!(add_i16, i16, +);
|
899
|
+
impl_arithmetic!(add_i32, i32, +);
|
900
|
+
impl_arithmetic!(add_i64, i64, +);
|
901
|
+
impl_arithmetic!(add_datetime, i64, +);
|
902
|
+
impl_arithmetic!(add_duration, i64, +);
|
903
|
+
impl_arithmetic!(add_f32, f32, +);
|
904
|
+
impl_arithmetic!(add_f64, f64, +);
|
905
|
+
impl_arithmetic!(sub_u8, u8, -);
|
906
|
+
impl_arithmetic!(sub_u16, u16, -);
|
907
|
+
impl_arithmetic!(sub_u32, u32, -);
|
908
|
+
impl_arithmetic!(sub_u64, u64, -);
|
909
|
+
impl_arithmetic!(sub_i8, i8, -);
|
910
|
+
impl_arithmetic!(sub_i16, i16, -);
|
911
|
+
impl_arithmetic!(sub_i32, i32, -);
|
912
|
+
impl_arithmetic!(sub_i64, i64, -);
|
913
|
+
impl_arithmetic!(sub_datetime, i64, -);
|
914
|
+
impl_arithmetic!(sub_duration, i64, -);
|
915
|
+
impl_arithmetic!(sub_f32, f32, -);
|
916
|
+
impl_arithmetic!(sub_f64, f64, -);
|
917
|
+
impl_arithmetic!(div_u8, u8, /);
|
918
|
+
impl_arithmetic!(div_u16, u16, /);
|
919
|
+
impl_arithmetic!(div_u32, u32, /);
|
920
|
+
impl_arithmetic!(div_u64, u64, /);
|
921
|
+
impl_arithmetic!(div_i8, i8, /);
|
922
|
+
impl_arithmetic!(div_i16, i16, /);
|
923
|
+
impl_arithmetic!(div_i32, i32, /);
|
924
|
+
impl_arithmetic!(div_i64, i64, /);
|
925
|
+
impl_arithmetic!(div_f32, f32, /);
|
926
|
+
impl_arithmetic!(div_f64, f64, /);
|
927
|
+
impl_arithmetic!(mul_u8, u8, *);
|
928
|
+
impl_arithmetic!(mul_u16, u16, *);
|
929
|
+
impl_arithmetic!(mul_u32, u32, *);
|
930
|
+
impl_arithmetic!(mul_u64, u64, *);
|
931
|
+
impl_arithmetic!(mul_i8, i8, *);
|
932
|
+
impl_arithmetic!(mul_i16, i16, *);
|
933
|
+
impl_arithmetic!(mul_i32, i32, *);
|
934
|
+
impl_arithmetic!(mul_i64, i64, *);
|
935
|
+
impl_arithmetic!(mul_f32, f32, *);
|
936
|
+
impl_arithmetic!(mul_f64, f64, *);
|
937
|
+
impl_arithmetic!(rem_u8, u8, %);
|
938
|
+
impl_arithmetic!(rem_u16, u16, %);
|
939
|
+
impl_arithmetic!(rem_u32, u32, %);
|
940
|
+
impl_arithmetic!(rem_u64, u64, %);
|
941
|
+
impl_arithmetic!(rem_i8, i8, %);
|
942
|
+
impl_arithmetic!(rem_i16, i16, %);
|
943
|
+
impl_arithmetic!(rem_i32, i32, %);
|
944
|
+
impl_arithmetic!(rem_i64, i64, %);
|
945
|
+
impl_arithmetic!(rem_f32, f32, %);
|
946
|
+
impl_arithmetic!(rem_f64, f64, %);
|
947
|
+
|
630
948
|
macro_rules! impl_eq_num {
|
631
949
|
($name:ident, $type:ty) => {
|
632
950
|
impl RbSeries {
|
data/ext/polars/src/utils.rs
CHANGED
@@ -17,3 +17,28 @@ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Se
|
|
17
17
|
)),
|
18
18
|
}
|
19
19
|
}
|
20
|
+
|
21
|
+
#[macro_export]
|
22
|
+
macro_rules! apply_method_all_arrow_series2 {
|
23
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
24
|
+
match $self.dtype() {
|
25
|
+
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
26
|
+
DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
|
27
|
+
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
28
|
+
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
29
|
+
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|
30
|
+
DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
|
31
|
+
DataType::Int8 => $self.i8().unwrap().$method($($args),*),
|
32
|
+
DataType::Int16 => $self.i16().unwrap().$method($($args),*),
|
33
|
+
DataType::Int32 => $self.i32().unwrap().$method($($args),*),
|
34
|
+
DataType::Int64 => $self.i64().unwrap().$method($($args),*),
|
35
|
+
DataType::Float32 => $self.f32().unwrap().$method($($args),*),
|
36
|
+
DataType::Float64 => $self.f64().unwrap().$method($($args),*),
|
37
|
+
DataType::Date => $self.date().unwrap().$method($($args),*),
|
38
|
+
DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
|
39
|
+
// DataType::List(_) => $self.list().unwrap().$method($($args),*),
|
40
|
+
DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
|
41
|
+
dt => panic!("dtype {:?} not supported", dt)
|
42
|
+
}
|
43
|
+
}
|
44
|
+
}
|
data/lib/polars/convert.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
module Polars
|
2
|
+
module Convert
|
3
|
+
# Construct a DataFrame from a dictionary of sequences.
|
4
|
+
#
|
5
|
+
# This operation clones data, unless you pass in a `Hash<String, Series>`.
|
6
|
+
#
|
7
|
+
# @param data [Hash]
|
8
|
+
# Two-dimensional data represented as a hash. Hash must contain
|
9
|
+
# arrays.
|
10
|
+
# @param columns [Array]
|
11
|
+
# Column labels to use for resulting DataFrame. If specified, overrides any
|
12
|
+
# labels already present in the data. Must match data dimensions.
|
13
|
+
#
|
14
|
+
# @return [DataFrame]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# data = {"a" => [1, 2], "b" => [3, 4]}
|
18
|
+
# Polars.from_hash(data)
|
19
|
+
# # =>
|
20
|
+
# # shape: (2, 2)
|
21
|
+
# # ┌─────┬─────┐
|
22
|
+
# # │ a ┆ b │
|
23
|
+
# # │ --- ┆ --- │
|
24
|
+
# # │ i64 ┆ i64 │
|
25
|
+
# # ╞═════╪═════╡
|
26
|
+
# # │ 1 ┆ 3 │
|
27
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
28
|
+
# # │ 2 ┆ 4 │
|
29
|
+
# # └─────┴─────┘
|
30
|
+
def from_hash(data, columns: nil)
|
31
|
+
DataFrame._from_hash(data, columns: columns)
|
32
|
+
end
|
33
|
+
|
34
|
+
# Construct a DataFrame from a sequence of dictionaries. This operation clones data.
|
35
|
+
#
|
36
|
+
# @param hashes [Array]
|
37
|
+
# Array with hashes mapping column name to value.
|
38
|
+
# @param infer_schema_length [Integer]
|
39
|
+
# How many hashes/rows to scan to determine the data types
|
40
|
+
# if set to `nil` all rows are scanned. This will be slow.
|
41
|
+
# @param schema [Object]
|
42
|
+
# Schema that (partially) overwrites the inferred schema.
|
43
|
+
#
|
44
|
+
# @return [DataFrame]
|
45
|
+
#
|
46
|
+
# @example
|
47
|
+
# data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
|
48
|
+
# Polars.from_hashes(data)
|
49
|
+
# # =>
|
50
|
+
# # shape: (3, 2)
|
51
|
+
# # ┌─────┬─────┐
|
52
|
+
# # │ a ┆ b │
|
53
|
+
# # │ --- ┆ --- │
|
54
|
+
# # │ i64 ┆ i64 │
|
55
|
+
# # ╞═════╪═════╡
|
56
|
+
# # │ 1 ┆ 4 │
|
57
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
58
|
+
# # │ 2 ┆ 5 │
|
59
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
60
|
+
# # │ 3 ┆ 6 │
|
61
|
+
# # └─────┴─────┘
|
62
|
+
#
|
63
|
+
# @example Overwrite first column name and dtype
|
64
|
+
# Polars.from_hashes(data, schema: {"c" => :i32})
|
65
|
+
# # =>
|
66
|
+
# # shape: (3, 2)
|
67
|
+
# # ┌─────┬─────┐
|
68
|
+
# # │ c ┆ b │
|
69
|
+
# # │ --- ┆ --- │
|
70
|
+
# # │ i32 ┆ i64 │
|
71
|
+
# # ╞═════╪═════╡
|
72
|
+
# # │ 1 ┆ 4 │
|
73
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
74
|
+
# # │ 2 ┆ 5 │
|
75
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
76
|
+
# # │ 3 ┆ 6 │
|
77
|
+
# # └─────┴─────┘
|
78
|
+
#
|
79
|
+
# @example Let polars infer the dtypes but inform about a 3rd column
|
80
|
+
# Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
|
81
|
+
# # shape: (3, 3)
|
82
|
+
# # ┌─────┬─────┬──────┐
|
83
|
+
# # │ a ┆ b ┆ c │
|
84
|
+
# # │ --- ┆ --- ┆ --- │
|
85
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
86
|
+
# # ╞═════╪═════╪══════╡
|
87
|
+
# # │ 1 ┆ 4 ┆ null │
|
88
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
89
|
+
# # │ 2 ┆ 5 ┆ null │
|
90
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
91
|
+
# # │ 3 ┆ 6 ┆ null │
|
92
|
+
# # └─────┴─────┴──────┘
|
93
|
+
# def from_hashes(hashes, infer_schema_length: 50, schema: nil)
|
94
|
+
# DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
|
95
|
+
# end
|
96
|
+
|
97
|
+
# def from_records
|
98
|
+
# end
|
99
|
+
end
|
100
|
+
end
|