polars-df 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,8 @@ use polars::prelude::*;
4
4
  use polars::series::IsSorted;
5
5
  use std::cell::RefCell;
6
6
 
7
+ use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
8
+ use crate::apply_method_all_arrow_series2;
7
9
  use crate::conversion::*;
8
10
  use crate::list_construction::rb_seq_to_list;
9
11
  use crate::set::set_at_idx;
@@ -142,7 +144,7 @@ impl RbSeries {
142
144
  }
143
145
 
144
146
  pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
145
- let val = format!("{}", self.series.borrow().get(index));
147
+ let val = format!("{}", self.series.borrow().get(index).unwrap());
146
148
  if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
147
149
  let v_trunc = &val[..val
148
150
  .char_indices()
@@ -170,8 +172,8 @@ impl RbSeries {
170
172
  }
171
173
  }
172
174
 
173
- pub fn get_idx(&self, idx: usize) -> Value {
174
- Wrap(self.series.borrow().get(idx)).into()
175
+ pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
176
+ Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into())
175
177
  }
176
178
 
177
179
  pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
@@ -245,16 +247,37 @@ impl RbSeries {
245
247
  }
246
248
  }
247
249
 
248
- pub fn max(&self) -> Value {
249
- Wrap(self.series.borrow().max_as_series().get(0)).into()
250
+ pub fn max(&self) -> RbResult<Value> {
251
+ Ok(Wrap(
252
+ self.series
253
+ .borrow()
254
+ .max_as_series()
255
+ .get(0)
256
+ .map_err(RbPolarsErr::from)?,
257
+ )
258
+ .into())
250
259
  }
251
260
 
252
- pub fn min(&self) -> Value {
253
- Wrap(self.series.borrow().min_as_series().get(0)).into()
261
+ pub fn min(&self) -> RbResult<Value> {
262
+ Ok(Wrap(
263
+ self.series
264
+ .borrow()
265
+ .min_as_series()
266
+ .get(0)
267
+ .map_err(RbPolarsErr::from)?,
268
+ )
269
+ .into())
254
270
  }
255
271
 
256
- pub fn sum(&self) -> Value {
257
- Wrap(self.series.borrow().sum_as_series().get(0)).into()
272
+ pub fn sum(&self) -> RbResult<Value> {
273
+ Ok(Wrap(
274
+ self.series
275
+ .borrow()
276
+ .sum_as_series()
277
+ .get(0)
278
+ .map_err(RbPolarsErr::from)?,
279
+ )
280
+ .into())
258
281
  }
259
282
 
260
283
  pub fn n_chunks(&self) -> usize {
@@ -520,7 +543,8 @@ impl RbSeries {
520
543
  .borrow()
521
544
  .quantile_as_series(quantile, interpolation.0)
522
545
  .map_err(|_| RbValueError::new_err("invalid quantile".into()))?
523
- .get(0),
546
+ .get(0)
547
+ .unwrap_or(AnyValue::Null),
524
548
  )
525
549
  .into())
526
550
  }
@@ -529,6 +553,198 @@ impl RbSeries {
529
553
  RbSeries::new(self.series.borrow().clone())
530
554
  }
531
555
 
556
+ pub fn apply_lambda(
557
+ &self,
558
+ lambda: Value,
559
+ output_type: Option<Wrap<DataType>>,
560
+ skip_nulls: bool,
561
+ ) -> RbResult<Self> {
562
+ let series = &self.series.borrow();
563
+
564
+ let output_type = output_type.map(|dt| dt.0);
565
+
566
+ macro_rules! dispatch_apply {
567
+ ($self:expr, $method:ident, $($args:expr),*) => {
568
+ if matches!($self.dtype(), DataType::Object(_)) {
569
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
570
+ // ca.$method($($args),*)
571
+ todo!()
572
+ } else {
573
+ apply_method_all_arrow_series2!(
574
+ $self,
575
+ $method,
576
+ $($args),*
577
+ )
578
+ }
579
+
580
+ }
581
+
582
+ }
583
+
584
+ if matches!(
585
+ series.dtype(),
586
+ DataType::Datetime(_, _)
587
+ | DataType::Date
588
+ | DataType::Duration(_)
589
+ | DataType::Categorical(_)
590
+ | DataType::Time
591
+ ) || !skip_nulls
592
+ {
593
+ let mut avs = Vec::with_capacity(series.len());
594
+ let iter = series.iter().map(|av| {
595
+ let input = Wrap(av);
596
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
597
+ .unwrap()
598
+ .0
599
+ });
600
+ avs.extend(iter);
601
+ return Ok(Series::new(&self.name(), &avs).into());
602
+ }
603
+
604
+ let out = match output_type {
605
+ Some(DataType::Int8) => {
606
+ let ca: Int8Chunked = dispatch_apply!(
607
+ series,
608
+ apply_lambda_with_primitive_out_type,
609
+ lambda,
610
+ 0,
611
+ None
612
+ )?;
613
+ ca.into_series()
614
+ }
615
+ Some(DataType::Int16) => {
616
+ let ca: Int16Chunked = dispatch_apply!(
617
+ series,
618
+ apply_lambda_with_primitive_out_type,
619
+ lambda,
620
+ 0,
621
+ None
622
+ )?;
623
+ ca.into_series()
624
+ }
625
+ Some(DataType::Int32) => {
626
+ let ca: Int32Chunked = dispatch_apply!(
627
+ series,
628
+ apply_lambda_with_primitive_out_type,
629
+ lambda,
630
+ 0,
631
+ None
632
+ )?;
633
+ ca.into_series()
634
+ }
635
+ Some(DataType::Int64) => {
636
+ let ca: Int64Chunked = dispatch_apply!(
637
+ series,
638
+ apply_lambda_with_primitive_out_type,
639
+ lambda,
640
+ 0,
641
+ None
642
+ )?;
643
+ ca.into_series()
644
+ }
645
+ Some(DataType::UInt8) => {
646
+ let ca: UInt8Chunked = dispatch_apply!(
647
+ series,
648
+ apply_lambda_with_primitive_out_type,
649
+ lambda,
650
+ 0,
651
+ None
652
+ )?;
653
+ ca.into_series()
654
+ }
655
+ Some(DataType::UInt16) => {
656
+ let ca: UInt16Chunked = dispatch_apply!(
657
+ series,
658
+ apply_lambda_with_primitive_out_type,
659
+ lambda,
660
+ 0,
661
+ None
662
+ )?;
663
+ ca.into_series()
664
+ }
665
+ Some(DataType::UInt32) => {
666
+ let ca: UInt32Chunked = dispatch_apply!(
667
+ series,
668
+ apply_lambda_with_primitive_out_type,
669
+ lambda,
670
+ 0,
671
+ None
672
+ )?;
673
+ ca.into_series()
674
+ }
675
+ Some(DataType::UInt64) => {
676
+ let ca: UInt64Chunked = dispatch_apply!(
677
+ series,
678
+ apply_lambda_with_primitive_out_type,
679
+ lambda,
680
+ 0,
681
+ None
682
+ )?;
683
+ ca.into_series()
684
+ }
685
+ Some(DataType::Float32) => {
686
+ let ca: Float32Chunked = dispatch_apply!(
687
+ series,
688
+ apply_lambda_with_primitive_out_type,
689
+ lambda,
690
+ 0,
691
+ None
692
+ )?;
693
+ ca.into_series()
694
+ }
695
+ Some(DataType::Float64) => {
696
+ let ca: Float64Chunked = dispatch_apply!(
697
+ series,
698
+ apply_lambda_with_primitive_out_type,
699
+ lambda,
700
+ 0,
701
+ None
702
+ )?;
703
+ ca.into_series()
704
+ }
705
+ Some(DataType::Boolean) => {
706
+ let ca: BooleanChunked =
707
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
708
+ ca.into_series()
709
+ }
710
+ Some(DataType::Date) => {
711
+ let ca: Int32Chunked = dispatch_apply!(
712
+ series,
713
+ apply_lambda_with_primitive_out_type,
714
+ lambda,
715
+ 0,
716
+ None
717
+ )?;
718
+ ca.into_date().into_series()
719
+ }
720
+ Some(DataType::Datetime(tu, tz)) => {
721
+ let ca: Int64Chunked = dispatch_apply!(
722
+ series,
723
+ apply_lambda_with_primitive_out_type,
724
+ lambda,
725
+ 0,
726
+ None
727
+ )?;
728
+ ca.into_datetime(tu, tz).into_series()
729
+ }
730
+ Some(DataType::Utf8) => {
731
+ let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
732
+
733
+ ca.into_series()
734
+ }
735
+ Some(DataType::Object(_)) => {
736
+ let ca =
737
+ dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
738
+ ca.into_series()
739
+ }
740
+ None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
741
+
742
+ _ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
743
+ };
744
+
745
+ Ok(RbSeries::new(out))
746
+ }
747
+
532
748
  pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
533
749
  let binding = mask.series.borrow();
534
750
  let mask = binding.bool().map_err(RbPolarsErr::from)?;
@@ -627,6 +843,108 @@ impl RbSeries {
627
843
  }
628
844
  }
629
845
 
846
+ macro_rules! impl_set_with_mask {
847
+ ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
848
+ fn $name(
849
+ series: &Series,
850
+ filter: &RbSeries,
851
+ value: Option<$native>,
852
+ ) -> PolarsResult<Series> {
853
+ let binding = filter.series.borrow();
854
+ let mask = binding.bool()?;
855
+ let ca = series.$cast()?;
856
+ let new = ca.set(mask, value)?;
857
+ Ok(new.into_series())
858
+ }
859
+
860
+ impl RbSeries {
861
+ pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
862
+ let series =
863
+ $name(&self.series.borrow(), filter, value).map_err(RbPolarsErr::from)?;
864
+ Ok(Self::new(series))
865
+ }
866
+ }
867
+ };
868
+ }
869
+
870
+ // impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
871
+ impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
872
+ impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
873
+ impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
874
+ impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
875
+ impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
876
+ impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
877
+ impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
878
+ impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
879
+ impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
880
+ impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
881
+ impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
882
+
883
+ macro_rules! impl_arithmetic {
884
+ ($name:ident, $type:ty, $operand:tt) => {
885
+ impl RbSeries {
886
+ pub fn $name(&self, other: $type) -> RbResult<Self> {
887
+ Ok(RbSeries::new(&*self.series.borrow() $operand other))
888
+ }
889
+ }
890
+ };
891
+ }
892
+
893
+ impl_arithmetic!(add_u8, u8, +);
894
+ impl_arithmetic!(add_u16, u16, +);
895
+ impl_arithmetic!(add_u32, u32, +);
896
+ impl_arithmetic!(add_u64, u64, +);
897
+ impl_arithmetic!(add_i8, i8, +);
898
+ impl_arithmetic!(add_i16, i16, +);
899
+ impl_arithmetic!(add_i32, i32, +);
900
+ impl_arithmetic!(add_i64, i64, +);
901
+ impl_arithmetic!(add_datetime, i64, +);
902
+ impl_arithmetic!(add_duration, i64, +);
903
+ impl_arithmetic!(add_f32, f32, +);
904
+ impl_arithmetic!(add_f64, f64, +);
905
+ impl_arithmetic!(sub_u8, u8, -);
906
+ impl_arithmetic!(sub_u16, u16, -);
907
+ impl_arithmetic!(sub_u32, u32, -);
908
+ impl_arithmetic!(sub_u64, u64, -);
909
+ impl_arithmetic!(sub_i8, i8, -);
910
+ impl_arithmetic!(sub_i16, i16, -);
911
+ impl_arithmetic!(sub_i32, i32, -);
912
+ impl_arithmetic!(sub_i64, i64, -);
913
+ impl_arithmetic!(sub_datetime, i64, -);
914
+ impl_arithmetic!(sub_duration, i64, -);
915
+ impl_arithmetic!(sub_f32, f32, -);
916
+ impl_arithmetic!(sub_f64, f64, -);
917
+ impl_arithmetic!(div_u8, u8, /);
918
+ impl_arithmetic!(div_u16, u16, /);
919
+ impl_arithmetic!(div_u32, u32, /);
920
+ impl_arithmetic!(div_u64, u64, /);
921
+ impl_arithmetic!(div_i8, i8, /);
922
+ impl_arithmetic!(div_i16, i16, /);
923
+ impl_arithmetic!(div_i32, i32, /);
924
+ impl_arithmetic!(div_i64, i64, /);
925
+ impl_arithmetic!(div_f32, f32, /);
926
+ impl_arithmetic!(div_f64, f64, /);
927
+ impl_arithmetic!(mul_u8, u8, *);
928
+ impl_arithmetic!(mul_u16, u16, *);
929
+ impl_arithmetic!(mul_u32, u32, *);
930
+ impl_arithmetic!(mul_u64, u64, *);
931
+ impl_arithmetic!(mul_i8, i8, *);
932
+ impl_arithmetic!(mul_i16, i16, *);
933
+ impl_arithmetic!(mul_i32, i32, *);
934
+ impl_arithmetic!(mul_i64, i64, *);
935
+ impl_arithmetic!(mul_f32, f32, *);
936
+ impl_arithmetic!(mul_f64, f64, *);
937
+ impl_arithmetic!(rem_u8, u8, %);
938
+ impl_arithmetic!(rem_u16, u16, %);
939
+ impl_arithmetic!(rem_u32, u32, %);
940
+ impl_arithmetic!(rem_u64, u64, %);
941
+ impl_arithmetic!(rem_i8, i8, %);
942
+ impl_arithmetic!(rem_i16, i16, %);
943
+ impl_arithmetic!(rem_i32, i32, %);
944
+ impl_arithmetic!(rem_i64, i64, %);
945
+ impl_arithmetic!(rem_f32, f32, %);
946
+ impl_arithmetic!(rem_f64, f64, %);
947
+
630
948
  macro_rules! impl_eq_num {
631
949
  ($name:ident, $type:ty) => {
632
950
  impl RbSeries {
@@ -17,3 +17,28 @@ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Se
17
17
  )),
18
18
  }
19
19
  }
20
+
21
+ #[macro_export]
22
+ macro_rules! apply_method_all_arrow_series2 {
23
+ ($self:expr, $method:ident, $($args:expr),*) => {
24
+ match $self.dtype() {
25
+ DataType::Boolean => $self.bool().unwrap().$method($($args),*),
26
+ DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
27
+ DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
28
+ DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
29
+ DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
30
+ DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
31
+ DataType::Int8 => $self.i8().unwrap().$method($($args),*),
32
+ DataType::Int16 => $self.i16().unwrap().$method($($args),*),
33
+ DataType::Int32 => $self.i32().unwrap().$method($($args),*),
34
+ DataType::Int64 => $self.i64().unwrap().$method($($args),*),
35
+ DataType::Float32 => $self.f32().unwrap().$method($($args),*),
36
+ DataType::Float64 => $self.f64().unwrap().$method($($args),*),
37
+ DataType::Date => $self.date().unwrap().$method($($args),*),
38
+ DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
39
+ // DataType::List(_) => $self.list().unwrap().$method($($args),*),
40
+ DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
41
+ dt => panic!("dtype {:?} not supported", dt)
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,100 @@
1
+ module Polars
2
+ module Convert
3
+ # Construct a DataFrame from a hash of arrays.
4
+ #
5
+ # This operation clones data, unless you pass in a `Hash<String, Series>`.
6
+ #
7
+ # @param data [Hash]
8
+ # Two-dimensional data represented as a hash. Hash must contain
9
+ # arrays.
10
+ # @param columns [Array]
11
+ # Column labels to use for resulting DataFrame. If specified, overrides any
12
+ # labels already present in the data. Must match data dimensions.
13
+ #
14
+ # @return [DataFrame]
15
+ #
16
+ # @example
17
+ # data = {"a" => [1, 2], "b" => [3, 4]}
18
+ # Polars.from_hash(data)
19
+ # # =>
20
+ # # shape: (2, 2)
21
+ # # ┌─────┬─────┐
22
+ # # │ a ┆ b │
23
+ # # │ --- ┆ --- │
24
+ # # │ i64 ┆ i64 │
25
+ # # ╞═════╪═════╡
26
+ # # │ 1 ┆ 3 │
27
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
28
+ # # │ 2 ┆ 4 │
29
+ # # └─────┴─────┘
30
+ def from_hash(data, columns: nil)
31
+ DataFrame._from_hash(data, columns: columns)
32
+ end
33
+
34
+ # Construct a DataFrame from an array of hashes. This operation clones data.
35
+ #
36
+ # @param hashes [Array]
37
+ # Array with hashes mapping column name to value.
38
+ # @param infer_schema_length [Integer]
39
+ # How many hashes/rows to scan to determine the data types.
39
+ # If set to `nil`, all rows are scanned, which will be slow.
41
+ # @param schema [Object]
42
+ # Schema that (partially) overwrites the inferred schema.
43
+ #
44
+ # @return [DataFrame]
45
+ #
46
+ # @example
47
+ # data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
48
+ # Polars.from_hashes(data)
49
+ # # =>
50
+ # # shape: (3, 2)
51
+ # # ┌─────┬─────┐
52
+ # # │ a ┆ b │
53
+ # # │ --- ┆ --- │
54
+ # # │ i64 ┆ i64 │
55
+ # # ╞═════╪═════╡
56
+ # # │ 1 ┆ 4 │
57
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
58
+ # # │ 2 ┆ 5 │
59
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
60
+ # # │ 3 ┆ 6 │
61
+ # # └─────┴─────┘
62
+ #
63
+ # @example Overwrite first column name and dtype
64
+ # Polars.from_hashes(data, schema: {"c" => :i32})
65
+ # # =>
66
+ # # shape: (3, 2)
67
+ # # ┌─────┬─────┐
68
+ # # │ c ┆ b │
69
+ # # │ --- ┆ --- │
70
+ # # │ i32 ┆ i64 │
71
+ # # ╞═════╪═════╡
72
+ # # │ 1 ┆ 4 │
73
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
74
+ # # │ 2 ┆ 5 │
75
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
76
+ # # │ 3 ┆ 6 │
77
+ # # └─────┴─────┘
78
+ #
79
+ # @example Let polars infer the dtypes but inform about a 3rd column
80
+ # Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
81
+ # # shape: (3, 3)
82
+ # # ┌─────┬─────┬──────┐
83
+ # # │ a ┆ b ┆ c │
84
+ # # │ --- ┆ --- ┆ --- │
85
+ # # │ i64 ┆ i64 ┆ i32 │
86
+ # # ╞═════╪═════╪══════╡
87
+ # # │ 1 ┆ 4 ┆ null │
88
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
89
+ # # │ 2 ┆ 5 ┆ null │
90
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
91
+ # # │ 3 ┆ 6 ┆ null │
92
+ # # └─────┴─────┴──────┘
93
+ # def from_hashes(hashes, infer_schema_length: 50, schema: nil)
94
+ # DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
95
+ # end
96
+
97
+ # def from_records
98
+ # end
99
+ end
100
+ end