polars-df 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,8 @@ use polars::prelude::*;
4
4
  use polars::series::IsSorted;
5
5
  use std::cell::RefCell;
6
6
 
7
+ use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
8
+ use crate::apply_method_all_arrow_series2;
7
9
  use crate::conversion::*;
8
10
  use crate::list_construction::rb_seq_to_list;
9
11
  use crate::set::set_at_idx;
@@ -142,7 +144,7 @@ impl RbSeries {
142
144
  }
143
145
 
144
146
  pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
145
- let val = format!("{}", self.series.borrow().get(index));
147
+ let val = format!("{}", self.series.borrow().get(index).unwrap());
146
148
  if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
147
149
  let v_trunc = &val[..val
148
150
  .char_indices()
@@ -170,8 +172,8 @@ impl RbSeries {
170
172
  }
171
173
  }
172
174
 
173
- pub fn get_idx(&self, idx: usize) -> Value {
174
- Wrap(self.series.borrow().get(idx)).into()
175
+ pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
176
+ Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into())
175
177
  }
176
178
 
177
179
  pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
@@ -245,16 +247,37 @@ impl RbSeries {
245
247
  }
246
248
  }
247
249
 
248
- pub fn max(&self) -> Value {
249
- Wrap(self.series.borrow().max_as_series().get(0)).into()
250
+ pub fn max(&self) -> RbResult<Value> {
251
+ Ok(Wrap(
252
+ self.series
253
+ .borrow()
254
+ .max_as_series()
255
+ .get(0)
256
+ .map_err(RbPolarsErr::from)?,
257
+ )
258
+ .into())
250
259
  }
251
260
 
252
- pub fn min(&self) -> Value {
253
- Wrap(self.series.borrow().min_as_series().get(0)).into()
261
+ pub fn min(&self) -> RbResult<Value> {
262
+ Ok(Wrap(
263
+ self.series
264
+ .borrow()
265
+ .min_as_series()
266
+ .get(0)
267
+ .map_err(RbPolarsErr::from)?,
268
+ )
269
+ .into())
254
270
  }
255
271
 
256
- pub fn sum(&self) -> Value {
257
- Wrap(self.series.borrow().sum_as_series().get(0)).into()
272
+ pub fn sum(&self) -> RbResult<Value> {
273
+ Ok(Wrap(
274
+ self.series
275
+ .borrow()
276
+ .sum_as_series()
277
+ .get(0)
278
+ .map_err(RbPolarsErr::from)?,
279
+ )
280
+ .into())
258
281
  }
259
282
 
260
283
  pub fn n_chunks(&self) -> usize {
@@ -520,7 +543,8 @@ impl RbSeries {
520
543
  .borrow()
521
544
  .quantile_as_series(quantile, interpolation.0)
522
545
  .map_err(|_| RbValueError::new_err("invalid quantile".into()))?
523
- .get(0),
546
+ .get(0)
547
+ .unwrap_or(AnyValue::Null),
524
548
  )
525
549
  .into())
526
550
  }
@@ -529,6 +553,198 @@ impl RbSeries {
529
553
  RbSeries::new(self.series.borrow().clone())
530
554
  }
531
555
 
556
+ pub fn apply_lambda(
557
+ &self,
558
+ lambda: Value,
559
+ output_type: Option<Wrap<DataType>>,
560
+ skip_nulls: bool,
561
+ ) -> RbResult<Self> {
562
+ let series = &self.series.borrow();
563
+
564
+ let output_type = output_type.map(|dt| dt.0);
565
+
566
+ macro_rules! dispatch_apply {
567
+ ($self:expr, $method:ident, $($args:expr),*) => {
568
+ if matches!($self.dtype(), DataType::Object(_)) {
569
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
570
+ // ca.$method($($args),*)
571
+ todo!()
572
+ } else {
573
+ apply_method_all_arrow_series2!(
574
+ $self,
575
+ $method,
576
+ $($args),*
577
+ )
578
+ }
579
+
580
+ }
581
+
582
+ }
583
+
584
+ if matches!(
585
+ series.dtype(),
586
+ DataType::Datetime(_, _)
587
+ | DataType::Date
588
+ | DataType::Duration(_)
589
+ | DataType::Categorical(_)
590
+ | DataType::Time
591
+ ) || !skip_nulls
592
+ {
593
+ let mut avs = Vec::with_capacity(series.len());
594
+ let iter = series.iter().map(|av| {
595
+ let input = Wrap(av);
596
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
597
+ .unwrap()
598
+ .0
599
+ });
600
+ avs.extend(iter);
601
+ return Ok(Series::new(&self.name(), &avs).into());
602
+ }
603
+
604
+ let out = match output_type {
605
+ Some(DataType::Int8) => {
606
+ let ca: Int8Chunked = dispatch_apply!(
607
+ series,
608
+ apply_lambda_with_primitive_out_type,
609
+ lambda,
610
+ 0,
611
+ None
612
+ )?;
613
+ ca.into_series()
614
+ }
615
+ Some(DataType::Int16) => {
616
+ let ca: Int16Chunked = dispatch_apply!(
617
+ series,
618
+ apply_lambda_with_primitive_out_type,
619
+ lambda,
620
+ 0,
621
+ None
622
+ )?;
623
+ ca.into_series()
624
+ }
625
+ Some(DataType::Int32) => {
626
+ let ca: Int32Chunked = dispatch_apply!(
627
+ series,
628
+ apply_lambda_with_primitive_out_type,
629
+ lambda,
630
+ 0,
631
+ None
632
+ )?;
633
+ ca.into_series()
634
+ }
635
+ Some(DataType::Int64) => {
636
+ let ca: Int64Chunked = dispatch_apply!(
637
+ series,
638
+ apply_lambda_with_primitive_out_type,
639
+ lambda,
640
+ 0,
641
+ None
642
+ )?;
643
+ ca.into_series()
644
+ }
645
+ Some(DataType::UInt8) => {
646
+ let ca: UInt8Chunked = dispatch_apply!(
647
+ series,
648
+ apply_lambda_with_primitive_out_type,
649
+ lambda,
650
+ 0,
651
+ None
652
+ )?;
653
+ ca.into_series()
654
+ }
655
+ Some(DataType::UInt16) => {
656
+ let ca: UInt16Chunked = dispatch_apply!(
657
+ series,
658
+ apply_lambda_with_primitive_out_type,
659
+ lambda,
660
+ 0,
661
+ None
662
+ )?;
663
+ ca.into_series()
664
+ }
665
+ Some(DataType::UInt32) => {
666
+ let ca: UInt32Chunked = dispatch_apply!(
667
+ series,
668
+ apply_lambda_with_primitive_out_type,
669
+ lambda,
670
+ 0,
671
+ None
672
+ )?;
673
+ ca.into_series()
674
+ }
675
+ Some(DataType::UInt64) => {
676
+ let ca: UInt64Chunked = dispatch_apply!(
677
+ series,
678
+ apply_lambda_with_primitive_out_type,
679
+ lambda,
680
+ 0,
681
+ None
682
+ )?;
683
+ ca.into_series()
684
+ }
685
+ Some(DataType::Float32) => {
686
+ let ca: Float32Chunked = dispatch_apply!(
687
+ series,
688
+ apply_lambda_with_primitive_out_type,
689
+ lambda,
690
+ 0,
691
+ None
692
+ )?;
693
+ ca.into_series()
694
+ }
695
+ Some(DataType::Float64) => {
696
+ let ca: Float64Chunked = dispatch_apply!(
697
+ series,
698
+ apply_lambda_with_primitive_out_type,
699
+ lambda,
700
+ 0,
701
+ None
702
+ )?;
703
+ ca.into_series()
704
+ }
705
+ Some(DataType::Boolean) => {
706
+ let ca: BooleanChunked =
707
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
708
+ ca.into_series()
709
+ }
710
+ Some(DataType::Date) => {
711
+ let ca: Int32Chunked = dispatch_apply!(
712
+ series,
713
+ apply_lambda_with_primitive_out_type,
714
+ lambda,
715
+ 0,
716
+ None
717
+ )?;
718
+ ca.into_date().into_series()
719
+ }
720
+ Some(DataType::Datetime(tu, tz)) => {
721
+ let ca: Int64Chunked = dispatch_apply!(
722
+ series,
723
+ apply_lambda_with_primitive_out_type,
724
+ lambda,
725
+ 0,
726
+ None
727
+ )?;
728
+ ca.into_datetime(tu, tz).into_series()
729
+ }
730
+ Some(DataType::Utf8) => {
731
+ let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
732
+
733
+ ca.into_series()
734
+ }
735
+ Some(DataType::Object(_)) => {
736
+ let ca =
737
+ dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
738
+ ca.into_series()
739
+ }
740
+ None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
741
+
742
+ _ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
743
+ };
744
+
745
+ Ok(RbSeries::new(out))
746
+ }
747
+
532
748
  pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
533
749
  let binding = mask.series.borrow();
534
750
  let mask = binding.bool().map_err(RbPolarsErr::from)?;
@@ -627,6 +843,108 @@ impl RbSeries {
627
843
  }
628
844
  }
629
845
 
846
+ macro_rules! impl_set_with_mask {
847
+ ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
848
+ fn $name(
849
+ series: &Series,
850
+ filter: &RbSeries,
851
+ value: Option<$native>,
852
+ ) -> PolarsResult<Series> {
853
+ let binding = filter.series.borrow();
854
+ let mask = binding.bool()?;
855
+ let ca = series.$cast()?;
856
+ let new = ca.set(mask, value)?;
857
+ Ok(new.into_series())
858
+ }
859
+
860
+ impl RbSeries {
861
+ pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
862
+ let series =
863
+ $name(&self.series.borrow(), filter, value).map_err(RbPolarsErr::from)?;
864
+ Ok(Self::new(series))
865
+ }
866
+ }
867
+ };
868
+ }
869
+
870
+ // impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
871
+ impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
872
+ impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
873
+ impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
874
+ impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
875
+ impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
876
+ impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
877
+ impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
878
+ impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
879
+ impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
880
+ impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
881
+ impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
882
+
883
+ macro_rules! impl_arithmetic {
884
+ ($name:ident, $type:ty, $operand:tt) => {
885
+ impl RbSeries {
886
+ pub fn $name(&self, other: $type) -> RbResult<Self> {
887
+ Ok(RbSeries::new(&*self.series.borrow() $operand other))
888
+ }
889
+ }
890
+ };
891
+ }
892
+
893
+ impl_arithmetic!(add_u8, u8, +);
894
+ impl_arithmetic!(add_u16, u16, +);
895
+ impl_arithmetic!(add_u32, u32, +);
896
+ impl_arithmetic!(add_u64, u64, +);
897
+ impl_arithmetic!(add_i8, i8, +);
898
+ impl_arithmetic!(add_i16, i16, +);
899
+ impl_arithmetic!(add_i32, i32, +);
900
+ impl_arithmetic!(add_i64, i64, +);
901
+ impl_arithmetic!(add_datetime, i64, +);
902
+ impl_arithmetic!(add_duration, i64, +);
903
+ impl_arithmetic!(add_f32, f32, +);
904
+ impl_arithmetic!(add_f64, f64, +);
905
+ impl_arithmetic!(sub_u8, u8, -);
906
+ impl_arithmetic!(sub_u16, u16, -);
907
+ impl_arithmetic!(sub_u32, u32, -);
908
+ impl_arithmetic!(sub_u64, u64, -);
909
+ impl_arithmetic!(sub_i8, i8, -);
910
+ impl_arithmetic!(sub_i16, i16, -);
911
+ impl_arithmetic!(sub_i32, i32, -);
912
+ impl_arithmetic!(sub_i64, i64, -);
913
+ impl_arithmetic!(sub_datetime, i64, -);
914
+ impl_arithmetic!(sub_duration, i64, -);
915
+ impl_arithmetic!(sub_f32, f32, -);
916
+ impl_arithmetic!(sub_f64, f64, -);
917
+ impl_arithmetic!(div_u8, u8, /);
918
+ impl_arithmetic!(div_u16, u16, /);
919
+ impl_arithmetic!(div_u32, u32, /);
920
+ impl_arithmetic!(div_u64, u64, /);
921
+ impl_arithmetic!(div_i8, i8, /);
922
+ impl_arithmetic!(div_i16, i16, /);
923
+ impl_arithmetic!(div_i32, i32, /);
924
+ impl_arithmetic!(div_i64, i64, /);
925
+ impl_arithmetic!(div_f32, f32, /);
926
+ impl_arithmetic!(div_f64, f64, /);
927
+ impl_arithmetic!(mul_u8, u8, *);
928
+ impl_arithmetic!(mul_u16, u16, *);
929
+ impl_arithmetic!(mul_u32, u32, *);
930
+ impl_arithmetic!(mul_u64, u64, *);
931
+ impl_arithmetic!(mul_i8, i8, *);
932
+ impl_arithmetic!(mul_i16, i16, *);
933
+ impl_arithmetic!(mul_i32, i32, *);
934
+ impl_arithmetic!(mul_i64, i64, *);
935
+ impl_arithmetic!(mul_f32, f32, *);
936
+ impl_arithmetic!(mul_f64, f64, *);
937
+ impl_arithmetic!(rem_u8, u8, %);
938
+ impl_arithmetic!(rem_u16, u16, %);
939
+ impl_arithmetic!(rem_u32, u32, %);
940
+ impl_arithmetic!(rem_u64, u64, %);
941
+ impl_arithmetic!(rem_i8, i8, %);
942
+ impl_arithmetic!(rem_i16, i16, %);
943
+ impl_arithmetic!(rem_i32, i32, %);
944
+ impl_arithmetic!(rem_i64, i64, %);
945
+ impl_arithmetic!(rem_f32, f32, %);
946
+ impl_arithmetic!(rem_f64, f64, %);
947
+
630
948
  macro_rules! impl_eq_num {
631
949
  ($name:ident, $type:ty) => {
632
950
  impl RbSeries {
@@ -17,3 +17,28 @@ pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Se
17
17
  )),
18
18
  }
19
19
  }
20
+
21
+ #[macro_export]
22
+ macro_rules! apply_method_all_arrow_series2 {
23
+ ($self:expr, $method:ident, $($args:expr),*) => {
24
+ match $self.dtype() {
25
+ DataType::Boolean => $self.bool().unwrap().$method($($args),*),
26
+ DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
27
+ DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
28
+ DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
29
+ DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
30
+ DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
31
+ DataType::Int8 => $self.i8().unwrap().$method($($args),*),
32
+ DataType::Int16 => $self.i16().unwrap().$method($($args),*),
33
+ DataType::Int32 => $self.i32().unwrap().$method($($args),*),
34
+ DataType::Int64 => $self.i64().unwrap().$method($($args),*),
35
+ DataType::Float32 => $self.f32().unwrap().$method($($args),*),
36
+ DataType::Float64 => $self.f64().unwrap().$method($($args),*),
37
+ DataType::Date => $self.date().unwrap().$method($($args),*),
38
+ DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
39
+ // DataType::List(_) => $self.list().unwrap().$method($($args),*),
40
+ DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
41
+ dt => panic!("dtype {:?} not supported", dt)
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,100 @@
1
+ module Polars
2
+ module Convert
3
+ # Construct a DataFrame from a dictionary of sequences.
4
+ #
5
+ # This operation clones data, unless you pass in a `Hash<String, Series>`.
6
+ #
7
+ # @param data [Hash]
8
+ # Two-dimensional data represented as a hash. Hash must contain
9
+ # arrays.
10
+ # @param columns [Array]
11
+ # Column labels to use for resulting DataFrame. If specified, overrides any
12
+ # labels already present in the data. Must match data dimensions.
13
+ #
14
+ # @return [DataFrame]
15
+ #
16
+ # @example
17
+ # data = {"a" => [1, 2], "b" => [3, 4]}
18
+ # Polars.from_hash(data)
19
+ # # =>
20
+ # # shape: (2, 2)
21
+ # # ┌─────┬─────┐
22
+ # # │ a ┆ b │
23
+ # # │ --- ┆ --- │
24
+ # # │ i64 ┆ i64 │
25
+ # # ╞═════╪═════╡
26
+ # # │ 1 ┆ 3 │
27
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
28
+ # # │ 2 ┆ 4 │
29
+ # # └─────┴─────┘
30
+ def from_hash(data, columns: nil)
31
+ DataFrame._from_hash(data, columns: columns)
32
+ end
33
+
34
+ # Construct a DataFrame from a sequence of dictionaries. This operation clones data.
35
+ #
36
+ # @param hashes [Array]
37
+ # Array with hashes mapping column name to value.
38
+ # @param infer_schema_length [Integer]
39
+ # How many hashes/rows to scan to determine the data types
40
+ # if set to `nil` all rows are scanned. This will be slow.
41
+ # @param schema [Object]
42
+ # Schema that (partially) overwrites the inferred schema.
43
+ #
44
+ # @return [DataFrame]
45
+ #
46
+ # @example
47
+ # data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
48
+ # Polars.from_hashes(data)
49
+ # # =>
50
+ # # shape: (3, 2)
51
+ # # ┌─────┬─────┐
52
+ # # │ a ┆ b │
53
+ # # │ --- ┆ --- │
54
+ # # │ i64 ┆ i64 │
55
+ # # ╞═════╪═════╡
56
+ # # │ 1 ┆ 4 │
57
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
58
+ # # │ 2 ┆ 5 │
59
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
60
+ # # │ 3 ┆ 6 │
61
+ # # └─────┴─────┘
62
+ #
63
+ # @example Overwrite first column name and dtype
64
+ # Polars.from_hashes(data, schema: {"c" => :i32})
65
+ # # =>
66
+ # # shape: (3, 2)
67
+ # # ┌─────┬─────┐
68
+ # # │ c ┆ b │
69
+ # # │ --- ┆ --- │
70
+ # # │ i32 ┆ i64 │
71
+ # # ╞═════╪═════╡
72
+ # # │ 1 ┆ 4 │
73
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
74
+ # # │ 2 ┆ 5 │
75
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
76
+ # # │ 3 ┆ 6 │
77
+ # # └─────┴─────┘
78
+ #
79
+ # @example Let polars infer the dtypes but inform about a 3rd column
80
+ # Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
81
+ # # shape: (3, 3)
82
+ # # ┌─────┬─────┬──────┐
83
+ # # │ a ┆ b ┆ c │
84
+ # # │ --- ┆ --- ┆ --- │
85
+ # # │ i64 ┆ i64 ┆ i32 │
86
+ # # ╞═════╪═════╪══════╡
87
+ # # │ 1 ┆ 4 ┆ null │
88
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
89
+ # # │ 2 ┆ 5 ┆ null │
90
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
91
+ # # │ 3 ┆ 6 ┆ null │
92
+ # # └─────┴─────┴──────┘
93
+ # def from_hashes(hashes, infer_schema_length: 50, schema: nil)
94
+ # DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
95
+ # end
96
+
97
+ # def from_records
98
+ # end
99
+ end
100
+ end