polars-df 0.3.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/Cargo.lock +486 -380
  4. data/Cargo.toml +0 -2
  5. data/README.md +31 -2
  6. data/ext/polars/Cargo.toml +10 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +36 -19
  11. data/ext/polars/src/conversion.rs +159 -16
  12. data/ext/polars/src/dataframe.rs +51 -52
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +216 -300
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +103 -531
  38. data/lib/polars/batched_csv_reader.rb +1 -1
  39. data/lib/polars/binary_expr.rb +77 -0
  40. data/lib/polars/binary_name_space.rb +66 -0
  41. data/lib/polars/convert.rb +2 -2
  42. data/lib/polars/data_frame.rb +263 -87
  43. data/lib/polars/data_types.rb +6 -4
  44. data/lib/polars/date_time_expr.rb +148 -8
  45. data/lib/polars/expr.rb +78 -11
  46. data/lib/polars/io.rb +73 -62
  47. data/lib/polars/lazy_frame.rb +107 -10
  48. data/lib/polars/lazy_functions.rb +7 -3
  49. data/lib/polars/list_expr.rb +70 -21
  50. data/lib/polars/list_name_space.rb +2 -2
  51. data/lib/polars/series.rb +190 -74
  52. data/lib/polars/string_expr.rb +150 -44
  53. data/lib/polars/string_name_space.rb +4 -4
  54. data/lib/polars/struct_name_space.rb +32 -0
  55. data/lib/polars/utils.rb +51 -9
  56. data/lib/polars/version.rb +1 -1
  57. data/lib/polars.rb +4 -2
  58. metadata +29 -12
  59. data/ext/polars/src/lazy/mod.rs +0 -5
  60. data/ext/polars/src/lazy/utils.rs +0 -13
  61. data/ext/polars/src/list_construction.rs +0 -100
  62. data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  63. data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -1,28 +1,13 @@
1
- use magnus::block::Proc;
2
- use magnus::{class, IntoValue, RArray, RString, Value};
3
- use polars::chunked_array::ops::SortOptions;
1
+ use magnus::{block::Proc, IntoValue, RArray, Value};
4
2
  use polars::lazy::dsl;
5
- use polars::lazy::dsl::Operator;
6
3
  use polars::prelude::*;
7
4
  use polars::series::ops::NullBehavior;
8
5
 
9
- use crate::conversion::*;
10
- use crate::lazy::apply::*;
11
- use crate::lazy::utils::rb_exprs_to_exprs;
6
+ use crate::apply::lazy::map_single;
7
+ use crate::conversion::{parse_fill_null_strategy, Wrap};
8
+ use crate::rb_exprs_to_exprs;
12
9
  use crate::utils::reinterpret;
13
- use crate::{RbResult, RbSeries};
14
-
15
- #[magnus::wrap(class = "Polars::RbExpr")]
16
- #[derive(Clone)]
17
- pub struct RbExpr {
18
- pub inner: dsl::Expr,
19
- }
20
-
21
- impl From<dsl::Expr> for RbExpr {
22
- fn from(inner: dsl::Expr) -> Self {
23
- RbExpr { inner }
24
- }
25
- }
10
+ use crate::{RbExpr, RbResult};
26
11
 
27
12
  impl RbExpr {
28
13
  pub fn add(&self, rhs: &RbExpr) -> RbResult<Self> {
@@ -156,12 +141,13 @@ impl RbExpr {
156
141
  pub fn first(&self) -> Self {
157
142
  self.clone().inner.first().into()
158
143
  }
144
+
159
145
  pub fn last(&self) -> Self {
160
146
  self.clone().inner.last().into()
161
147
  }
162
148
 
163
- pub fn list(&self) -> Self {
164
- self.clone().inner.list().into()
149
+ pub fn implode(&self) -> Self {
150
+ self.clone().inner.implode().into()
165
151
  }
166
152
 
167
153
  pub fn quantile(
@@ -230,8 +216,12 @@ impl RbExpr {
230
216
  .into()
231
217
  }
232
218
 
233
- pub fn top_k(&self, k: usize, reverse: bool) -> Self {
234
- self.inner.clone().top_k(k, reverse).into()
219
+ pub fn top_k(&self, k: usize) -> Self {
220
+ self.inner.clone().top_k(k).into()
221
+ }
222
+
223
+ pub fn bottom_k(&self, k: usize) -> Self {
224
+ self.inner.clone().bottom_k(k).into()
235
225
  }
236
226
 
237
227
  pub fn arg_max(&self) -> Self {
@@ -329,6 +319,10 @@ impl RbExpr {
329
319
  self.clone().inner.is_unique().into()
330
320
  }
331
321
 
322
+ pub fn approx_unique(&self) -> Self {
323
+ self.clone().inner.approx_unique().into()
324
+ }
325
+
332
326
  pub fn is_first(&self) -> Self {
333
327
  self.clone().inner.is_first().into()
334
328
  }
@@ -518,499 +512,6 @@ impl RbExpr {
518
512
  self.inner.clone().shrink_dtype().into()
519
513
  }
520
514
 
521
- pub fn str_parse_date(
522
- &self,
523
- fmt: Option<String>,
524
- strict: bool,
525
- exact: bool,
526
- cache: bool,
527
- ) -> Self {
528
- self.inner
529
- .clone()
530
- .str()
531
- .strptime(StrpTimeOptions {
532
- date_dtype: DataType::Date,
533
- fmt,
534
- strict,
535
- exact,
536
- cache,
537
- tz_aware: false,
538
- utc: false,
539
- })
540
- .into()
541
- }
542
-
543
- pub fn str_parse_datetime(
544
- &self,
545
- fmt: Option<String>,
546
- strict: bool,
547
- exact: bool,
548
- cache: bool,
549
- tz_aware: bool,
550
- utc: bool,
551
- ) -> Self {
552
- let tu = match fmt {
553
- Some(ref fmt) => {
554
- if fmt.contains("%.9f")
555
- || fmt.contains("%9f")
556
- || fmt.contains("%f")
557
- || fmt.contains("%.f")
558
- {
559
- TimeUnit::Nanoseconds
560
- } else if fmt.contains("%.3f") || fmt.contains("%3f") {
561
- TimeUnit::Milliseconds
562
- } else {
563
- TimeUnit::Microseconds
564
- }
565
- }
566
- None => TimeUnit::Microseconds,
567
- };
568
- self.inner
569
- .clone()
570
- .str()
571
- .strptime(StrpTimeOptions {
572
- date_dtype: DataType::Datetime(tu, None),
573
- fmt,
574
- strict,
575
- exact,
576
- cache,
577
- tz_aware,
578
- utc,
579
- })
580
- .into()
581
- }
582
-
583
- pub fn str_parse_time(
584
- &self,
585
- fmt: Option<String>,
586
- strict: bool,
587
- exact: bool,
588
- cache: bool,
589
- ) -> Self {
590
- self.inner
591
- .clone()
592
- .str()
593
- .strptime(StrpTimeOptions {
594
- date_dtype: DataType::Time,
595
- fmt,
596
- strict,
597
- exact,
598
- cache,
599
- tz_aware: false,
600
- utc: false,
601
- })
602
- .into()
603
- }
604
-
605
- pub fn str_strip(&self, matches: Option<String>) -> Self {
606
- self.inner.clone().str().strip(matches).into()
607
- }
608
-
609
- pub fn str_rstrip(&self, matches: Option<String>) -> Self {
610
- self.inner.clone().str().rstrip(matches).into()
611
- }
612
-
613
- pub fn str_lstrip(&self, matches: Option<String>) -> Self {
614
- self.inner.clone().str().lstrip(matches).into()
615
- }
616
-
617
- pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
618
- let function = move |s: Series| {
619
- let ca = s.utf8()?;
620
- Ok(Some(ca.str_slice(start, length)?.into_series()))
621
- };
622
- self.clone()
623
- .inner
624
- .map(function, GetOutput::from_type(DataType::Utf8))
625
- .with_fmt("str.slice")
626
- .into()
627
- }
628
-
629
- pub fn str_to_uppercase(&self) -> Self {
630
- self.inner.clone().str().to_uppercase().into()
631
- }
632
-
633
- pub fn str_to_lowercase(&self) -> Self {
634
- self.inner.clone().str().to_lowercase().into()
635
- }
636
-
637
- pub fn str_lengths(&self) -> Self {
638
- let function = |s: Series| {
639
- let ca = s.utf8()?;
640
- Ok(Some(ca.str_lengths().into_series()))
641
- };
642
- self.clone()
643
- .inner
644
- .map(function, GetOutput::from_type(DataType::UInt32))
645
- .with_fmt("str.lengths")
646
- .into()
647
- }
648
-
649
- pub fn str_n_chars(&self) -> Self {
650
- let function = |s: Series| {
651
- let ca = s.utf8()?;
652
- Ok(Some(ca.str_n_chars().into_series()))
653
- };
654
- self.clone()
655
- .inner
656
- .map(function, GetOutput::from_type(DataType::UInt32))
657
- .with_fmt("str.n_chars")
658
- .into()
659
- }
660
-
661
- pub fn str_replace(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
662
- self.inner
663
- .clone()
664
- .str()
665
- .replace(pat.inner.clone(), val.inner.clone(), literal)
666
- .into()
667
- }
668
-
669
- pub fn str_replace_all(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
670
- self.inner
671
- .clone()
672
- .str()
673
- .replace_all(pat.inner.clone(), val.inner.clone(), literal)
674
- .into()
675
- }
676
-
677
- pub fn str_zfill(&self, alignment: usize) -> Self {
678
- self.clone().inner.str().zfill(alignment).into()
679
- }
680
-
681
- pub fn str_ljust(&self, width: usize, fillchar: char) -> Self {
682
- self.clone().inner.str().ljust(width, fillchar).into()
683
- }
684
-
685
- pub fn str_rjust(&self, width: usize, fillchar: char) -> Self {
686
- self.clone().inner.str().rjust(width, fillchar).into()
687
- }
688
-
689
- pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
690
- match literal {
691
- Some(true) => self
692
- .inner
693
- .clone()
694
- .str()
695
- .contains_literal(pat.inner.clone())
696
- .into(),
697
- _ => self
698
- .inner
699
- .clone()
700
- .str()
701
- .contains(pat.inner.clone(), strict)
702
- .into(),
703
- }
704
- }
705
-
706
- pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
707
- self.inner.clone().str().ends_with(sub.inner.clone()).into()
708
- }
709
-
710
- pub fn str_starts_with(&self, sub: &RbExpr) -> Self {
711
- self.inner
712
- .clone()
713
- .str()
714
- .starts_with(sub.inner.clone())
715
- .into()
716
- }
717
-
718
- pub fn str_hex_encode(&self) -> Self {
719
- self.clone()
720
- .inner
721
- .map(
722
- move |s| s.utf8().map(|s| Some(s.hex_encode().into_series())),
723
- GetOutput::same_type(),
724
- )
725
- .with_fmt("str.hex_encode")
726
- .into()
727
- }
728
-
729
- pub fn str_hex_decode(&self, strict: bool) -> Self {
730
- self.clone()
731
- .inner
732
- .map(
733
- move |s| s.utf8()?.hex_decode(strict).map(|s| Some(s.into_series())),
734
- GetOutput::same_type(),
735
- )
736
- .with_fmt("str.hex_decode")
737
- .into()
738
- }
739
-
740
- pub fn str_base64_encode(&self) -> Self {
741
- self.clone()
742
- .inner
743
- .map(
744
- move |s| s.utf8().map(|s| Some(s.base64_encode().into_series())),
745
- GetOutput::same_type(),
746
- )
747
- .with_fmt("str.base64_encode")
748
- .into()
749
- }
750
-
751
- pub fn str_base64_decode(&self, strict: bool) -> Self {
752
- self.clone()
753
- .inner
754
- .map(
755
- move |s| {
756
- s.utf8()?
757
- .base64_decode(strict)
758
- .map(|s| Some(s.into_series()))
759
- },
760
- GetOutput::same_type(),
761
- )
762
- .with_fmt("str.base64_decode")
763
- .into()
764
- }
765
-
766
- pub fn str_json_path_match(&self, pat: String) -> Self {
767
- let function = move |s: Series| {
768
- let ca = s.utf8()?;
769
- match ca.json_path_match(&pat) {
770
- Ok(ca) => Ok(Some(ca.into_series())),
771
- Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
772
- }
773
- };
774
- self.clone()
775
- .inner
776
- .map(function, GetOutput::from_type(DataType::Utf8))
777
- .with_fmt("str.json_path_match")
778
- .into()
779
- }
780
-
781
- pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
782
- self.inner.clone().str().extract(&pat, group_index).into()
783
- }
784
-
785
- pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
786
- self.inner
787
- .clone()
788
- .str()
789
- .extract_all(pat.inner.clone())
790
- .into()
791
- }
792
-
793
- pub fn count_match(&self, pat: String) -> Self {
794
- self.inner.clone().str().count_match(&pat).into()
795
- }
796
-
797
- pub fn strftime(&self, fmt: String) -> Self {
798
- self.inner.clone().dt().strftime(&fmt).into()
799
- }
800
-
801
- pub fn str_split(&self, by: String) -> Self {
802
- self.inner.clone().str().split(&by).into()
803
- }
804
-
805
- pub fn str_split_inclusive(&self, by: String) -> Self {
806
- self.inner.clone().str().split_inclusive(&by).into()
807
- }
808
-
809
- pub fn str_split_exact(&self, by: String, n: usize) -> Self {
810
- self.inner.clone().str().split_exact(&by, n).into()
811
- }
812
-
813
- pub fn str_split_exact_inclusive(&self, by: String, n: usize) -> Self {
814
- self.inner
815
- .clone()
816
- .str()
817
- .split_exact_inclusive(&by, n)
818
- .into()
819
- }
820
-
821
- pub fn str_splitn(&self, by: String, n: usize) -> Self {
822
- self.inner.clone().str().splitn(&by, n).into()
823
- }
824
-
825
- pub fn arr_lengths(&self) -> Self {
826
- self.inner.clone().arr().lengths().into()
827
- }
828
-
829
- pub fn arr_contains(&self, other: &RbExpr) -> Self {
830
- self.inner
831
- .clone()
832
- .arr()
833
- .contains(other.inner.clone())
834
- .into()
835
- }
836
-
837
- pub fn year(&self) -> Self {
838
- self.clone().inner.dt().year().into()
839
- }
840
-
841
- pub fn iso_year(&self) -> Self {
842
- self.clone().inner.dt().iso_year().into()
843
- }
844
-
845
- pub fn quarter(&self) -> Self {
846
- self.clone().inner.dt().quarter().into()
847
- }
848
-
849
- pub fn month(&self) -> Self {
850
- self.clone().inner.dt().month().into()
851
- }
852
-
853
- pub fn week(&self) -> Self {
854
- self.clone().inner.dt().week().into()
855
- }
856
-
857
- pub fn weekday(&self) -> Self {
858
- self.clone().inner.dt().weekday().into()
859
- }
860
-
861
- pub fn day(&self) -> Self {
862
- self.clone().inner.dt().day().into()
863
- }
864
-
865
- pub fn ordinal_day(&self) -> Self {
866
- self.clone().inner.dt().ordinal_day().into()
867
- }
868
-
869
- pub fn hour(&self) -> Self {
870
- self.clone().inner.dt().hour().into()
871
- }
872
-
873
- pub fn minute(&self) -> Self {
874
- self.clone().inner.dt().minute().into()
875
- }
876
-
877
- pub fn second(&self) -> Self {
878
- self.clone().inner.dt().second().into()
879
- }
880
-
881
- pub fn millisecond(&self) -> Self {
882
- self.clone().inner.dt().millisecond().into()
883
- }
884
-
885
- pub fn microsecond(&self) -> Self {
886
- self.clone().inner.dt().microsecond().into()
887
- }
888
-
889
- pub fn nanosecond(&self) -> Self {
890
- self.clone().inner.dt().nanosecond().into()
891
- }
892
-
893
- pub fn duration_days(&self) -> Self {
894
- self.inner
895
- .clone()
896
- .map(
897
- |s| Ok(Some(s.duration()?.days().into_series())),
898
- GetOutput::from_type(DataType::Int64),
899
- )
900
- .into()
901
- }
902
-
903
- pub fn duration_hours(&self) -> Self {
904
- self.inner
905
- .clone()
906
- .map(
907
- |s| Ok(Some(s.duration()?.hours().into_series())),
908
- GetOutput::from_type(DataType::Int64),
909
- )
910
- .into()
911
- }
912
-
913
- pub fn duration_minutes(&self) -> Self {
914
- self.inner
915
- .clone()
916
- .map(
917
- |s| Ok(Some(s.duration()?.minutes().into_series())),
918
- GetOutput::from_type(DataType::Int64),
919
- )
920
- .into()
921
- }
922
-
923
- pub fn duration_seconds(&self) -> Self {
924
- self.inner
925
- .clone()
926
- .map(
927
- |s| Ok(Some(s.duration()?.seconds().into_series())),
928
- GetOutput::from_type(DataType::Int64),
929
- )
930
- .into()
931
- }
932
-
933
- pub fn duration_nanoseconds(&self) -> Self {
934
- self.inner
935
- .clone()
936
- .map(
937
- |s| Ok(Some(s.duration()?.nanoseconds().into_series())),
938
- GetOutput::from_type(DataType::Int64),
939
- )
940
- .into()
941
- }
942
-
943
- pub fn duration_microseconds(&self) -> Self {
944
- self.inner
945
- .clone()
946
- .map(
947
- |s| Ok(Some(s.duration()?.microseconds().into_series())),
948
- GetOutput::from_type(DataType::Int64),
949
- )
950
- .into()
951
- }
952
-
953
- pub fn duration_milliseconds(&self) -> Self {
954
- self.inner
955
- .clone()
956
- .map(
957
- |s| Ok(Some(s.duration()?.milliseconds().into_series())),
958
- GetOutput::from_type(DataType::Int64),
959
- )
960
- .into()
961
- }
962
-
963
- pub fn timestamp(&self, tu: Wrap<TimeUnit>) -> Self {
964
- self.inner.clone().dt().timestamp(tu.0).into()
965
- }
966
-
967
- pub fn dt_offset_by(&self, by: String) -> Self {
968
- let by = Duration::parse(&by);
969
- self.inner.clone().dt().offset_by(by).into()
970
- }
971
-
972
- pub fn dt_epoch_seconds(&self) -> Self {
973
- self.clone()
974
- .inner
975
- .map(
976
- |s| {
977
- s.timestamp(TimeUnit::Milliseconds)
978
- .map(|ca| Some((ca / 1000).into_series()))
979
- },
980
- GetOutput::from_type(DataType::Int64),
981
- )
982
- .into()
983
- }
984
-
985
- pub fn dt_with_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
986
- self.inner.clone().dt().with_time_unit(tu.0).into()
987
- }
988
-
989
- pub fn dt_convert_time_zone(&self, tz: TimeZone) -> Self {
990
- self.inner.clone().dt().convert_time_zone(tz).into()
991
- }
992
-
993
- pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
994
- self.inner.clone().dt().cast_time_unit(tu.0).into()
995
- }
996
-
997
- pub fn dt_replace_time_zone(&self, tz: Option<String>) -> Self {
998
- self.inner.clone().dt().replace_time_zone(tz).into()
999
- }
1000
-
1001
- #[allow(deprecated)]
1002
- pub fn dt_tz_localize(&self, tz: String) -> Self {
1003
- self.inner.clone().dt().tz_localize(tz).into()
1004
- }
1005
-
1006
- pub fn dt_truncate(&self, every: String, offset: String) -> Self {
1007
- self.inner.clone().dt().truncate(&every, &offset).into()
1008
- }
1009
-
1010
- pub fn dt_round(&self, every: String, offset: String) -> Self {
1011
- self.inner.clone().dt().round(&every, &offset).into()
1012
- }
1013
-
1014
515
  pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
1015
516
  map_single(self, lambda, output_type, agg_list)
1016
517
  }
@@ -1258,96 +759,6 @@ impl RbExpr {
1258
759
  self.inner.clone().upper_bound().into()
1259
760
  }
1260
761
 
1261
- pub fn lst_max(&self) -> Self {
1262
- self.inner.clone().arr().max().into()
1263
- }
1264
-
1265
- pub fn lst_min(&self) -> Self {
1266
- self.inner.clone().arr().min().into()
1267
- }
1268
-
1269
- pub fn lst_sum(&self) -> Self {
1270
- self.inner.clone().arr().sum().with_fmt("arr.sum").into()
1271
- }
1272
-
1273
- pub fn lst_mean(&self) -> Self {
1274
- self.inner.clone().arr().mean().with_fmt("arr.mean").into()
1275
- }
1276
-
1277
- pub fn lst_sort(&self, reverse: bool) -> Self {
1278
- self.inner
1279
- .clone()
1280
- .arr()
1281
- .sort(SortOptions {
1282
- descending: reverse,
1283
- ..Default::default()
1284
- })
1285
- .with_fmt("arr.sort")
1286
- .into()
1287
- }
1288
-
1289
- pub fn lst_reverse(&self) -> Self {
1290
- self.inner
1291
- .clone()
1292
- .arr()
1293
- .reverse()
1294
- .with_fmt("arr.reverse")
1295
- .into()
1296
- }
1297
-
1298
- pub fn lst_unique(&self) -> Self {
1299
- self.inner
1300
- .clone()
1301
- .arr()
1302
- .unique()
1303
- .with_fmt("arr.unique")
1304
- .into()
1305
- }
1306
-
1307
- pub fn lst_get(&self, index: &RbExpr) -> Self {
1308
- self.inner.clone().arr().get(index.inner.clone()).into()
1309
- }
1310
-
1311
- pub fn lst_join(&self, separator: String) -> Self {
1312
- self.inner.clone().arr().join(&separator).into()
1313
- }
1314
-
1315
- pub fn lst_arg_min(&self) -> Self {
1316
- self.inner.clone().arr().arg_min().into()
1317
- }
1318
-
1319
- pub fn lst_arg_max(&self) -> Self {
1320
- self.inner.clone().arr().arg_max().into()
1321
- }
1322
-
1323
- pub fn lst_diff(&self, n: usize, null_behavior: Wrap<NullBehavior>) -> RbResult<Self> {
1324
- Ok(self.inner.clone().arr().diff(n, null_behavior.0).into())
1325
- }
1326
-
1327
- pub fn lst_shift(&self, periods: i64) -> Self {
1328
- self.inner.clone().arr().shift(periods).into()
1329
- }
1330
-
1331
- pub fn lst_slice(&self, offset: &RbExpr, length: Option<&RbExpr>) -> Self {
1332
- let length = match length {
1333
- Some(i) => i.inner.clone(),
1334
- None => dsl::lit(i64::MAX),
1335
- };
1336
- self.inner
1337
- .clone()
1338
- .arr()
1339
- .slice(offset.inner.clone(), length)
1340
- .into()
1341
- }
1342
-
1343
- pub fn lst_eval(&self, expr: &RbExpr, parallel: bool) -> Self {
1344
- self.inner
1345
- .clone()
1346
- .arr()
1347
- .eval(expr.inner.clone(), parallel)
1348
- .into()
1349
- }
1350
-
1351
762
  pub fn cumulative_eval(&self, expr: &RbExpr, min_periods: usize, parallel: bool) -> Self {
1352
763
  self.inner
1353
764
  .clone()
@@ -1355,42 +766,19 @@ impl RbExpr {
1355
766
  .into()
1356
767
  }
1357
768
 
1358
- pub fn lst_to_struct(
1359
- &self,
1360
- width_strat: Wrap<ListToStructWidthStrategy>,
1361
- _name_gen: Option<Value>,
1362
- upper_bound: usize,
1363
- ) -> RbResult<Self> {
1364
- // TODO fix
1365
- let name_gen = None;
1366
- // let name_gen = name_gen.map(|lambda| {
1367
- // Arc::new(move |idx: usize| {
1368
- // let out: Value = lambda.funcall("call", (idx,)).unwrap();
1369
- // out.try_convert::<String>().unwrap()
1370
- // }) as NameGenerator
1371
- // });
1372
-
1373
- Ok(self
1374
- .inner
1375
- .clone()
1376
- .arr()
1377
- .to_struct(width_strat.0, name_gen, upper_bound)
1378
- .into())
1379
- }
1380
-
1381
- pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
769
+ pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool, seed: Option<u64>) -> Self {
1382
770
  let options = RankOptions {
1383
771
  method: method.0,
1384
772
  descending: reverse,
1385
773
  };
1386
- self.inner.clone().rank(options).into()
774
+ self.inner.clone().rank(options, seed).into()
1387
775
  }
1388
776
 
1389
- pub fn diff(&self, n: usize, null_behavior: Wrap<NullBehavior>) -> Self {
777
+ pub fn diff(&self, n: i64, null_behavior: Wrap<NullBehavior>) -> Self {
1390
778
  self.inner.clone().diff(n, null_behavior.0).into()
1391
779
  }
1392
780
 
1393
- pub fn pct_change(&self, n: usize) -> Self {
781
+ pub fn pct_change(&self, n: i64) -> Self {
1394
782
  self.inner.clone().pct_change(n).into()
1395
783
  }
1396
784
 
@@ -1402,14 +790,6 @@ impl RbExpr {
1402
790
  self.inner.clone().kurtosis(fisher, bias).into()
1403
791
  }
1404
792
 
1405
- pub fn str_concat(&self, delimiter: String) -> Self {
1406
- self.inner.clone().str().concat(&delimiter).into()
1407
- }
1408
-
1409
- pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
1410
- self.inner.clone().cat().set_ordering(ordering.0).into()
1411
- }
1412
-
1413
793
  pub fn reshape(&self, dims: Vec<i64>) -> Self {
1414
794
  self.inner.clone().reshape(&dims).into()
1415
795
  }
@@ -1535,18 +915,6 @@ impl RbExpr {
1535
915
  self.inner.clone().all().into()
1536
916
  }
1537
917
 
1538
- pub fn struct_field_by_name(&self, name: String) -> Self {
1539
- self.inner.clone().struct_().field_by_name(&name).into()
1540
- }
1541
-
1542
- pub fn struct_field_by_index(&self, index: i64) -> Self {
1543
- self.inner.clone().struct_().field_by_index(index).into()
1544
- }
1545
-
1546
- pub fn struct_rename_fields(&self, names: Vec<String>) -> Self {
1547
- self.inner.clone().struct_().rename_fields(names).into()
1548
- }
1549
-
1550
918
  pub fn log(&self, base: f64) -> Self {
1551
919
  self.inner.clone().log(base).into()
1552
920
  }
@@ -1563,148 +931,3 @@ impl RbExpr {
1563
931
  self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
1564
932
  }
1565
933
  }
1566
-
1567
- pub fn col(name: String) -> RbExpr {
1568
- dsl::col(&name).into()
1569
- }
1570
-
1571
- pub fn count() -> RbExpr {
1572
- dsl::count().into()
1573
- }
1574
-
1575
- pub fn first() -> RbExpr {
1576
- dsl::first().into()
1577
- }
1578
-
1579
- pub fn last() -> RbExpr {
1580
- dsl::last().into()
1581
- }
1582
-
1583
- pub fn cols(names: Vec<String>) -> RbExpr {
1584
- dsl::cols(names).into()
1585
- }
1586
-
1587
- pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
1588
- dsl::dtype_cols(dtypes).into()
1589
- }
1590
-
1591
- pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1592
- let exprs = rb_exprs_to_exprs(exprs)?;
1593
-
1594
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1595
- Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
1596
- }
1597
-
1598
- pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
1599
- let exprs = rb_exprs_to_exprs(exprs)?;
1600
-
1601
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1602
- Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
1603
- }
1604
-
1605
- // TODO improve
1606
- pub fn lit(value: Value) -> RbResult<RbExpr> {
1607
- if value.is_nil() {
1608
- Ok(dsl::lit(Null {}).into())
1609
- } else if let Ok(series) = value.try_convert::<&RbSeries>() {
1610
- Ok(dsl::lit(series.series.borrow().clone()).into())
1611
- } else if let Some(v) = RString::from_value(value) {
1612
- Ok(dsl::lit(v.try_convert::<String>()?).into())
1613
- } else if value.is_kind_of(class::integer()) {
1614
- match value.try_convert::<i64>() {
1615
- Ok(val) => {
1616
- if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
1617
- Ok(dsl::lit(val as i32).into())
1618
- } else {
1619
- Ok(dsl::lit(val).into())
1620
- }
1621
- }
1622
- _ => {
1623
- let val = value.try_convert::<u64>()?;
1624
- Ok(dsl::lit(val).into())
1625
- }
1626
- }
1627
- } else {
1628
- Ok(dsl::lit(value.try_convert::<f64>()?).into())
1629
- }
1630
- }
1631
-
1632
- pub fn arange(low: &RbExpr, high: &RbExpr, step: usize) -> RbExpr {
1633
- polars::lazy::dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
1634
- }
1635
-
1636
- pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
1637
- if value.is_nil() {
1638
- Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
1639
- } else {
1640
- todo!();
1641
- }
1642
- }
1643
-
1644
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
1645
- polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
1646
- }
1647
-
1648
- pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
1649
- polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
1650
- .into()
1651
- }
1652
-
1653
- pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1654
- polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1655
- }
1656
-
1657
- pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1658
- let by = rb_exprs_to_exprs(by)?;
1659
- Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1660
- }
1661
-
1662
- #[magnus::wrap(class = "Polars::RbWhen")]
1663
- #[derive(Clone)]
1664
- pub struct RbWhen {
1665
- pub inner: dsl::When,
1666
- }
1667
-
1668
- impl From<dsl::When> for RbWhen {
1669
- fn from(inner: dsl::When) -> Self {
1670
- RbWhen { inner }
1671
- }
1672
- }
1673
-
1674
- #[magnus::wrap(class = "Polars::RbWhenThen")]
1675
- #[derive(Clone)]
1676
- pub struct RbWhenThen {
1677
- pub inner: dsl::WhenThen,
1678
- }
1679
-
1680
- impl From<dsl::WhenThen> for RbWhenThen {
1681
- fn from(inner: dsl::WhenThen) -> Self {
1682
- RbWhenThen { inner }
1683
- }
1684
- }
1685
-
1686
- impl RbWhen {
1687
- pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
1688
- self.inner.clone().then(expr.inner.clone()).into()
1689
- }
1690
- }
1691
-
1692
- impl RbWhenThen {
1693
- pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
1694
- self.inner.clone().otherwise(expr.inner.clone()).into()
1695
- }
1696
- }
1697
-
1698
- pub fn when(predicate: &RbExpr) -> RbWhen {
1699
- dsl::when(predicate.inner.clone()).into()
1700
- }
1701
-
1702
- pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
1703
- let s = rb_exprs_to_exprs(s)?;
1704
- Ok(dsl::concat_str(s, &sep).into())
1705
- }
1706
-
1707
- pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
1708
- let s = rb_exprs_to_exprs(s)?;
1709
- Ok(dsl::concat_lst(s).into())
1710
- }