polars-df 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/Cargo.lock +486 -380
  4. data/Cargo.toml +0 -2
  5. data/README.md +31 -2
  6. data/ext/polars/Cargo.toml +10 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +36 -19
  11. data/ext/polars/src/conversion.rs +159 -16
  12. data/ext/polars/src/dataframe.rs +51 -52
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +216 -300
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +103 -531
  38. data/lib/polars/batched_csv_reader.rb +1 -1
  39. data/lib/polars/binary_expr.rb +77 -0
  40. data/lib/polars/binary_name_space.rb +66 -0
  41. data/lib/polars/convert.rb +2 -2
  42. data/lib/polars/data_frame.rb +263 -87
  43. data/lib/polars/data_types.rb +6 -4
  44. data/lib/polars/date_time_expr.rb +148 -8
  45. data/lib/polars/expr.rb +78 -11
  46. data/lib/polars/io.rb +73 -62
  47. data/lib/polars/lazy_frame.rb +107 -10
  48. data/lib/polars/lazy_functions.rb +7 -3
  49. data/lib/polars/list_expr.rb +70 -21
  50. data/lib/polars/list_name_space.rb +2 -2
  51. data/lib/polars/series.rb +190 -74
  52. data/lib/polars/string_expr.rb +150 -44
  53. data/lib/polars/string_name_space.rb +4 -4
  54. data/lib/polars/struct_name_space.rb +32 -0
  55. data/lib/polars/utils.rb +51 -9
  56. data/lib/polars/version.rb +1 -1
  57. data/lib/polars.rb +4 -2
  58. metadata +29 -12
  59. data/ext/polars/src/lazy/mod.rs +0 -5
  60. data/ext/polars/src/lazy/utils.rs +0 -13
  61. data/ext/polars/src/list_construction.rs +0 -100
  62. data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  63. data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -1,28 +1,13 @@
1
- use magnus::block::Proc;
2
- use magnus::{class, IntoValue, RArray, RString, Value};
3
- use polars::chunked_array::ops::SortOptions;
1
+ use magnus::{block::Proc, IntoValue, RArray, Value};
4
2
  use polars::lazy::dsl;
5
- use polars::lazy::dsl::Operator;
6
3
  use polars::prelude::*;
7
4
  use polars::series::ops::NullBehavior;
8
5
 
9
- use crate::conversion::*;
10
- use crate::lazy::apply::*;
11
- use crate::lazy::utils::rb_exprs_to_exprs;
6
+ use crate::apply::lazy::map_single;
7
+ use crate::conversion::{parse_fill_null_strategy, Wrap};
8
+ use crate::rb_exprs_to_exprs;
12
9
  use crate::utils::reinterpret;
13
- use crate::{RbResult, RbSeries};
14
-
15
- #[magnus::wrap(class = "Polars::RbExpr")]
16
- #[derive(Clone)]
17
- pub struct RbExpr {
18
- pub inner: dsl::Expr,
19
- }
20
-
21
- impl From<dsl::Expr> for RbExpr {
22
- fn from(inner: dsl::Expr) -> Self {
23
- RbExpr { inner }
24
- }
25
- }
10
+ use crate::{RbExpr, RbResult};
26
11
 
27
12
  impl RbExpr {
28
13
  pub fn add(&self, rhs: &RbExpr) -> RbResult<Self> {
@@ -156,12 +141,13 @@ impl RbExpr {
156
141
  pub fn first(&self) -> Self {
157
142
  self.clone().inner.first().into()
158
143
  }
144
+
159
145
  pub fn last(&self) -> Self {
160
146
  self.clone().inner.last().into()
161
147
  }
162
148
 
163
- pub fn list(&self) -> Self {
164
- self.clone().inner.list().into()
149
+ pub fn implode(&self) -> Self {
150
+ self.clone().inner.implode().into()
165
151
  }
166
152
 
167
153
  pub fn quantile(
@@ -230,8 +216,12 @@ impl RbExpr {
230
216
  .into()
231
217
  }
232
218
 
233
- pub fn top_k(&self, k: usize, reverse: bool) -> Self {
234
- self.inner.clone().top_k(k, reverse).into()
219
+ pub fn top_k(&self, k: usize) -> Self {
220
+ self.inner.clone().top_k(k).into()
221
+ }
222
+
223
+ pub fn bottom_k(&self, k: usize) -> Self {
224
+ self.inner.clone().bottom_k(k).into()
235
225
  }
236
226
 
237
227
  pub fn arg_max(&self) -> Self {
@@ -329,6 +319,10 @@ impl RbExpr {
329
319
  self.clone().inner.is_unique().into()
330
320
  }
331
321
 
322
+ pub fn approx_unique(&self) -> Self {
323
+ self.clone().inner.approx_unique().into()
324
+ }
325
+
332
326
  pub fn is_first(&self) -> Self {
333
327
  self.clone().inner.is_first().into()
334
328
  }
@@ -518,499 +512,6 @@ impl RbExpr {
518
512
  self.inner.clone().shrink_dtype().into()
519
513
  }
520
514
 
521
- pub fn str_parse_date(
522
- &self,
523
- fmt: Option<String>,
524
- strict: bool,
525
- exact: bool,
526
- cache: bool,
527
- ) -> Self {
528
- self.inner
529
- .clone()
530
- .str()
531
- .strptime(StrpTimeOptions {
532
- date_dtype: DataType::Date,
533
- fmt,
534
- strict,
535
- exact,
536
- cache,
537
- tz_aware: false,
538
- utc: false,
539
- })
540
- .into()
541
- }
542
-
543
- pub fn str_parse_datetime(
544
- &self,
545
- fmt: Option<String>,
546
- strict: bool,
547
- exact: bool,
548
- cache: bool,
549
- tz_aware: bool,
550
- utc: bool,
551
- ) -> Self {
552
- let tu = match fmt {
553
- Some(ref fmt) => {
554
- if fmt.contains("%.9f")
555
- || fmt.contains("%9f")
556
- || fmt.contains("%f")
557
- || fmt.contains("%.f")
558
- {
559
- TimeUnit::Nanoseconds
560
- } else if fmt.contains("%.3f") || fmt.contains("%3f") {
561
- TimeUnit::Milliseconds
562
- } else {
563
- TimeUnit::Microseconds
564
- }
565
- }
566
- None => TimeUnit::Microseconds,
567
- };
568
- self.inner
569
- .clone()
570
- .str()
571
- .strptime(StrpTimeOptions {
572
- date_dtype: DataType::Datetime(tu, None),
573
- fmt,
574
- strict,
575
- exact,
576
- cache,
577
- tz_aware,
578
- utc,
579
- })
580
- .into()
581
- }
582
-
583
- pub fn str_parse_time(
584
- &self,
585
- fmt: Option<String>,
586
- strict: bool,
587
- exact: bool,
588
- cache: bool,
589
- ) -> Self {
590
- self.inner
591
- .clone()
592
- .str()
593
- .strptime(StrpTimeOptions {
594
- date_dtype: DataType::Time,
595
- fmt,
596
- strict,
597
- exact,
598
- cache,
599
- tz_aware: false,
600
- utc: false,
601
- })
602
- .into()
603
- }
604
-
605
- pub fn str_strip(&self, matches: Option<String>) -> Self {
606
- self.inner.clone().str().strip(matches).into()
607
- }
608
-
609
- pub fn str_rstrip(&self, matches: Option<String>) -> Self {
610
- self.inner.clone().str().rstrip(matches).into()
611
- }
612
-
613
- pub fn str_lstrip(&self, matches: Option<String>) -> Self {
614
- self.inner.clone().str().lstrip(matches).into()
615
- }
616
-
617
- pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
618
- let function = move |s: Series| {
619
- let ca = s.utf8()?;
620
- Ok(Some(ca.str_slice(start, length)?.into_series()))
621
- };
622
- self.clone()
623
- .inner
624
- .map(function, GetOutput::from_type(DataType::Utf8))
625
- .with_fmt("str.slice")
626
- .into()
627
- }
628
-
629
- pub fn str_to_uppercase(&self) -> Self {
630
- self.inner.clone().str().to_uppercase().into()
631
- }
632
-
633
- pub fn str_to_lowercase(&self) -> Self {
634
- self.inner.clone().str().to_lowercase().into()
635
- }
636
-
637
- pub fn str_lengths(&self) -> Self {
638
- let function = |s: Series| {
639
- let ca = s.utf8()?;
640
- Ok(Some(ca.str_lengths().into_series()))
641
- };
642
- self.clone()
643
- .inner
644
- .map(function, GetOutput::from_type(DataType::UInt32))
645
- .with_fmt("str.lengths")
646
- .into()
647
- }
648
-
649
- pub fn str_n_chars(&self) -> Self {
650
- let function = |s: Series| {
651
- let ca = s.utf8()?;
652
- Ok(Some(ca.str_n_chars().into_series()))
653
- };
654
- self.clone()
655
- .inner
656
- .map(function, GetOutput::from_type(DataType::UInt32))
657
- .with_fmt("str.n_chars")
658
- .into()
659
- }
660
-
661
- pub fn str_replace(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
662
- self.inner
663
- .clone()
664
- .str()
665
- .replace(pat.inner.clone(), val.inner.clone(), literal)
666
- .into()
667
- }
668
-
669
- pub fn str_replace_all(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
670
- self.inner
671
- .clone()
672
- .str()
673
- .replace_all(pat.inner.clone(), val.inner.clone(), literal)
674
- .into()
675
- }
676
-
677
- pub fn str_zfill(&self, alignment: usize) -> Self {
678
- self.clone().inner.str().zfill(alignment).into()
679
- }
680
-
681
- pub fn str_ljust(&self, width: usize, fillchar: char) -> Self {
682
- self.clone().inner.str().ljust(width, fillchar).into()
683
- }
684
-
685
- pub fn str_rjust(&self, width: usize, fillchar: char) -> Self {
686
- self.clone().inner.str().rjust(width, fillchar).into()
687
- }
688
-
689
- pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
690
- match literal {
691
- Some(true) => self
692
- .inner
693
- .clone()
694
- .str()
695
- .contains_literal(pat.inner.clone())
696
- .into(),
697
- _ => self
698
- .inner
699
- .clone()
700
- .str()
701
- .contains(pat.inner.clone(), strict)
702
- .into(),
703
- }
704
- }
705
-
706
- pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
707
- self.inner.clone().str().ends_with(sub.inner.clone()).into()
708
- }
709
-
710
- pub fn str_starts_with(&self, sub: &RbExpr) -> Self {
711
- self.inner
712
- .clone()
713
- .str()
714
- .starts_with(sub.inner.clone())
715
- .into()
716
- }
717
-
718
- pub fn str_hex_encode(&self) -> Self {
719
- self.clone()
720
- .inner
721
- .map(
722
- move |s| s.utf8().map(|s| Some(s.hex_encode().into_series())),
723
- GetOutput::same_type(),
724
- )
725
- .with_fmt("str.hex_encode")
726
- .into()
727
- }
728
-
729
- pub fn str_hex_decode(&self, strict: bool) -> Self {
730
- self.clone()
731
- .inner
732
- .map(
733
- move |s| s.utf8()?.hex_decode(strict).map(|s| Some(s.into_series())),
734
- GetOutput::same_type(),
735
- )
736
- .with_fmt("str.hex_decode")
737
- .into()
738
- }
739
-
740
- pub fn str_base64_encode(&self) -> Self {
741
- self.clone()
742
- .inner
743
- .map(
744
- move |s| s.utf8().map(|s| Some(s.base64_encode().into_series())),
745
- GetOutput::same_type(),
746
- )
747
- .with_fmt("str.base64_encode")
748
- .into()
749
- }
750
-
751
- pub fn str_base64_decode(&self, strict: bool) -> Self {
752
- self.clone()
753
- .inner
754
- .map(
755
- move |s| {
756
- s.utf8()?
757
- .base64_decode(strict)
758
- .map(|s| Some(s.into_series()))
759
- },
760
- GetOutput::same_type(),
761
- )
762
- .with_fmt("str.base64_decode")
763
- .into()
764
- }
765
-
766
- pub fn str_json_path_match(&self, pat: String) -> Self {
767
- let function = move |s: Series| {
768
- let ca = s.utf8()?;
769
- match ca.json_path_match(&pat) {
770
- Ok(ca) => Ok(Some(ca.into_series())),
771
- Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
772
- }
773
- };
774
- self.clone()
775
- .inner
776
- .map(function, GetOutput::from_type(DataType::Utf8))
777
- .with_fmt("str.json_path_match")
778
- .into()
779
- }
780
-
781
- pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
782
- self.inner.clone().str().extract(&pat, group_index).into()
783
- }
784
-
785
- pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
786
- self.inner
787
- .clone()
788
- .str()
789
- .extract_all(pat.inner.clone())
790
- .into()
791
- }
792
-
793
- pub fn count_match(&self, pat: String) -> Self {
794
- self.inner.clone().str().count_match(&pat).into()
795
- }
796
-
797
- pub fn strftime(&self, fmt: String) -> Self {
798
- self.inner.clone().dt().strftime(&fmt).into()
799
- }
800
-
801
- pub fn str_split(&self, by: String) -> Self {
802
- self.inner.clone().str().split(&by).into()
803
- }
804
-
805
- pub fn str_split_inclusive(&self, by: String) -> Self {
806
- self.inner.clone().str().split_inclusive(&by).into()
807
- }
808
-
809
- pub fn str_split_exact(&self, by: String, n: usize) -> Self {
810
- self.inner.clone().str().split_exact(&by, n).into()
811
- }
812
-
813
- pub fn str_split_exact_inclusive(&self, by: String, n: usize) -> Self {
814
- self.inner
815
- .clone()
816
- .str()
817
- .split_exact_inclusive(&by, n)
818
- .into()
819
- }
820
-
821
- pub fn str_splitn(&self, by: String, n: usize) -> Self {
822
- self.inner.clone().str().splitn(&by, n).into()
823
- }
824
-
825
- pub fn arr_lengths(&self) -> Self {
826
- self.inner.clone().arr().lengths().into()
827
- }
828
-
829
- pub fn arr_contains(&self, other: &RbExpr) -> Self {
830
- self.inner
831
- .clone()
832
- .arr()
833
- .contains(other.inner.clone())
834
- .into()
835
- }
836
-
837
- pub fn year(&self) -> Self {
838
- self.clone().inner.dt().year().into()
839
- }
840
-
841
- pub fn iso_year(&self) -> Self {
842
- self.clone().inner.dt().iso_year().into()
843
- }
844
-
845
- pub fn quarter(&self) -> Self {
846
- self.clone().inner.dt().quarter().into()
847
- }
848
-
849
- pub fn month(&self) -> Self {
850
- self.clone().inner.dt().month().into()
851
- }
852
-
853
- pub fn week(&self) -> Self {
854
- self.clone().inner.dt().week().into()
855
- }
856
-
857
- pub fn weekday(&self) -> Self {
858
- self.clone().inner.dt().weekday().into()
859
- }
860
-
861
- pub fn day(&self) -> Self {
862
- self.clone().inner.dt().day().into()
863
- }
864
-
865
- pub fn ordinal_day(&self) -> Self {
866
- self.clone().inner.dt().ordinal_day().into()
867
- }
868
-
869
- pub fn hour(&self) -> Self {
870
- self.clone().inner.dt().hour().into()
871
- }
872
-
873
- pub fn minute(&self) -> Self {
874
- self.clone().inner.dt().minute().into()
875
- }
876
-
877
- pub fn second(&self) -> Self {
878
- self.clone().inner.dt().second().into()
879
- }
880
-
881
- pub fn millisecond(&self) -> Self {
882
- self.clone().inner.dt().millisecond().into()
883
- }
884
-
885
- pub fn microsecond(&self) -> Self {
886
- self.clone().inner.dt().microsecond().into()
887
- }
888
-
889
- pub fn nanosecond(&self) -> Self {
890
- self.clone().inner.dt().nanosecond().into()
891
- }
892
-
893
- pub fn duration_days(&self) -> Self {
894
- self.inner
895
- .clone()
896
- .map(
897
- |s| Ok(Some(s.duration()?.days().into_series())),
898
- GetOutput::from_type(DataType::Int64),
899
- )
900
- .into()
901
- }
902
-
903
- pub fn duration_hours(&self) -> Self {
904
- self.inner
905
- .clone()
906
- .map(
907
- |s| Ok(Some(s.duration()?.hours().into_series())),
908
- GetOutput::from_type(DataType::Int64),
909
- )
910
- .into()
911
- }
912
-
913
- pub fn duration_minutes(&self) -> Self {
914
- self.inner
915
- .clone()
916
- .map(
917
- |s| Ok(Some(s.duration()?.minutes().into_series())),
918
- GetOutput::from_type(DataType::Int64),
919
- )
920
- .into()
921
- }
922
-
923
- pub fn duration_seconds(&self) -> Self {
924
- self.inner
925
- .clone()
926
- .map(
927
- |s| Ok(Some(s.duration()?.seconds().into_series())),
928
- GetOutput::from_type(DataType::Int64),
929
- )
930
- .into()
931
- }
932
-
933
- pub fn duration_nanoseconds(&self) -> Self {
934
- self.inner
935
- .clone()
936
- .map(
937
- |s| Ok(Some(s.duration()?.nanoseconds().into_series())),
938
- GetOutput::from_type(DataType::Int64),
939
- )
940
- .into()
941
- }
942
-
943
- pub fn duration_microseconds(&self) -> Self {
944
- self.inner
945
- .clone()
946
- .map(
947
- |s| Ok(Some(s.duration()?.microseconds().into_series())),
948
- GetOutput::from_type(DataType::Int64),
949
- )
950
- .into()
951
- }
952
-
953
- pub fn duration_milliseconds(&self) -> Self {
954
- self.inner
955
- .clone()
956
- .map(
957
- |s| Ok(Some(s.duration()?.milliseconds().into_series())),
958
- GetOutput::from_type(DataType::Int64),
959
- )
960
- .into()
961
- }
962
-
963
- pub fn timestamp(&self, tu: Wrap<TimeUnit>) -> Self {
964
- self.inner.clone().dt().timestamp(tu.0).into()
965
- }
966
-
967
- pub fn dt_offset_by(&self, by: String) -> Self {
968
- let by = Duration::parse(&by);
969
- self.inner.clone().dt().offset_by(by).into()
970
- }
971
-
972
- pub fn dt_epoch_seconds(&self) -> Self {
973
- self.clone()
974
- .inner
975
- .map(
976
- |s| {
977
- s.timestamp(TimeUnit::Milliseconds)
978
- .map(|ca| Some((ca / 1000).into_series()))
979
- },
980
- GetOutput::from_type(DataType::Int64),
981
- )
982
- .into()
983
- }
984
-
985
- pub fn dt_with_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
986
- self.inner.clone().dt().with_time_unit(tu.0).into()
987
- }
988
-
989
- pub fn dt_convert_time_zone(&self, tz: TimeZone) -> Self {
990
- self.inner.clone().dt().convert_time_zone(tz).into()
991
- }
992
-
993
- pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
994
- self.inner.clone().dt().cast_time_unit(tu.0).into()
995
- }
996
-
997
- pub fn dt_replace_time_zone(&self, tz: Option<String>) -> Self {
998
- self.inner.clone().dt().replace_time_zone(tz).into()
999
- }
1000
-
1001
- #[allow(deprecated)]
1002
- pub fn dt_tz_localize(&self, tz: String) -> Self {
1003
- self.inner.clone().dt().tz_localize(tz).into()
1004
- }
1005
-
1006
- pub fn dt_truncate(&self, every: String, offset: String) -> Self {
1007
- self.inner.clone().dt().truncate(&every, &offset).into()
1008
- }
1009
-
1010
- pub fn dt_round(&self, every: String, offset: String) -> Self {
1011
- self.inner.clone().dt().round(&every, &offset).into()
1012
- }
1013
-
1014
515
  pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
1015
516
  map_single(self, lambda, output_type, agg_list)
1016
517
  }
@@ -1258,96 +759,6 @@ impl RbExpr {
1258
759
  self.inner.clone().upper_bound().into()
1259
760
  }
1260
761
 
1261
- pub fn lst_max(&self) -> Self {
1262
- self.inner.clone().arr().max().into()
1263
- }
1264
-
1265
- pub fn lst_min(&self) -> Self {
1266
- self.inner.clone().arr().min().into()
1267
- }
1268
-
1269
- pub fn lst_sum(&self) -> Self {
1270
- self.inner.clone().arr().sum().with_fmt("arr.sum").into()
1271
- }
1272
-
1273
- pub fn lst_mean(&self) -> Self {
1274
- self.inner.clone().arr().mean().with_fmt("arr.mean").into()
1275
- }
1276
-
1277
- pub fn lst_sort(&self, reverse: bool) -> Self {
1278
- self.inner
1279
- .clone()
1280
- .arr()
1281
- .sort(SortOptions {
1282
- descending: reverse,
1283
- ..Default::default()
1284
- })
1285
- .with_fmt("arr.sort")
1286
- .into()
1287
- }
1288
-
1289
- pub fn lst_reverse(&self) -> Self {
1290
- self.inner
1291
- .clone()
1292
- .arr()
1293
- .reverse()
1294
- .with_fmt("arr.reverse")
1295
- .into()
1296
- }
1297
-
1298
- pub fn lst_unique(&self) -> Self {
1299
- self.inner
1300
- .clone()
1301
- .arr()
1302
- .unique()
1303
- .with_fmt("arr.unique")
1304
- .into()
1305
- }
1306
-
1307
- pub fn lst_get(&self, index: &RbExpr) -> Self {
1308
- self.inner.clone().arr().get(index.inner.clone()).into()
1309
- }
1310
-
1311
- pub fn lst_join(&self, separator: String) -> Self {
1312
- self.inner.clone().arr().join(&separator).into()
1313
- }
1314
-
1315
- pub fn lst_arg_min(&self) -> Self {
1316
- self.inner.clone().arr().arg_min().into()
1317
- }
1318
-
1319
- pub fn lst_arg_max(&self) -> Self {
1320
- self.inner.clone().arr().arg_max().into()
1321
- }
1322
-
1323
- pub fn lst_diff(&self, n: usize, null_behavior: Wrap<NullBehavior>) -> RbResult<Self> {
1324
- Ok(self.inner.clone().arr().diff(n, null_behavior.0).into())
1325
- }
1326
-
1327
- pub fn lst_shift(&self, periods: i64) -> Self {
1328
- self.inner.clone().arr().shift(periods).into()
1329
- }
1330
-
1331
- pub fn lst_slice(&self, offset: &RbExpr, length: Option<&RbExpr>) -> Self {
1332
- let length = match length {
1333
- Some(i) => i.inner.clone(),
1334
- None => dsl::lit(i64::MAX),
1335
- };
1336
- self.inner
1337
- .clone()
1338
- .arr()
1339
- .slice(offset.inner.clone(), length)
1340
- .into()
1341
- }
1342
-
1343
- pub fn lst_eval(&self, expr: &RbExpr, parallel: bool) -> Self {
1344
- self.inner
1345
- .clone()
1346
- .arr()
1347
- .eval(expr.inner.clone(), parallel)
1348
- .into()
1349
- }
1350
-
1351
762
  pub fn cumulative_eval(&self, expr: &RbExpr, min_periods: usize, parallel: bool) -> Self {
1352
763
  self.inner
1353
764
  .clone()
@@ -1355,42 +766,19 @@ impl RbExpr {
1355
766
  .into()
1356
767
  }
1357
768
 
1358
- pub fn lst_to_struct(
1359
- &self,
1360
- width_strat: Wrap<ListToStructWidthStrategy>,
1361
- _name_gen: Option<Value>,
1362
- upper_bound: usize,
1363
- ) -> RbResult<Self> {
1364
- // TODO fix
1365
- let name_gen = None;
1366
- // let name_gen = name_gen.map(|lambda| {
1367
- // Arc::new(move |idx: usize| {
1368
- // let out: Value = lambda.funcall("call", (idx,)).unwrap();
1369
- // out.try_convert::<String>().unwrap()
1370
- // }) as NameGenerator
1371
- // });
1372
-
1373
- Ok(self
1374
- .inner
1375
- .clone()
1376
- .arr()
1377
- .to_struct(width_strat.0, name_gen, upper_bound)
1378
- .into())
1379
- }
1380
-
1381
- pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
769
+ pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool, seed: Option<u64>) -> Self {
1382
770
  let options = RankOptions {
1383
771
  method: method.0,
1384
772
  descending: reverse,
1385
773
  };
1386
- self.inner.clone().rank(options).into()
774
+ self.inner.clone().rank(options, seed).into()
1387
775
  }
1388
776
 
1389
- pub fn diff(&self, n: usize, null_behavior: Wrap<NullBehavior>) -> Self {
777
+ pub fn diff(&self, n: i64, null_behavior: Wrap<NullBehavior>) -> Self {
1390
778
  self.inner.clone().diff(n, null_behavior.0).into()
1391
779
  }
1392
780
 
1393
- pub fn pct_change(&self, n: usize) -> Self {
781
+ pub fn pct_change(&self, n: i64) -> Self {
1394
782
  self.inner.clone().pct_change(n).into()
1395
783
  }
1396
784
 
@@ -1402,14 +790,6 @@ impl RbExpr {
1402
790
  self.inner.clone().kurtosis(fisher, bias).into()
1403
791
  }
1404
792
 
1405
- pub fn str_concat(&self, delimiter: String) -> Self {
1406
- self.inner.clone().str().concat(&delimiter).into()
1407
- }
1408
-
1409
- pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
1410
- self.inner.clone().cat().set_ordering(ordering.0).into()
1411
- }
1412
-
1413
793
  pub fn reshape(&self, dims: Vec<i64>) -> Self {
1414
794
  self.inner.clone().reshape(&dims).into()
1415
795
  }
@@ -1535,18 +915,6 @@ impl RbExpr {
1535
915
  self.inner.clone().all().into()
1536
916
  }
1537
917
 
1538
- pub fn struct_field_by_name(&self, name: String) -> Self {
1539
- self.inner.clone().struct_().field_by_name(&name).into()
1540
- }
1541
-
1542
- pub fn struct_field_by_index(&self, index: i64) -> Self {
1543
- self.inner.clone().struct_().field_by_index(index).into()
1544
- }
1545
-
1546
- pub fn struct_rename_fields(&self, names: Vec<String>) -> Self {
1547
- self.inner.clone().struct_().rename_fields(names).into()
1548
- }
1549
-
1550
918
  pub fn log(&self, base: f64) -> Self {
1551
919
  self.inner.clone().log(base).into()
1552
920
  }
@@ -1563,148 +931,3 @@ impl RbExpr {
1563
931
  self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
1564
932
  }
1565
933
  }
1566
-
1567
- pub fn col(name: String) -> RbExpr {
1568
- dsl::col(&name).into()
1569
- }
1570
-
1571
- pub fn count() -> RbExpr {
1572
- dsl::count().into()
1573
- }
1574
-
1575
- pub fn first() -> RbExpr {
1576
- dsl::first().into()
1577
- }
1578
-
1579
- pub fn last() -> RbExpr {
1580
- dsl::last().into()
1581
- }
1582
-
1583
- pub fn cols(names: Vec<String>) -> RbExpr {
1584
- dsl::cols(names).into()
1585
- }
1586
-
1587
- pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
1588
- dsl::dtype_cols(dtypes).into()
1589
- }
1590
-
1591
- pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
1592
- let exprs = rb_exprs_to_exprs(exprs)?;
1593
-
1594
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1595
- Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
1596
- }
1597
-
1598
- pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
1599
- let exprs = rb_exprs_to_exprs(exprs)?;
1600
-
1601
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
1602
- Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
1603
- }
1604
-
1605
- // TODO improve
1606
- pub fn lit(value: Value) -> RbResult<RbExpr> {
1607
- if value.is_nil() {
1608
- Ok(dsl::lit(Null {}).into())
1609
- } else if let Ok(series) = value.try_convert::<&RbSeries>() {
1610
- Ok(dsl::lit(series.series.borrow().clone()).into())
1611
- } else if let Some(v) = RString::from_value(value) {
1612
- Ok(dsl::lit(v.try_convert::<String>()?).into())
1613
- } else if value.is_kind_of(class::integer()) {
1614
- match value.try_convert::<i64>() {
1615
- Ok(val) => {
1616
- if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
1617
- Ok(dsl::lit(val as i32).into())
1618
- } else {
1619
- Ok(dsl::lit(val).into())
1620
- }
1621
- }
1622
- _ => {
1623
- let val = value.try_convert::<u64>()?;
1624
- Ok(dsl::lit(val).into())
1625
- }
1626
- }
1627
- } else {
1628
- Ok(dsl::lit(value.try_convert::<f64>()?).into())
1629
- }
1630
- }
1631
-
1632
- pub fn arange(low: &RbExpr, high: &RbExpr, step: usize) -> RbExpr {
1633
- polars::lazy::dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
1634
- }
1635
-
1636
- pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
1637
- if value.is_nil() {
1638
- Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
1639
- } else {
1640
- todo!();
1641
- }
1642
- }
1643
-
1644
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
1645
- polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
1646
- }
1647
-
1648
- pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
1649
- polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
1650
- .into()
1651
- }
1652
-
1653
- pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1654
- polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1655
- }
1656
-
1657
- pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1658
- let by = rb_exprs_to_exprs(by)?;
1659
- Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1660
- }
1661
-
1662
- #[magnus::wrap(class = "Polars::RbWhen")]
1663
- #[derive(Clone)]
1664
- pub struct RbWhen {
1665
- pub inner: dsl::When,
1666
- }
1667
-
1668
- impl From<dsl::When> for RbWhen {
1669
- fn from(inner: dsl::When) -> Self {
1670
- RbWhen { inner }
1671
- }
1672
- }
1673
-
1674
- #[magnus::wrap(class = "Polars::RbWhenThen")]
1675
- #[derive(Clone)]
1676
- pub struct RbWhenThen {
1677
- pub inner: dsl::WhenThen,
1678
- }
1679
-
1680
- impl From<dsl::WhenThen> for RbWhenThen {
1681
- fn from(inner: dsl::WhenThen) -> Self {
1682
- RbWhenThen { inner }
1683
- }
1684
- }
1685
-
1686
- impl RbWhen {
1687
- pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
1688
- self.inner.clone().then(expr.inner.clone()).into()
1689
- }
1690
- }
1691
-
1692
- impl RbWhenThen {
1693
- pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
1694
- self.inner.clone().otherwise(expr.inner.clone()).into()
1695
- }
1696
- }
1697
-
1698
- pub fn when(predicate: &RbExpr) -> RbWhen {
1699
- dsl::when(predicate.inner.clone()).into()
1700
- }
1701
-
1702
- pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
1703
- let s = rb_exprs_to_exprs(s)?;
1704
- Ok(dsl::concat_str(s, &sep).into())
1705
- }
1706
-
1707
- pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
1708
- let s = rb_exprs_to_exprs(s)?;
1709
- Ok(dsl::concat_lst(s).into())
1710
- }