polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -6,6 +6,20 @@ module Polars
|
|
|
6
6
|
# Name of the field(s) to select.
|
|
7
7
|
#
|
|
8
8
|
# @return [Expr]
|
|
9
|
+
#
|
|
10
|
+
# @example
|
|
11
|
+
# df = Polars::DataFrame.new({"a" => [{"x" => 5, "y" => 2}, {"x" => 3, "y" => 4}]})
|
|
12
|
+
# df.select(Polars.col("a").struct.with_fields(Polars.field("x") ** 2))
|
|
13
|
+
# # =>
|
|
14
|
+
# # shape: (2, 1)
|
|
15
|
+
# # ┌───────────┐
|
|
16
|
+
# # │ a │
|
|
17
|
+
# # │ --- │
|
|
18
|
+
# # │ struct[2] │
|
|
19
|
+
# # ╞═══════════╡
|
|
20
|
+
# # │ {25,2} │
|
|
21
|
+
# # │ {9,4} │
|
|
22
|
+
# # └───────────┘
|
|
9
23
|
def field(name)
|
|
10
24
|
if name.is_a?(::String)
|
|
11
25
|
name = [name]
|
|
@@ -19,7 +33,7 @@ module Polars
|
|
|
19
33
|
#
|
|
20
34
|
# @example A horizontal rank computation by taking the elements of a list
|
|
21
35
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
|
22
|
-
# df.
|
|
36
|
+
# df.with_columns(
|
|
23
37
|
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
|
24
38
|
# )
|
|
25
39
|
# # =>
|
|
@@ -285,7 +299,6 @@ module Polars
|
|
|
285
299
|
def mean(*columns)
|
|
286
300
|
col(*columns).mean
|
|
287
301
|
end
|
|
288
|
-
alias_method :avg, :mean
|
|
289
302
|
|
|
290
303
|
# Get the median value.
|
|
291
304
|
#
|
|
@@ -692,8 +705,12 @@ module Polars
|
|
|
692
705
|
# By default ddof is 1.
|
|
693
706
|
# @param propagate_nans [Boolean]
|
|
694
707
|
# If `true` any `NaN` encountered will lead to `NaN` in the output.
|
|
695
|
-
# Defaults to `
|
|
708
|
+
# Defaults to `false` where `NaN` are regarded as larger than any finite number
|
|
696
709
|
# and thus lead to the highest rank.
|
|
710
|
+
# @param eager [Boolean]
|
|
711
|
+
# Evaluate immediately and return a `Series`; this requires that at least one
|
|
712
|
+
# of the given arguments is a `Series`. If set to `false` (default), return
|
|
713
|
+
# an expression instead.
|
|
697
714
|
#
|
|
698
715
|
# @return [Expr]
|
|
699
716
|
#
|
|
@@ -734,27 +751,63 @@ module Polars
|
|
|
734
751
|
# # ╞═════╡
|
|
735
752
|
# # │ 0.5 │
|
|
736
753
|
# # └─────┘
|
|
754
|
+
#
|
|
755
|
+
# @example Eager evaluation:
|
|
756
|
+
# s1 = Polars::Series.new("a", [1, 8, 3])
|
|
757
|
+
# s2 = Polars::Series.new("b", [4, 5, 2])
|
|
758
|
+
# Polars.corr(s1, s2, eager: true)
|
|
759
|
+
# # =>
|
|
760
|
+
# # shape: (1,)
|
|
761
|
+
# # Series: 'a' [f64]
|
|
762
|
+
# # [
|
|
763
|
+
# # 0.544705
|
|
764
|
+
# # ]
|
|
765
|
+
#
|
|
766
|
+
# @example
|
|
767
|
+
# Polars.corr(s1, s2, method: "spearman", eager: true)
|
|
768
|
+
# # =>
|
|
769
|
+
# # shape: (1,)
|
|
770
|
+
# # Series: 'a' [f64]
|
|
771
|
+
# # [
|
|
772
|
+
# # 0.5
|
|
773
|
+
# # ]
|
|
737
774
|
def corr(
|
|
738
775
|
a,
|
|
739
776
|
b,
|
|
740
777
|
method: "pearson",
|
|
741
778
|
ddof: nil,
|
|
742
|
-
propagate_nans: false
|
|
779
|
+
propagate_nans: false,
|
|
780
|
+
eager: false
|
|
743
781
|
)
|
|
744
782
|
if !ddof.nil?
|
|
745
|
-
|
|
783
|
+
Utils.issue_deprecation_warning(
|
|
784
|
+
"The `ddof` parameter has no effect. Do not use it."
|
|
785
|
+
)
|
|
746
786
|
end
|
|
747
787
|
|
|
748
|
-
|
|
749
|
-
|
|
788
|
+
if eager
|
|
789
|
+
if !(a.is_a?(Series) || b.is_a?(Series))
|
|
790
|
+
msg = "expected at least one Series in 'corr' inputs if 'eager: true'"
|
|
791
|
+
raise ArgumentError, msg
|
|
792
|
+
end
|
|
750
793
|
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
794
|
+
frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
|
|
795
|
+
exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
|
|
796
|
+
frame.select(
|
|
797
|
+
corr(*exprs, eager: false, method: method, propagate_nans: propagate_nans)
|
|
798
|
+
).to_series
|
|
755
799
|
else
|
|
756
|
-
|
|
757
|
-
|
|
800
|
+
a = Utils.parse_into_expression(a)
|
|
801
|
+
b = Utils.parse_into_expression(b)
|
|
802
|
+
|
|
803
|
+
if method == "pearson"
|
|
804
|
+
Utils.wrap_expr(Plr.pearson_corr(a, b))
|
|
805
|
+
elsif method == "spearman"
|
|
806
|
+
Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
|
|
807
|
+
else
|
|
808
|
+
msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
|
|
809
|
+
raise ArgumentError, msg
|
|
810
|
+
end
|
|
758
811
|
end
|
|
759
812
|
end
|
|
760
813
|
|
|
@@ -768,6 +821,10 @@ module Polars
|
|
|
768
821
|
# "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
|
|
769
822
|
# where N represents the number of elements.
|
|
770
823
|
# By default ddof is 1.
|
|
824
|
+
# @param eager [Boolean]
|
|
825
|
+
# Evaluate immediately and return a `Series`; this requires that at least one
|
|
826
|
+
# of the given arguments is a `Series`. If set to `false` (default), return
|
|
827
|
+
# an expression instead.
|
|
771
828
|
#
|
|
772
829
|
# @return [Expr]
|
|
773
830
|
#
|
|
@@ -789,10 +846,32 @@ module Polars
|
|
|
789
846
|
# # ╞═════╡
|
|
790
847
|
# # │ 3.0 │
|
|
791
848
|
# # └─────┘
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
849
|
+
#
|
|
850
|
+
# @example Eager evaluation:
|
|
851
|
+
# s1 = Polars::Series.new("a", [1, 8, 3])
|
|
852
|
+
# s2 = Polars::Series.new("b", [4, 5, 2])
|
|
853
|
+
# Polars.cov(s1, s2, eager: true)
|
|
854
|
+
# # =>
|
|
855
|
+
# # shape: (1,)
|
|
856
|
+
# # Series: 'a' [f64]
|
|
857
|
+
# # [
|
|
858
|
+
# # 3.0
|
|
859
|
+
# # ]
|
|
860
|
+
def cov(a, b, ddof: 1, eager: false)
|
|
861
|
+
if eager
|
|
862
|
+
if !(a.is_a?(Series) || b.is_a?(Series))
|
|
863
|
+
msg = "expected at least one Series in 'cov' inputs if 'eager: true'"
|
|
864
|
+
raise ArgumentError, msg
|
|
865
|
+
end
|
|
866
|
+
|
|
867
|
+
frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
|
|
868
|
+
exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
|
|
869
|
+
frame.select(cov(*exprs, eager: false, ddof: ddof)).to_series
|
|
870
|
+
else
|
|
871
|
+
a_rbexpr = Utils.parse_into_expression(a)
|
|
872
|
+
b_rbexpr = Utils.parse_into_expression(b)
|
|
873
|
+
Utils.wrap_expr(Plr.cov(a_rbexpr, b_rbexpr, ddof))
|
|
874
|
+
end
|
|
796
875
|
end
|
|
797
876
|
|
|
798
877
|
# def map
|
|
@@ -860,6 +939,9 @@ module Polars
|
|
|
860
939
|
returns_scalar: false,
|
|
861
940
|
return_dtype: nil
|
|
862
941
|
)
|
|
942
|
+
# need to mark function for GC
|
|
943
|
+
raise Todo
|
|
944
|
+
|
|
863
945
|
acc = Utils.parse_into_expression(acc, str_as_lit: true)
|
|
864
946
|
if exprs.is_a?(Expr)
|
|
865
947
|
exprs = [exprs]
|
|
@@ -882,8 +964,72 @@ module Polars
|
|
|
882
964
|
)
|
|
883
965
|
end
|
|
884
966
|
|
|
885
|
-
#
|
|
886
|
-
#
|
|
967
|
+
# Accumulate over multiple columns horizontally/row wise with a left fold.
|
|
968
|
+
#
|
|
969
|
+
# @param function [Object]
|
|
970
|
+
# Function to apply over the accumulator and the value.
|
|
971
|
+
# Fn(acc, value) -> new_value
|
|
972
|
+
# @param exprs [Object]
|
|
973
|
+
# Expressions to aggregate over. May also be a wildcard expression.
|
|
974
|
+
# @param returns_scalar [Boolean]
|
|
975
|
+
# Whether or not `function` applied returns a scalar. This must be set correctly
|
|
976
|
+
# by the user.
|
|
977
|
+
# @param return_dtype [Object]
|
|
978
|
+
# Output datatype.
|
|
979
|
+
# If not set, the dtype will be inferred based on the dtype of the input
|
|
980
|
+
# expressions.
|
|
981
|
+
#
|
|
982
|
+
# @return [Expr]
|
|
983
|
+
#
|
|
984
|
+
# @example Horizontally sum over all columns.
|
|
985
|
+
# df = Polars::DataFrame.new(
|
|
986
|
+
# {
|
|
987
|
+
# "a" => [1, 2, 3],
|
|
988
|
+
# "b" => [0, 1, 2]
|
|
989
|
+
# }
|
|
990
|
+
# )
|
|
991
|
+
# df.select(
|
|
992
|
+
# Polars.reduce(function: ->(acc, x) { acc + x }, exprs: Polars.col("*")).alias("sum")
|
|
993
|
+
# )
|
|
994
|
+
# # =>
|
|
995
|
+
# # shape: (3, 1)
|
|
996
|
+
# # ┌─────┐
|
|
997
|
+
# # │ sum │
|
|
998
|
+
# # │ --- │
|
|
999
|
+
# # │ i64 │
|
|
1000
|
+
# # ╞═════╡
|
|
1001
|
+
# # │ 1 │
|
|
1002
|
+
# # │ 3 │
|
|
1003
|
+
# # │ 5 │
|
|
1004
|
+
# # └─────┘
|
|
1005
|
+
def reduce(
|
|
1006
|
+
function:,
|
|
1007
|
+
exprs:,
|
|
1008
|
+
returns_scalar: false,
|
|
1009
|
+
return_dtype: nil
|
|
1010
|
+
)
|
|
1011
|
+
# need to mark function for GC
|
|
1012
|
+
raise Todo
|
|
1013
|
+
|
|
1014
|
+
if exprs.is_a?(Expr)
|
|
1015
|
+
exprs = [exprs]
|
|
1016
|
+
end
|
|
1017
|
+
|
|
1018
|
+
rt = nil
|
|
1019
|
+
if !return_dtype.nil?
|
|
1020
|
+
rt = Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
|
|
1021
|
+
end
|
|
1022
|
+
|
|
1023
|
+
rbexprs = Utils.parse_into_list_of_expressions(exprs)
|
|
1024
|
+
Utils.wrap_expr(
|
|
1025
|
+
Plr.reduce(
|
|
1026
|
+
_wrap_acc_lamba(function),
|
|
1027
|
+
rbexprs,
|
|
1028
|
+
returns_scalar,
|
|
1029
|
+
rt
|
|
1030
|
+
)
|
|
1031
|
+
)
|
|
1032
|
+
end
|
|
887
1033
|
|
|
888
1034
|
# Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
|
|
889
1035
|
#
|
|
@@ -942,6 +1088,9 @@ module Polars
|
|
|
942
1088
|
return_dtype: nil,
|
|
943
1089
|
include_init: false
|
|
944
1090
|
)
|
|
1091
|
+
# need to mark function for GC
|
|
1092
|
+
raise Todo
|
|
1093
|
+
|
|
945
1094
|
acc = Utils.parse_into_expression(acc, str_as_lit: true)
|
|
946
1095
|
if exprs.is_a?(Expr)
|
|
947
1096
|
exprs = [exprs]
|
|
@@ -964,60 +1113,79 @@ module Polars
|
|
|
964
1113
|
)._alias("cum_fold")
|
|
965
1114
|
)
|
|
966
1115
|
end
|
|
967
|
-
alias_method :cumfold, :cum_fold
|
|
968
|
-
|
|
969
|
-
# def cum_reduce
|
|
970
|
-
# end
|
|
971
1116
|
|
|
972
|
-
#
|
|
1117
|
+
# Cumulatively reduce horizontally across columns with a left fold.
|
|
973
1118
|
#
|
|
974
|
-
#
|
|
975
|
-
# positive x-axis and the ray from the origin to (x,y).
|
|
1119
|
+
# Every cumulative result is added as a separate field in a Struct column.
|
|
976
1120
|
#
|
|
977
|
-
# @param
|
|
978
|
-
#
|
|
979
|
-
#
|
|
980
|
-
#
|
|
1121
|
+
# @param function [Object]
|
|
1122
|
+
# Function to apply over the accumulator and the value.
|
|
1123
|
+
# Fn(acc, value) -> new_value
|
|
1124
|
+
# @param exprs [Object]
|
|
1125
|
+
# Expressions to aggregate over. May also be a wildcard expression.
|
|
1126
|
+
# @param returns_scalar [Boolean]
|
|
1127
|
+
# Whether or not `function` applied returns a scalar. This must be set correctly
|
|
1128
|
+
# by the user.
|
|
1129
|
+
# @param return_dtype [Object]
|
|
1130
|
+
# Output datatype.
|
|
1131
|
+
# If not set, the dtype will be inferred based on the dtype of the input
|
|
1132
|
+
# expressions.
|
|
981
1133
|
#
|
|
982
1134
|
# @return [Expr]
|
|
983
1135
|
#
|
|
984
1136
|
# @example
|
|
985
|
-
# twoRootTwo = Math.sqrt(2) / 2
|
|
986
1137
|
# df = Polars::DataFrame.new(
|
|
987
1138
|
# {
|
|
988
|
-
# "
|
|
989
|
-
# "
|
|
1139
|
+
# "a" => [1, 2, 3],
|
|
1140
|
+
# "b" => [3, 4, 5],
|
|
1141
|
+
# "c" => [5, 6, 7]
|
|
990
1142
|
# }
|
|
991
1143
|
# )
|
|
992
|
-
# df.
|
|
993
|
-
# Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
|
|
994
|
-
# )
|
|
1144
|
+
# df.with_columns(Polars.cum_reduce(function: ->(acc, x) { acc + x }, exprs: Polars.all))
|
|
995
1145
|
# # =>
|
|
996
|
-
# # shape: (
|
|
997
|
-
# #
|
|
998
|
-
# # │
|
|
999
|
-
# # │ ---
|
|
1000
|
-
# # │
|
|
1001
|
-
# #
|
|
1002
|
-
# # │
|
|
1003
|
-
# # │
|
|
1004
|
-
# # │
|
|
1005
|
-
# #
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1146
|
+
# # shape: (3, 4)
|
|
1147
|
+
# # ┌─────┬─────┬─────┬────────────┐
|
|
1148
|
+
# # │ a ┆ b ┆ c ┆ cum_reduce │
|
|
1149
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
1150
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
|
|
1151
|
+
# # ╞═════╪═════╪═════╪════════════╡
|
|
1152
|
+
# # │ 1 ┆ 3 ┆ 5 ┆ {1,4,9} │
|
|
1153
|
+
# # │ 2 ┆ 4 ┆ 6 ┆ {2,6,12} │
|
|
1154
|
+
# # │ 3 ┆ 5 ┆ 7 ┆ {3,8,15} │
|
|
1155
|
+
# # └─────┴─────┴─────┴────────────┘
|
|
1156
|
+
def cum_reduce(
|
|
1157
|
+
function:,
|
|
1158
|
+
exprs:,
|
|
1159
|
+
returns_scalar: false,
|
|
1160
|
+
return_dtype: nil
|
|
1161
|
+
)
|
|
1162
|
+
# need to mark function for GC
|
|
1163
|
+
raise Todo
|
|
1164
|
+
|
|
1165
|
+
if exprs.is_a?(Expr)
|
|
1166
|
+
exprs = [exprs]
|
|
1010
1167
|
end
|
|
1011
|
-
|
|
1012
|
-
|
|
1168
|
+
|
|
1169
|
+
rt = nil
|
|
1170
|
+
if !return_dtype.nil?
|
|
1171
|
+
rt = Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
|
|
1013
1172
|
end
|
|
1014
|
-
|
|
1173
|
+
|
|
1174
|
+
rbexprs = Utils.parse_into_list_of_expressions(exprs)
|
|
1175
|
+
Utils.wrap_expr(
|
|
1176
|
+
Plr.cum_reduce(
|
|
1177
|
+
_wrap_acc_lamba(function),
|
|
1178
|
+
rbexprs,
|
|
1179
|
+
returns_scalar,
|
|
1180
|
+
rt
|
|
1181
|
+
).alias("cum_reduce")
|
|
1182
|
+
)
|
|
1015
1183
|
end
|
|
1016
1184
|
|
|
1017
|
-
# Compute two argument arctan in
|
|
1185
|
+
# Compute two argument arctan in radians.
|
|
1018
1186
|
#
|
|
1019
|
-
# Returns the angle (in
|
|
1020
|
-
# and the ray from the origin to (x,y).
|
|
1187
|
+
# Returns the angle (in radians) in the plane between the
|
|
1188
|
+
# positive x-axis and the ray from the origin to (x,y).
|
|
1021
1189
|
#
|
|
1022
1190
|
# @param y [Object]
|
|
1023
1191
|
# Column name or Expression.
|
|
@@ -1027,47 +1195,44 @@ module Polars
|
|
|
1027
1195
|
# @return [Expr]
|
|
1028
1196
|
#
|
|
1029
1197
|
# @example
|
|
1030
|
-
#
|
|
1198
|
+
# c = Math.sqrt(2) / 2
|
|
1031
1199
|
# df = Polars::DataFrame.new(
|
|
1032
1200
|
# {
|
|
1033
|
-
# "y" => [
|
|
1034
|
-
# "x" => [
|
|
1201
|
+
# "y" => [c, -c, c, -c],
|
|
1202
|
+
# "x" => [c, c, -c, -c]
|
|
1035
1203
|
# }
|
|
1036
1204
|
# )
|
|
1037
|
-
# df.
|
|
1038
|
-
# Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
|
|
1039
|
-
# )
|
|
1205
|
+
# df.with_columns(Polars.arctan2("y", "x").alias("atan2"))
|
|
1040
1206
|
# # =>
|
|
1041
|
-
# # shape: (4,
|
|
1042
|
-
# #
|
|
1043
|
-
# # │
|
|
1044
|
-
# # │ ---
|
|
1045
|
-
# # │ f64
|
|
1046
|
-
# #
|
|
1047
|
-
# # │
|
|
1048
|
-
# # │ -
|
|
1049
|
-
# # │
|
|
1050
|
-
# # │ -
|
|
1051
|
-
# #
|
|
1052
|
-
def
|
|
1207
|
+
# # shape: (4, 3)
|
|
1208
|
+
# # ┌───────────┬───────────┬───────────┐
|
|
1209
|
+
# # │ y ┆ x ┆ atan2 │
|
|
1210
|
+
# # │ --- ┆ --- ┆ --- │
|
|
1211
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
|
1212
|
+
# # ╞═══════════╪═══════════╪═══════════╡
|
|
1213
|
+
# # │ 0.707107 ┆ 0.707107 ┆ 0.785398 │
|
|
1214
|
+
# # │ -0.707107 ┆ 0.707107 ┆ -0.785398 │
|
|
1215
|
+
# # │ 0.707107 ┆ -0.707107 ┆ 2.356194 │
|
|
1216
|
+
# # │ -0.707107 ┆ -0.707107 ┆ -2.356194 │
|
|
1217
|
+
# # └───────────┴───────────┴───────────┘
|
|
1218
|
+
def arctan2(y, x)
|
|
1053
1219
|
if Utils.strlike?(y)
|
|
1054
1220
|
y = col(y)
|
|
1055
1221
|
end
|
|
1056
1222
|
if Utils.strlike?(x)
|
|
1057
1223
|
x = col(x)
|
|
1058
1224
|
end
|
|
1059
|
-
Utils.wrap_expr(Plr.
|
|
1225
|
+
Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
|
|
1060
1226
|
end
|
|
1061
1227
|
|
|
1062
1228
|
# Exclude certain columns from a wildcard/regex selection.
|
|
1063
1229
|
#
|
|
1064
1230
|
# @param columns [Object]
|
|
1065
|
-
#
|
|
1066
|
-
#
|
|
1067
|
-
#
|
|
1068
|
-
#
|
|
1069
|
-
#
|
|
1070
|
-
# - a dtype or multiple dtypes
|
|
1231
|
+
# The name or datatype of the column(s) to exclude. Accepts regular expression
|
|
1232
|
+
# input. Regular expressions should start with `^` and end with `$`.
|
|
1233
|
+
# @param more_columns [Array]
|
|
1234
|
+
# Additional names or datatypes of columns to exclude, specified as positional
|
|
1235
|
+
# arguments.
|
|
1071
1236
|
#
|
|
1072
1237
|
# @return [Object]
|
|
1073
1238
|
#
|
|
@@ -1118,8 +1283,8 @@ module Polars
|
|
|
1118
1283
|
# # │ 2.5 │
|
|
1119
1284
|
# # │ 1.5 │
|
|
1120
1285
|
# # └──────┘
|
|
1121
|
-
def exclude(columns)
|
|
1122
|
-
col("*").exclude(columns)
|
|
1286
|
+
def exclude(columns, *more_columns)
|
|
1287
|
+
col("*").exclude(columns, *more_columns)
|
|
1123
1288
|
end
|
|
1124
1289
|
|
|
1125
1290
|
# Syntactic sugar for `Polars.col("foo").agg_groups`.
|
|
@@ -1153,7 +1318,7 @@ module Polars
|
|
|
1153
1318
|
# Columns use to determine the ordering.
|
|
1154
1319
|
# @param more_exprs [Array]
|
|
1155
1320
|
# Additional columns to arg sort by, specified as positional arguments.
|
|
1156
|
-
# @param
|
|
1321
|
+
# @param descending [Boolean]
|
|
1157
1322
|
# Default is ascending.
|
|
1158
1323
|
# @param nulls_last [Boolean]
|
|
1159
1324
|
# Place null values last.
|
|
@@ -1187,7 +1352,7 @@ module Polars
|
|
|
1187
1352
|
# # └─────┘
|
|
1188
1353
|
#
|
|
1189
1354
|
# @example Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.
|
|
1190
|
-
# df.select(Polars.arg_sort_by(["a", "b"],
|
|
1355
|
+
# df.select(Polars.arg_sort_by(["a", "b"], descending: true))
|
|
1191
1356
|
# # =>
|
|
1192
1357
|
# # shape: (4, 1)
|
|
1193
1358
|
# # ┌─────┐
|
|
@@ -1218,17 +1383,16 @@ module Polars
|
|
|
1218
1383
|
def arg_sort_by(
|
|
1219
1384
|
exprs,
|
|
1220
1385
|
*more_exprs,
|
|
1221
|
-
|
|
1386
|
+
descending: false,
|
|
1222
1387
|
nulls_last: false,
|
|
1223
1388
|
multithreaded: true,
|
|
1224
1389
|
maintain_order: false
|
|
1225
1390
|
)
|
|
1226
1391
|
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
|
1227
|
-
|
|
1392
|
+
descending = Utils.extend_bool(descending, exprs.length, "descending", "exprs")
|
|
1228
1393
|
nulls_last = Utils.extend_bool(nulls_last, exprs.length, "nulls_last", "exprs")
|
|
1229
|
-
Utils.wrap_expr(Plr.arg_sort_by(exprs,
|
|
1394
|
+
Utils.wrap_expr(Plr.arg_sort_by(exprs, descending, nulls_last, multithreaded, maintain_order))
|
|
1230
1395
|
end
|
|
1231
|
-
alias_method :argsort_by, :arg_sort_by
|
|
1232
1396
|
|
|
1233
1397
|
# Collect multiple LazyFrames at the same time.
|
|
1234
1398
|
#
|
|
@@ -1236,62 +1400,31 @@ module Polars
|
|
|
1236
1400
|
#
|
|
1237
1401
|
# @param lazy_frames [Array]
|
|
1238
1402
|
# A list of LazyFrames to collect.
|
|
1239
|
-
# @param
|
|
1240
|
-
#
|
|
1241
|
-
#
|
|
1242
|
-
#
|
|
1243
|
-
# @param
|
|
1244
|
-
#
|
|
1245
|
-
#
|
|
1246
|
-
#
|
|
1247
|
-
#
|
|
1248
|
-
#
|
|
1249
|
-
#
|
|
1250
|
-
#
|
|
1251
|
-
# @param slice_pushdown [Boolean]
|
|
1252
|
-
# Slice pushdown optimization.
|
|
1253
|
-
# @param common_subplan_elimination [Boolean]
|
|
1254
|
-
# Will try to cache branching subplans that occur on self-joins or unions.
|
|
1255
|
-
# @param allow_streaming [Boolean]
|
|
1256
|
-
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
|
1403
|
+
# @param optimizations
|
|
1404
|
+
# The optimization passes done during query optimization.
|
|
1405
|
+
#
|
|
1406
|
+
# This has no effect if `lazy` is set to `true`.
|
|
1407
|
+
# @param engine
|
|
1408
|
+
# Select the engine used to process the query, optional.
|
|
1409
|
+
# At the moment, if set to `"auto"` (default), the query is run
|
|
1410
|
+
# using the polars streaming engine. Polars will also
|
|
1411
|
+
# attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
|
|
1412
|
+
# environment variable. If it cannot run the query using the
|
|
1413
|
+
# selected engine, the query is run using the polars streaming
|
|
1414
|
+
# engine.
|
|
1257
1415
|
#
|
|
1258
1416
|
# @return [Array]
|
|
1259
1417
|
def collect_all(
|
|
1260
1418
|
lazy_frames,
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
projection_pushdown: true,
|
|
1264
|
-
simplify_expression: true,
|
|
1265
|
-
string_cache: false,
|
|
1266
|
-
no_optimization: false,
|
|
1267
|
-
slice_pushdown: true,
|
|
1268
|
-
common_subplan_elimination: true,
|
|
1269
|
-
allow_streaming: false
|
|
1419
|
+
optimizations: DEFAULT_QUERY_OPT_FLAGS,
|
|
1420
|
+
engine: "auto"
|
|
1270
1421
|
)
|
|
1271
|
-
if
|
|
1272
|
-
|
|
1273
|
-
projection_pushdown = false
|
|
1274
|
-
slice_pushdown = false
|
|
1275
|
-
common_subplan_elimination = false
|
|
1422
|
+
if engine == "streaming"
|
|
1423
|
+
Utils.issue_unstable_warning("streaming mode is considered unstable.")
|
|
1276
1424
|
end
|
|
1277
1425
|
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
lazy_frames.each do |lf|
|
|
1281
|
-
ldf = lf._ldf.optimization_toggle(
|
|
1282
|
-
type_coercion,
|
|
1283
|
-
predicate_pushdown,
|
|
1284
|
-
projection_pushdown,
|
|
1285
|
-
simplify_expression,
|
|
1286
|
-
slice_pushdown,
|
|
1287
|
-
common_subplan_elimination,
|
|
1288
|
-
allow_streaming,
|
|
1289
|
-
false
|
|
1290
|
-
)
|
|
1291
|
-
prepared << ldf
|
|
1292
|
-
end
|
|
1293
|
-
|
|
1294
|
-
out = Plr.collect_all(prepared)
|
|
1426
|
+
lfs = lazy_frames.map { |lf| lf._ldf }
|
|
1427
|
+
out = Plr.collect_all(lfs, engine, optimizations._rboptflags)
|
|
1295
1428
|
|
|
1296
1429
|
# wrap the rbdataframes into dataframe
|
|
1297
1430
|
result = out.map { |rbdf| Utils.wrap_df(rbdf) }
|
|
@@ -1307,6 +1440,9 @@ module Polars
|
|
|
1307
1440
|
# Column(s) to select, specified as positional arguments.
|
|
1308
1441
|
# Accepts expression input. Strings are parsed as column names,
|
|
1309
1442
|
# other non-expression inputs are parsed as literals.
|
|
1443
|
+
# @param eager [Boolean]
|
|
1444
|
+
# Evaluate immediately and return a `DataFrame` (default); if set to `false`,
|
|
1445
|
+
# return a `LazyFrame` instead.
|
|
1310
1446
|
# @param named_exprs [Hash]
|
|
1311
1447
|
# Additional columns to select, specified as keyword arguments.
|
|
1312
1448
|
# The columns will be renamed to the keyword used.
|
|
@@ -1328,8 +1464,9 @@ module Polars
|
|
|
1328
1464
|
# # │ 2 │
|
|
1329
1465
|
# # │ 1 │
|
|
1330
1466
|
# # └─────┘
|
|
1331
|
-
def select(*exprs, **named_exprs)
|
|
1332
|
-
DataFrame.new
|
|
1467
|
+
def select(*exprs, eager: true, **named_exprs)
|
|
1468
|
+
empty_frame = eager ? Polars::DataFrame.new : Polars::LazyFrame.new
|
|
1469
|
+
empty_frame.select(*exprs, **named_exprs)
|
|
1333
1470
|
end
|
|
1334
1471
|
|
|
1335
1472
|
# Return indices where `condition` evaluates `true`.
|
|
@@ -1374,6 +1511,10 @@ module Polars
|
|
|
1374
1511
|
# names, other non-expression inputs are parsed as literals.
|
|
1375
1512
|
# @param more_exprs [Array]
|
|
1376
1513
|
# Additional columns to coalesce, specified as positional arguments.
|
|
1514
|
+
# @param eager [Boolean]
|
|
1515
|
+
# Evaluate immediately and return a `Series`; this requires that at least one
|
|
1516
|
+
# of the given arguments is a `Series`. If set to `false` (default), return
|
|
1517
|
+
# an expression instead.
|
|
1377
1518
|
#
|
|
1378
1519
|
# @return [Expr]
|
|
1379
1520
|
#
|
|
@@ -1413,32 +1554,55 @@ module Polars
|
|
|
1413
1554
|
# # │ null ┆ null ┆ 3 ┆ 3.0 │
|
|
1414
1555
|
# # │ null ┆ null ┆ null ┆ 10.0 │
|
|
1415
1556
|
# # └──────┴──────┴──────┴──────┘
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1557
|
+
#
|
|
1558
|
+
# @example
|
|
1559
|
+
# s1 = Polars::Series.new("a", [nil, 2, nil])
|
|
1560
|
+
# s2 = Polars::Series.new("b", [1, nil, 3])
|
|
1561
|
+
# Polars.coalesce(s1, s2, eager: true)
|
|
1562
|
+
# # =>
|
|
1563
|
+
# # shape: (3,)
|
|
1564
|
+
# # Series: 'a' [i64]
|
|
1565
|
+
# # [
|
|
1566
|
+
# # 1
|
|
1567
|
+
# # 2
|
|
1568
|
+
# # 3
|
|
1569
|
+
# # ]
|
|
1570
|
+
def coalesce(exprs, *more_exprs, eager: false)
|
|
1571
|
+
if eager
|
|
1572
|
+
exprs = [exprs] + more_exprs
|
|
1573
|
+
series = exprs.filter_map { |e| e if e.is_a?(Series) }
|
|
1574
|
+
if !series.any?
|
|
1575
|
+
msg = "expected at least one Series in 'coalesce' if 'eager: true'"
|
|
1576
|
+
raise ArgumentError, msg
|
|
1577
|
+
end
|
|
1578
|
+
|
|
1579
|
+
exprs = exprs.map { |e| e.is_a?(Series) ? e.name : e }
|
|
1580
|
+
Polars::DataFrame.new(series).select(coalesce(exprs, eager: false)).to_series
|
|
1581
|
+
else
|
|
1582
|
+
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
|
1583
|
+
Utils.wrap_expr(Plr.coalesce(exprs))
|
|
1584
|
+
end
|
|
1419
1585
|
end
|
|
1420
1586
|
|
|
1421
1587
|
# Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
|
|
1422
1588
|
#
|
|
1423
1589
|
# Depending on the `unit` provided, this function will return a different dtype:
|
|
1424
|
-
# -
|
|
1425
|
-
# -
|
|
1426
|
-
# -
|
|
1427
|
-
# -
|
|
1428
|
-
# -
|
|
1590
|
+
# - time_unit: "d" returns pl.Date
|
|
1591
|
+
# - time_unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
|
|
1592
|
+
# - time_unit: "ms" returns pl.Datetime["ms"]
|
|
1593
|
+
# - time_unit: "us" returns pl.Datetime["us"]
|
|
1594
|
+
# - time_unit: "ns" returns pl.Datetime["ns"]
|
|
1429
1595
|
#
|
|
1430
1596
|
# @param column [Object]
|
|
1431
1597
|
# Series or expression to parse integers to pl.Datetime.
|
|
1432
|
-
# @param
|
|
1598
|
+
# @param time_unit [String]
|
|
1433
1599
|
# The unit of the timesteps since epoch time.
|
|
1434
|
-
# @param eager [Boolean]
|
|
1435
|
-
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
|
1436
1600
|
#
|
|
1437
1601
|
# @return [Object]
|
|
1438
1602
|
#
|
|
1439
1603
|
# @example
|
|
1440
1604
|
# df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
|
|
1441
|
-
# df.select(Polars.from_epoch(Polars.col("timestamp"),
|
|
1605
|
+
# df.select(Polars.from_epoch(Polars.col("timestamp"), time_unit: "s")).collect
|
|
1442
1606
|
# # =>
|
|
1443
1607
|
# # shape: (2, 1)
|
|
1444
1608
|
# # ┌─────────────────────┐
|
|
@@ -1449,32 +1613,102 @@ module Polars
|
|
|
1449
1613
|
# # │ 2022-10-25 07:31:17 │
|
|
1450
1614
|
# # │ 2022-10-25 07:31:39 │
|
|
1451
1615
|
# # └─────────────────────┘
|
|
1452
|
-
def from_epoch(column,
|
|
1616
|
+
def from_epoch(column, time_unit: "s")
|
|
1453
1617
|
if Utils.strlike?(column)
|
|
1454
|
-
column = col(column)
|
|
1618
|
+
column = F.col(column)
|
|
1455
1619
|
elsif !column.is_a?(Series) && !column.is_a?(Expr)
|
|
1456
1620
|
column = Series.new(column)
|
|
1457
1621
|
end
|
|
1458
1622
|
|
|
1459
|
-
if
|
|
1460
|
-
|
|
1461
|
-
elsif
|
|
1462
|
-
|
|
1463
|
-
elsif Utils::DTYPE_TEMPORAL_UNITS.include?(
|
|
1464
|
-
|
|
1623
|
+
if time_unit == "d"
|
|
1624
|
+
column.cast(Date)
|
|
1625
|
+
elsif time_unit == "s"
|
|
1626
|
+
(column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
|
|
1627
|
+
elsif Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
|
|
1628
|
+
column.cast(Datetime.new(time_unit))
|
|
1465
1629
|
else
|
|
1466
|
-
raise ArgumentError, "
|
|
1630
|
+
raise ArgumentError, "`time_unit` must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got #{time_unit.inspect}."
|
|
1467
1631
|
end
|
|
1632
|
+
end
|
|
1468
1633
|
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1634
|
+
# Compute the rolling covariance between two columns/ expressions.
|
|
1635
|
+
#
|
|
1636
|
+
# The window at a given row includes the row itself and the
|
|
1637
|
+
# `window_size - 1` elements before it.
|
|
1638
|
+
#
|
|
1639
|
+
# @param a [Object]
|
|
1640
|
+
# Column name or Expression.
|
|
1641
|
+
# @param b [Object]
|
|
1642
|
+
# Column name or Expression.
|
|
1643
|
+
# @param window_size [Integer]
|
|
1644
|
+
# The length of the window.
|
|
1645
|
+
# @param min_samples [Integer]
|
|
1646
|
+
# The number of values in the window that should be non-null before computing
|
|
1647
|
+
# a result. If nil, it will be set equal to window size.
|
|
1648
|
+
# @param ddof [Integer]
|
|
1649
|
+
# Delta degrees of freedom. The divisor used in calculations
|
|
1650
|
+
# is `N - ddof`, where `N` represents the number of elements.
|
|
1651
|
+
#
|
|
1652
|
+
# @return [Expr]
|
|
1653
|
+
def rolling_cov(
|
|
1654
|
+
a,
|
|
1655
|
+
b,
|
|
1656
|
+
window_size:,
|
|
1657
|
+
min_samples: nil,
|
|
1658
|
+
ddof: 1
|
|
1659
|
+
)
|
|
1660
|
+
if min_samples.nil?
|
|
1661
|
+
min_samples = window_size
|
|
1477
1662
|
end
|
|
1663
|
+
if Utils.strlike?(a)
|
|
1664
|
+
a = F.col(a)
|
|
1665
|
+
end
|
|
1666
|
+
if Utils.strlike?(b)
|
|
1667
|
+
b = F.col(b)
|
|
1668
|
+
end
|
|
1669
|
+
Utils.wrap_expr(
|
|
1670
|
+
Plr.rolling_cov(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
|
|
1671
|
+
)
|
|
1672
|
+
end
|
|
1673
|
+
|
|
1674
|
+
# Compute the rolling correlation between two columns/ expressions.
|
|
1675
|
+
#
|
|
1676
|
+
# The window at a given row includes the row itself and the
|
|
1677
|
+
# `window_size - 1` elements before it.
|
|
1678
|
+
#
|
|
1679
|
+
# @param a [Object]
|
|
1680
|
+
# Column name or Expression.
|
|
1681
|
+
# @param b [Object]
|
|
1682
|
+
# Column name or Expression.
|
|
1683
|
+
# @param window_size [Integer]
|
|
1684
|
+
# The length of the window.
|
|
1685
|
+
# @param min_samples [Integer]
|
|
1686
|
+
# The number of values in the window that should be non-null before computing
|
|
1687
|
+
# a result. If nil, it will be set equal to window size.
|
|
1688
|
+
# @param ddof [Integer]
|
|
1689
|
+
# Delta degrees of freedom. The divisor used in calculations
|
|
1690
|
+
# is `N - ddof`, where `N` represents the number of elements.
|
|
1691
|
+
#
|
|
1692
|
+
# @return [Expr]
|
|
1693
|
+
def rolling_corr(
|
|
1694
|
+
a,
|
|
1695
|
+
b,
|
|
1696
|
+
window_size:,
|
|
1697
|
+
min_samples: nil,
|
|
1698
|
+
ddof: 1
|
|
1699
|
+
)
|
|
1700
|
+
if min_samples.nil?
|
|
1701
|
+
min_samples = window_size
|
|
1702
|
+
end
|
|
1703
|
+
if Utils.strlike?(a)
|
|
1704
|
+
a = F.col(a)
|
|
1705
|
+
end
|
|
1706
|
+
if Utils.strlike?(b)
|
|
1707
|
+
b = F.col(b)
|
|
1708
|
+
end
|
|
1709
|
+
Utils.wrap_expr(
|
|
1710
|
+
Plr.rolling_corr(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
|
|
1711
|
+
)
|
|
1478
1712
|
end
|
|
1479
1713
|
|
|
1480
1714
|
# Parse one or more SQL expressions to polars expression(s).
|
|
@@ -1519,5 +1753,11 @@ module Polars
|
|
|
1519
1753
|
sql.map { |q| Utils.wrap_expr(Plr.sql_expr(q)) }
|
|
1520
1754
|
end
|
|
1521
1755
|
end
|
|
1756
|
+
|
|
1757
|
+
private
|
|
1758
|
+
|
|
1759
|
+
def _wrap_acc_lamba(function)
|
|
1760
|
+
raise Todo
|
|
1761
|
+
end
|
|
1522
1762
|
end
|
|
1523
1763
|
end
|