polars-df 0.10.0-aarch64-linux → 0.11.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/LICENSE-THIRD-PARTY.txt +152 -79
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +12 -4
data/lib/polars/expr.rb
CHANGED
@@ -1345,9 +1345,9 @@ module Polars
 # # │ 3 ┆ 4 │
 # # │ 2 ┆ 98 │
 # # └───────┴──────────┘
-def top_k(k: 5)
+def top_k(k: 5, nulls_last: false, multithreaded: true)
 k = Utils.parse_as_expression(k)
-_from_rbexpr(_rbexpr.top_k(k))
+_from_rbexpr(_rbexpr.top_k(k, nulls_last, multithreaded))
 end

 # Return the `k` smallest elements.
@@ -1384,9 +1384,9 @@ module Polars
 # # │ 3 ┆ 4 │
 # # │ 2 ┆ 98 │
 # # └───────┴──────────┘
-def bottom_k(k: 5)
+def bottom_k(k: 5, nulls_last: false, multithreaded: true)
 k = Utils.parse_as_expression(k)
-_from_rbexpr(_rbexpr.bottom_k(k))
+_from_rbexpr(_rbexpr.bottom_k(k, nulls_last, multithreaded))
 end

 # Get the index values that would sort this column.
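The two hunks above give `top_k` and `bottom_k` new `nulls_last` and `multithreaded` keywords. A minimal sketch of calling them, assuming a small frame with an integer column "a" (the data and column names are illustrative, not taken from this diff):

  require "polars-df"

  df = Polars::DataFrame.new({"a" => [6, 1, nil, 9, 3]})

  df.select([
    # keep nulls at the end of the result and force the single-threaded path
    Polars.col("a").top_k(k: 3, nulls_last: true, multithreaded: false).alias("top"),
    Polars.col("a").bottom_k(k: 3, nulls_last: true).alias("bottom")
  ])

Both keywords are simply forwarded to the native `top_k`/`bottom_k` calls, as the diff shows.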
@@ -2764,6 +2764,9 @@ module Polars
 # Dtype of the output Series.
 # @param agg_list [Boolean]
 # Aggregate list.
+# @param is_elementwise [Boolean]
+# If set to true this can run in the streaming engine, but may yield
+# incorrect results in group-by. Ensure you know what you are doing!
 #
 # @return [Expr]
 #
@@ -2784,12 +2787,21 @@ module Polars
 # # ╞══════╪════════╡
 # # │ 1 ┆ 0 │
 # # └──────┴────────┘
-# def
+# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
 # if !return_dtype.nil?
 # return_dtype = Utils.rb_type_to_dtype(return_dtype)
 # end
-# _from_rbexpr(
+# _from_rbexpr(
+# _rbexpr.map_batches(
+# # TODO _map_batches_wrapper
+# f,
+# return_dtype,
+# agg_list,
+# is_elementwise
+# )
+# )
 # end
+# alias_method :map, :map_batches

 # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
 #
@@ -2831,7 +2843,7 @@ module Polars
 #
 # @example In a selection context, the function is applied by row.
 # df.with_column(
-# Polars.col("a").
+# Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
 # )
 # # =>
 # # shape: (4, 3)
@@ -2851,7 +2863,7 @@ module Polars
 # .group_by("b", maintain_order: true)
 # .agg(
 # [
-# Polars.col("a").
+# Polars.col("a").map_elements { |x| x.sum }
 # ]
 # )
 # .collect
@@ -2866,12 +2878,23 @@ module Polars
 # # │ b ┆ 2 │
 # # │ c ┆ 4 │
 # # └─────┴─────┘
-# def
-#
-#
+# def map_elements(
+# return_dtype: nil,
+# skip_nulls: true,
+# pass_name: false,
+# strategy: "thread_local",
+# &f
+# )
+# if pass_name
+# raise Todo
+# else
+# wrap_f = lambda do |x|
+# x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
+# end
 # end
-#
+# map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
 # end
+# alias_method :apply, :map_elements

 # Explode a list or utf8 Series. This means that every item is expanded to a new
 # row.
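Note that `map_batches` and `map_elements` (with their `:map` / `:apply` aliases) are still commented out in the hunks above, so in this release they exist only as documentation. The sketch below restates the documented intent from that example and assumes the methods are eventually enabled with the signatures shown; it is not a working 0.11.0 call.

  df = Polars::DataFrame.new({"a" => [1, 2, 3, 1], "b" => ["a", "b", "c", "c"]})

  # Hypothetical once map_elements is enabled: apply a Ruby block per element.
  df.with_column(
    Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
  )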
@@ -3857,6 +3880,1002 @@ module Polars
 _from_rbexpr(_rbexpr.interpolate(method))
 end

+# Apply a rolling min based on another column.
+def rolling_min_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_min_by(by, window_size, min_periods, closed)
+)
+end
+
+# Apply a rolling max based on another column.
+def rolling_max_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_max_by(by, window_size, min_periods, closed)
+)
+end
+
+# Apply a rolling mean based on another column.
+def rolling_mean_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_mean_by(
+by,
+window_size,
+min_periods,
+closed
+)
+)
+end
+
+# Apply a rolling sum based on another column.
+def rolling_sum_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
+)
+end
+
+# Compute a rolling standard deviation based on another column.
+def rolling_std_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+ddof: 1,
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_std_by(
+by,
+window_size,
+min_periods,
+closed,
+ddof
+)
+)
+end
+
+# Compute a rolling variance based on another column.
+def rolling_var_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+ddof: 1,
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_var_by(
+by,
+window_size,
+min_periods,
+closed,
+ddof
+)
+)
+end
+
+# Compute a rolling median based on another column.
+def rolling_median_by(
+by,
+window_size,
+min_periods: 1,
+closed: "right",
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_median_by(by, window_size, min_periods, closed)
+)
+end
+
+# Compute a rolling quantile based on another column.
+def rolling_quantile_by(
+by,
+window_size,
+quantile:,
+interpolation: "nearest",
+min_periods: 1,
+closed: "right",
+warn_if_unsorted: nil
+)
+window_size = _prepare_rolling_by_window_args(window_size)
+by = Utils.parse_as_expression(by)
+_from_rbexpr(
+_rbexpr.rolling_quantile_by(
+by,
+quantile,
+interpolation,
+window_size,
+min_periods,
+closed,
+)
+)
+end
+
 # Apply a rolling min (moving min) over the values in this array.
 #
 # A window of length `window_size` will traverse the array. The values that fill
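The block above adds the `*_by` family of rolling aggregations, which window by a Date/Datetime column instead of by a fixed number of rows (each method also documents the duration-string window sizes such as "2h" or "1d" and a `closed:`/`min_periods:` option). A short sketch following the docstring examples in this hunk; it assumes the new `Polars.datetime_range` helper added elsewhere in this release:

  require "date"

  start = DateTime.new(2001, 1, 1)
  stop = DateTime.new(2001, 1, 2)
  df_temporal = Polars::DataFrame.new(
    {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
  ).with_row_index

  df_temporal.with_columns(
    rolling_row_min: Polars.col("index").rolling_min_by("date", "2h"),
    rolling_row_mean: Polars.col("index").rolling_mean_by("date", "2h", closed: "both")
  )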
@@ -3939,9 +4958,20 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_min_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_min(
-window_size, weights, min_periods, center
+window_size, weights, min_periods, center
 )
 )
 end
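As this and the following hunks show, the fixed-window methods now detect a `by:` argument, validate that `weights`/`center` are unset, and delegate to the new `*_by` variants. The two expressions below should therefore be equivalent (column names are illustrative, following the docstring example above):

  # Existing call with by: now routes through rolling_min_by under the hood.
  Polars.col("index").rolling_min("2h", by: "date", closed: "right")
  # Direct call added in 0.11.0.
  Polars.col("index").rolling_min_by("date", "2h", closed: "right")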
@@ -4028,9 +5058,20 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_max_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_max(
-window_size, weights, min_periods, center
+window_size, weights, min_periods, center
 )
 )
 end
@@ -4117,9 +5158,20 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_mean_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_mean(
-window_size, weights, min_periods, center
+window_size, weights, min_periods, center
 )
 )
 end
@@ -4206,9 +5258,20 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_sum_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_sum(
-window_size, weights, min_periods, center
+window_size, weights, min_periods, center
 )
 )
 end
@@ -4297,9 +5360,21 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_std_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+ddof: ddof,
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_std(
-window_size, weights, min_periods, center,
+window_size, weights, min_periods, center, ddof
 )
 )
 end
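`rolling_std` here (and `rolling_var` in the next hunk) now forward `ddof` to the native rolling call; previously the Ruby `ddof` argument was not passed through, as the removed line shows. A small sketch with illustrative data, requesting a population-style estimate:

  df = Polars::DataFrame.new({"x" => [1.0, 2.0, 4.0, 8.0]})

  df.select([
    Polars.col("x").rolling_std(3, ddof: 0).alias("std_pop"),
    Polars.col("x").rolling_var(3, ddof: 0).alias("var_pop")
  ])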
@@ -4388,9 +5463,21 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_var_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+ddof: ddof,
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_var(
-window_size, weights, min_periods, center,
+window_size, weights, min_periods, center, ddof
 )
 )
 end
@@ -4474,9 +5561,20 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_median_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+warn_if_unsorted: warn_if_unsorted
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_median(
-window_size, weights, min_periods, center
+window_size, weights, min_periods, center
 )
 )
 end
@@ -4566,9 +5664,21 @@ module Polars
 window_size, min_periods = _prepare_rolling_window_args(
 window_size, min_periods
 )
+if !by.nil?
+Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+return rolling_quantile_by(
+by,
+window_size,
+min_periods: min_periods,
+closed: closed || "right",
+warn_if_unsorted: warn_if_unsorted,
+quantile: quantile
+)
+end
+window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
 _from_rbexpr(
 _rbexpr.rolling_quantile(
-quantile, interpolation, window_size, weights, min_periods, center
+quantile, interpolation, window_size, weights, min_periods, center
 )
 )
 end
@@ -6101,5 +7211,9 @@ module Polars
 end
 [window_size, min_periods]
 end
+
+def _prepare_rolling_by_window_args(window_size)
+window_size
+end
 end
 end