polars-df 0.10.0-aarch64-linux → 0.11.0-aarch64-linux

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/expr.rb CHANGED
@@ -1345,9 +1345,9 @@ module Polars
1345
1345
  # # │ 3 ┆ 4 │
1346
1346
  # # │ 2 ┆ 98 │
1347
1347
  # # └───────┴──────────┘
1348
- def top_k(k: 5)
1348
+ def top_k(k: 5, nulls_last: false, multithreaded: true)
1349
1349
  k = Utils.parse_as_expression(k)
1350
- _from_rbexpr(_rbexpr.top_k(k))
1350
+ _from_rbexpr(_rbexpr.top_k(k, nulls_last, multithreaded))
1351
1351
  end
1352
1352
 
1353
1353
  # Return the `k` smallest elements.
@@ -1384,9 +1384,9 @@ module Polars
1384
1384
  # # │ 3 ┆ 4 │
1385
1385
  # # │ 2 ┆ 98 │
1386
1386
  # # └───────┴──────────┘
1387
- def bottom_k(k: 5)
1387
+ def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1388
1388
  k = Utils.parse_as_expression(k)
1389
- _from_rbexpr(_rbexpr.bottom_k(k))
1389
+ _from_rbexpr(_rbexpr.bottom_k(k, nulls_last, multithreaded))
1390
1390
  end
1391
1391
 
1392
1392
  # Get the index values that would sort this column.
@@ -2764,6 +2764,9 @@ module Polars
2764
2764
  # Dtype of the output Series.
2765
2765
  # @param agg_list [Boolean]
2766
2766
  # Aggregate list.
2767
+ # @param is_elementwise [Boolean]
2768
+ # If set to true this can run in the streaming engine, but may yield
2769
+ # incorrect results in group-by. Ensure you know what you are doing!
2767
2770
  #
2768
2771
  # @return [Expr]
2769
2772
  #
@@ -2784,12 +2787,21 @@ module Polars
2784
2787
  # # ╞══════╪════════╡
2785
2788
  # # │ 1 ┆ 0 │
2786
2789
  # # └──────┴────────┘
2787
- # def map(return_dtype: nil, agg_list: false, &f)
2790
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2788
2791
  # if !return_dtype.nil?
2789
2792
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2790
2793
  # end
2791
- # _from_rbexpr(_rbexpr.map(f, return_dtype, agg_list))
2794
+ # _from_rbexpr(
2795
+ # _rbexpr.map_batches(
2796
+ # # TODO _map_batches_wrapper
2797
+ # f,
2798
+ # return_dtype,
2799
+ # agg_list,
2800
+ # is_elementwise
2801
+ # )
2802
+ # )
2792
2803
  # end
2804
+ # alias_method :map, :map_batches
2793
2805
 
2794
2806
  # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
2795
2807
  #
@@ -2831,7 +2843,7 @@ module Polars
2831
2843
  #
2832
2844
  # @example In a selection context, the function is applied by row.
2833
2845
  # df.with_column(
2834
- # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
2846
+ # Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
2835
2847
  # )
2836
2848
  # # =>
2837
2849
  # # shape: (4, 3)
@@ -2851,7 +2863,7 @@ module Polars
2851
2863
  # .group_by("b", maintain_order: true)
2852
2864
  # .agg(
2853
2865
  # [
2854
- # Polars.col("a").apply { |x| x.sum }
2866
+ # Polars.col("a").map_elements { |x| x.sum }
2855
2867
  # ]
2856
2868
  # )
2857
2869
  # .collect
@@ -2866,12 +2878,23 @@ module Polars
2866
2878
  # # │ b ┆ 2 │
2867
2879
  # # │ c ┆ 4 │
2868
2880
  # # └─────┴─────┘
2869
- # def apply(return_dtype: nil, &f)
2870
- # wrap_f = lambda do |x|
2871
- # x.apply(return_dtype: return_dtype, &f)
2881
+ # def map_elements(
2882
+ # return_dtype: nil,
2883
+ # skip_nulls: true,
2884
+ # pass_name: false,
2885
+ # strategy: "thread_local",
2886
+ # &f
2887
+ # )
2888
+ # if pass_name
2889
+ # raise Todo
2890
+ # else
2891
+ # wrap_f = lambda do |x|
2892
+ # x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
2893
+ # end
2872
2894
  # end
2873
- # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
2895
+ # map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
2874
2896
  # end
2897
+ # alias_method :apply, :map_elements
2875
2898
 
2876
2899
  # Explode a list or utf8 Series. This means that every item is expanded to a new
2877
2900
  # row.
@@ -3857,6 +3880,1002 @@ module Polars
3857
3880
  _from_rbexpr(_rbexpr.interpolate(method))
3858
3881
  end
3859
3882
 
3883
+ # Apply a rolling min based on another column.
3884
+ #
3885
+ # @param by [String]
3886
+ # This column must be of dtype Datetime or Date.
3887
+ # @param window_size [String]
3888
+ # The length of the window. Can be a dynamic temporal
3889
+ # size indicated by a timedelta or the following string language:
3890
+ #
3891
+ # - 1ns (1 nanosecond)
3892
+ # - 1us (1 microsecond)
3893
+ # - 1ms (1 millisecond)
3894
+ # - 1s (1 second)
3895
+ # - 1m (1 minute)
3896
+ # - 1h (1 hour)
3897
+ # - 1d (1 calendar day)
3898
+ # - 1w (1 calendar week)
3899
+ # - 1mo (1 calendar month)
3900
+ # - 1q (1 calendar quarter)
3901
+ # - 1y (1 calendar year)
3902
+ #
3903
+ # By "calendar day", we mean the corresponding time on the next day
3904
+ # (which may not be 24 hours, due to daylight savings). Similarly for
3905
+ # "calendar week", "calendar month", "calendar quarter", and
3906
+ # "calendar year".
3907
+ # @param min_periods [Integer]
3908
+ # The number of values in the window that should be non-null before computing
3909
+ # a result.
3910
+ # @param closed ['left', 'right', 'both', 'none']
3911
+ # Define which sides of the temporal interval are closed (inclusive),
3912
+ # defaults to `'right'`.
3913
+ # @param warn_if_unsorted [Boolean]
3914
+ # Warn if data is not known to be sorted by `by` column. (Accepted, but not currently read or forwarded by this method.)
3915
+ #
3916
+ # @return [Expr]
3917
+ #
3918
+ # @note
3919
+ # If you want to compute multiple aggregation statistics over the same dynamic
3920
+ # window, consider using `rolling` - this method can cache the window size
3921
+ # computation.
3922
+ #
3923
+ # @example Create a DataFrame with a datetime column and a row number column
3924
+ # start = DateTime.new(2001, 1, 1)
3925
+ # stop = DateTime.new(2001, 1, 2)
3926
+ # df_temporal = Polars::DataFrame.new(
3927
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
3928
+ # ).with_row_index
3929
+ # # =>
3930
+ # # shape: (25, 2)
3931
+ # # ┌───────┬─────────────────────┐
3932
+ # # │ index ┆ date │
3933
+ # # │ --- ┆ --- │
3934
+ # # │ u32 ┆ datetime[ns] │
3935
+ # # ╞═══════╪═════════════════════╡
3936
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
3937
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
3938
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
3939
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
3940
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
3941
+ # # │ … ┆ … │
3942
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
3943
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
3944
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
3945
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
3946
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
3947
+ # # └───────┴─────────────────────┘
3948
+ #
3949
+ # @example Compute the rolling min with the temporal windows closed on the right (default)
3950
+ # df_temporal.with_columns(
3951
+ # rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
3952
+ # )
3953
+ # # =>
3954
+ # # shape: (25, 3)
3955
+ # # ┌───────┬─────────────────────┬─────────────────┐
3956
+ # # │ index ┆ date ┆ rolling_row_min │
3957
+ # # │ --- ┆ --- ┆ --- │
3958
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
3959
+ # # ╞═══════╪═════════════════════╪═════════════════╡
3960
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
3961
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │
3962
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │
3963
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │
3964
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │
3965
+ # # │ … ┆ … ┆ … │
3966
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │
3967
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │
3968
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │
3969
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │
3970
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │
3971
+ # # └───────┴─────────────────────┴─────────────────┘
3972
+ def rolling_min_by(
3973
+ by,
3974
+ window_size,
3975
+ min_periods: 1,
3976
+ closed: "right",
3977
+ warn_if_unsorted: nil
3978
+ )
3979
+ window_size = _prepare_rolling_by_window_args(window_size)
3980
+ by = Utils.parse_as_expression(by)
3981
+ _from_rbexpr(
3982
+ _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
3983
+ )
3984
+ end
3985
+
3986
+ # Apply a rolling max based on another column.
3987
+ #
3988
+ # @param by [String]
3989
+ # This column must be of dtype Datetime or Date.
3990
+ # @param window_size [String]
3991
+ # The length of the window. Can be a dynamic temporal
3992
+ # size indicated by a timedelta or the following string language:
3993
+ #
3994
+ # - 1ns (1 nanosecond)
3995
+ # - 1us (1 microsecond)
3996
+ # - 1ms (1 millisecond)
3997
+ # - 1s (1 second)
3998
+ # - 1m (1 minute)
3999
+ # - 1h (1 hour)
4000
+ # - 1d (1 calendar day)
4001
+ # - 1w (1 calendar week)
4002
+ # - 1mo (1 calendar month)
4003
+ # - 1q (1 calendar quarter)
4004
+ # - 1y (1 calendar year)
4005
+ #
4006
+ # By "calendar day", we mean the corresponding time on the next day
4007
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4008
+ # "calendar week", "calendar month", "calendar quarter", and
4009
+ # "calendar year".
4010
+ # @param min_periods [Integer]
4011
+ # The number of values in the window that should be non-null before computing
4012
+ # a result.
4013
+ # @param closed ['left', 'right', 'both', 'none']
4014
+ # Define which sides of the temporal interval are closed (inclusive),
4015
+ # defaults to `'right'`.
4016
+ # @param warn_if_unsorted [Boolean]
4017
+ # Warn if data is not known to be sorted by `by` column. (Accepted, but not currently read or forwarded by this method.)
4018
+ #
4019
+ # @return [Expr]
4020
+ #
4021
+ # @note
4022
+ # If you want to compute multiple aggregation statistics over the same dynamic
4023
+ # window, consider using `rolling` - this method can cache the window size
4024
+ # computation.
4025
+ #
4026
+ # @example Create a DataFrame with a datetime column and a row number column
4027
+ # start = DateTime.new(2001, 1, 1)
4028
+ # stop = DateTime.new(2001, 1, 2)
4029
+ # df_temporal = Polars::DataFrame.new(
4030
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4031
+ # ).with_row_index
4032
+ # # =>
4033
+ # # shape: (25, 2)
4034
+ # # ┌───────┬─────────────────────┐
4035
+ # # │ index ┆ date │
4036
+ # # │ --- ┆ --- │
4037
+ # # │ u32 ┆ datetime[ns] │
4038
+ # # ╞═══════╪═════════════════════╡
4039
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4040
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4041
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4042
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4043
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4044
+ # # │ … ┆ … │
4045
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4046
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4047
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4048
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4049
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4050
+ # # └───────┴─────────────────────┘
4051
+ #
4052
+ # @example Compute the rolling max with the temporal windows closed on the right (default)
4053
+ # df_temporal.with_columns(
4054
+ # rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
4055
+ # )
4056
+ # # =>
4057
+ # # shape: (25, 3)
4058
+ # # ┌───────┬─────────────────────┬─────────────────┐
4059
+ # # │ index ┆ date ┆ rolling_row_max │
4060
+ # # │ --- ┆ --- ┆ --- │
4061
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4062
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4063
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4064
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4065
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4066
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4067
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4068
+ # # │ … ┆ … ┆ … │
4069
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4070
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4071
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4072
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4073
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4074
+ # # └───────┴─────────────────────┴─────────────────┘
4075
+ #
4076
+ # @example Compute the rolling max with the closure of windows on both sides
4077
+ # df_temporal.with_columns(
4078
+ # rolling_row_max: Polars.col("index").rolling_max_by(
4079
+ # "date", "2h", closed: "both"
4080
+ # )
4081
+ # )
4082
+ # # =>
4083
+ # # shape: (25, 3)
4084
+ # # ┌───────┬─────────────────────┬─────────────────┐
4085
+ # # │ index ┆ date ┆ rolling_row_max │
4086
+ # # │ --- ┆ --- ┆ --- │
4087
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4088
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4089
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4090
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4091
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4092
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4093
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4094
+ # # │ … ┆ … ┆ … │
4095
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4096
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4097
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4098
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4099
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4100
+ # # └───────┴─────────────────────┴─────────────────┘
4101
+ def rolling_max_by(
4102
+ by,
4103
+ window_size,
4104
+ min_periods: 1,
4105
+ closed: "right",
4106
+ warn_if_unsorted: nil
4107
+ )
4108
+ window_size = _prepare_rolling_by_window_args(window_size)
4109
+ by = Utils.parse_as_expression(by)
4110
+ _from_rbexpr(
4111
+ _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
4112
+ )
4113
+ end
4114
+
4115
+ # Apply a rolling mean based on another column.
4116
+ #
4117
+ # @param by [String]
4118
+ # This column must be of dtype Datetime or Date.
4119
+ # @param window_size [String]
4120
+ # The length of the window. Can be a dynamic temporal
4121
+ # size indicated by a timedelta or the following string language:
4122
+ #
4123
+ # - 1ns (1 nanosecond)
4124
+ # - 1us (1 microsecond)
4125
+ # - 1ms (1 millisecond)
4126
+ # - 1s (1 second)
4127
+ # - 1m (1 minute)
4128
+ # - 1h (1 hour)
4129
+ # - 1d (1 calendar day)
4130
+ # - 1w (1 calendar week)
4131
+ # - 1mo (1 calendar month)
4132
+ # - 1q (1 calendar quarter)
4133
+ # - 1y (1 calendar year)
4134
+ #
4135
+ # By "calendar day", we mean the corresponding time on the next day
4136
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4137
+ # "calendar week", "calendar month", "calendar quarter", and
4138
+ # "calendar year".
4139
+ # @param min_periods [Integer]
4140
+ # The number of values in the window that should be non-null before computing
4141
+ # a result.
4142
+ # @param closed ['left', 'right', 'both', 'none']
4143
+ # Define which sides of the temporal interval are closed (inclusive),
4144
+ # defaults to `'right'`.
4145
+ # @param warn_if_unsorted [Boolean]
4146
+ # Warn if data is not known to be sorted by `by` column. (Accepted, but not currently read or forwarded by this method.)
4147
+ #
4148
+ # @return [Expr]
4149
+ #
4150
+ # @note
4151
+ # If you want to compute multiple aggregation statistics over the same dynamic
4152
+ # window, consider using `rolling` - this method can cache the window size
4153
+ # computation.
4154
+ #
4155
+ # @example Create a DataFrame with a datetime column and a row number column
4156
+ # start = DateTime.new(2001, 1, 1)
4157
+ # stop = DateTime.new(2001, 1, 2)
4158
+ # df_temporal = Polars::DataFrame.new(
4159
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4160
+ # ).with_row_index
4161
+ # # =>
4162
+ # # shape: (25, 2)
4163
+ # # ┌───────┬─────────────────────┐
4164
+ # # │ index ┆ date │
4165
+ # # │ --- ┆ --- │
4166
+ # # │ u32 ┆ datetime[ns] │
4167
+ # # ╞═══════╪═════════════════════╡
4168
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4169
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4170
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4171
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4172
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4173
+ # # │ … ┆ … │
4174
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4175
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4176
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4177
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4178
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4179
+ # # └───────┴─────────────────────┘
4180
+ #
4181
+ # @example Compute the rolling mean with the temporal windows closed on the right (default)
4182
+ # df_temporal.with_columns(
4183
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4184
+ # "date", "2h"
4185
+ # )
4186
+ # )
4187
+ # # =>
4188
+ # # shape: (25, 3)
4189
+ # # ┌───────┬─────────────────────┬──────────────────┐
4190
+ # # │ index ┆ date ┆ rolling_row_mean │
4191
+ # # │ --- ┆ --- ┆ --- │
4192
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4193
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4194
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4195
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4196
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4197
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4198
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4199
+ # # │ … ┆ … ┆ … │
4200
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4201
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4202
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4203
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4204
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4205
+ # # └───────┴─────────────────────┴──────────────────┘
4206
+ #
4207
+ # @example Compute the rolling mean with the closure of windows on both sides
4208
+ # df_temporal.with_columns(
4209
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4210
+ # "date", "2h", closed: "both"
4211
+ # )
4212
+ # )
4213
+ # # =>
4214
+ # # shape: (25, 3)
4215
+ # # ┌───────┬─────────────────────┬──────────────────┐
4216
+ # # │ index ┆ date ┆ rolling_row_mean │
4217
+ # # │ --- ┆ --- ┆ --- │
4218
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4219
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4220
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4221
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4222
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4223
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4224
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4225
+ # # │ … ┆ … ┆ … │
4226
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4227
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4228
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4229
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4230
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4231
+ # # └───────┴─────────────────────┴──────────────────┘
4232
+ def rolling_mean_by(
4233
+ by,
4234
+ window_size,
4235
+ min_periods: 1,
4236
+ closed: "right",
4237
+ warn_if_unsorted: nil
4238
+ )
4239
+ window_size = _prepare_rolling_by_window_args(window_size)
4240
+ by = Utils.parse_as_expression(by)
4241
+ _from_rbexpr(
4242
+ _rbexpr.rolling_mean_by(
4243
+ by,
4244
+ window_size,
4245
+ min_periods,
4246
+ closed
4247
+ )
4248
+ )
4249
+ end
4250
+
4251
+ # Apply a rolling sum based on another column.
4252
+ #
4253
+ # @param by [String]
4254
+ # This column must be of dtype `{Date, Datetime}`
4255
+ # @param window_size [String]
4256
+ # The length of the window. Can be a dynamic temporal
4257
+ # size indicated by a timedelta or the following string language:
4258
+ #
4259
+ # - 1ns (1 nanosecond)
4260
+ # - 1us (1 microsecond)
4261
+ # - 1ms (1 millisecond)
4262
+ # - 1s (1 second)
4263
+ # - 1m (1 minute)
4264
+ # - 1h (1 hour)
4265
+ # - 1d (1 calendar day)
4266
+ # - 1w (1 calendar week)
4267
+ # - 1mo (1 calendar month)
4268
+ # - 1q (1 calendar quarter)
4269
+ # - 1y (1 calendar year)
4270
+ #
4271
+ # By "calendar day", we mean the corresponding time on the next day
4272
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4273
+ # "calendar week", "calendar month", "calendar quarter", and
4274
+ # "calendar year".
4275
+ # @param min_periods [Integer]
4276
+ # The number of values in the window that should be non-null before computing
4277
+ # a result.
4278
+ # @param closed ['left', 'right', 'both', 'none']
4279
+ # Define which sides of the temporal interval are closed (inclusive),
4280
+ # defaults to `'right'`.
4281
+ # @param warn_if_unsorted [Boolean]
4282
+ # Warn if data is not known to be sorted by `by` column. (Accepted, but not currently read or forwarded by this method.)
4283
+ #
4284
+ # @return [Expr]
4285
+ #
4286
+ # @note
4287
+ # If you want to compute multiple aggregation statistics over the same dynamic
4288
+ # window, consider using `rolling` - this method can cache the window size
4289
+ # computation.
4290
+ #
4291
+ # @example Create a DataFrame with a datetime column and a row number column
4292
+ # start = DateTime.new(2001, 1, 1)
4293
+ # stop = DateTime.new(2001, 1, 2)
4294
+ # df_temporal = Polars::DataFrame.new(
4295
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4296
+ # ).with_row_index
4297
+ # # =>
4298
+ # # shape: (25, 2)
4299
+ # # ┌───────┬─────────────────────┐
4300
+ # # │ index ┆ date │
4301
+ # # │ --- ┆ --- │
4302
+ # # │ u32 ┆ datetime[ns] │
4303
+ # # ╞═══════╪═════════════════════╡
4304
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4305
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4306
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4307
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4308
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4309
+ # # │ … ┆ … │
4310
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4311
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4312
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4313
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4314
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4315
+ # # └───────┴─────────────────────┘
4316
+ #
4317
+ # @example Compute the rolling sum with the temporal windows closed on the right (default)
4318
+ # df_temporal.with_columns(
4319
+ # rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
4320
+ # )
4321
+ # # =>
4322
+ # # shape: (25, 3)
4323
+ # # ┌───────┬─────────────────────┬─────────────────┐
4324
+ # # │ index ┆ date ┆ rolling_row_sum │
4325
+ # # │ --- ┆ --- ┆ --- │
4326
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4327
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4328
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4329
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4330
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4331
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 5 │
4332
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 7 │
4333
+ # # │ … ┆ … ┆ … │
4334
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 39 │
4335
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 41 │
4336
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 43 │
4337
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 45 │
4338
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 47 │
4339
+ # # └───────┴─────────────────────┴─────────────────┘
4340
+ #
4341
+ # @example Compute the rolling sum with the closure of windows on both sides
4342
+ # df_temporal.with_columns(
4343
+ # rolling_row_sum: Polars.col("index").rolling_sum_by(
4344
+ # "date", "2h", closed: "both"
4345
+ # )
4346
+ # )
4347
+ # # =>
4348
+ # # shape: (25, 3)
4349
+ # # ┌───────┬─────────────────────┬─────────────────┐
4350
+ # # │ index ┆ date ┆ rolling_row_sum │
4351
+ # # │ --- ┆ --- ┆ --- │
4352
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4353
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4354
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4355
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4356
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4357
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 6 │
4358
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 9 │
4359
+ # # │ … ┆ … ┆ … │
4360
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 57 │
4361
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 60 │
4362
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 63 │
4363
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 66 │
4364
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │
4365
+ # # └───────┴─────────────────────┴─────────────────┘
4366
+ def rolling_sum_by(
4367
+ by,
4368
+ window_size,
4369
+ min_periods: 1,
4370
+ closed: "right",
4371
+ warn_if_unsorted: nil
4372
+ )
4373
+ window_size = _prepare_rolling_by_window_args(window_size)
4374
+ by = Utils.parse_as_expression(by)
4375
+ _from_rbexpr(
4376
+ _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
4377
+ )
4378
+ end
4379
+
4380
+ # Compute a rolling standard deviation based on another column.
4381
+ #
4382
+ # @param by [String]
4383
+ # This column must be of dtype Datetime or Date.
4384
+ # @param window_size [String]
4385
+ # The length of the window. Can be a dynamic temporal
4386
+ # size indicated by a timedelta or the following string language:
4387
+ #
4388
+ # - 1ns (1 nanosecond)
4389
+ # - 1us (1 microsecond)
4390
+ # - 1ms (1 millisecond)
4391
+ # - 1s (1 second)
4392
+ # - 1m (1 minute)
4393
+ # - 1h (1 hour)
4394
+ # - 1d (1 calendar day)
4395
+ # - 1w (1 calendar week)
4396
+ # - 1mo (1 calendar month)
4397
+ # - 1q (1 calendar quarter)
4398
+ # - 1y (1 calendar year)
4399
+ #
4400
+ # By "calendar day", we mean the corresponding time on the next day
4401
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4402
+ # "calendar week", "calendar month", "calendar quarter", and
4403
+ # "calendar year".
4404
+ # @param min_periods [Integer]
4405
+ # The number of values in the window that should be non-null before computing
4406
+ # a result.
4407
+ # @param closed ['left', 'right', 'both', 'none']
4408
+ # Define which sides of the temporal interval are closed (inclusive),
4409
+ # defaults to `'right'`.
4410
+ # @param ddof [Integer]
4411
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4412
+ # @param warn_if_unsorted [Boolean]
4413
+ # Warn if data is not known to be sorted by `by` column. (Accepted, but not currently read or forwarded by this method.)
4414
+ #
4415
+ # @return [Expr]
4416
+ #
4417
+ # @note
4418
+ # If you want to compute multiple aggregation statistics over the same dynamic
4419
+ # window, consider using `rolling` - this method can cache the window size
4420
+ # computation.
4421
+ #
4422
+ # @example Create a DataFrame with a datetime column and a row number column
4423
+ # start = DateTime.new(2001, 1, 1)
4424
+ # stop = DateTime.new(2001, 1, 2)
4425
+ # df_temporal = Polars::DataFrame.new(
4426
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4427
+ # ).with_row_index
4428
+ # # =>
4429
+ # # shape: (25, 2)
4430
+ # # ┌───────┬─────────────────────┐
4431
+ # # │ index ┆ date │
4432
+ # # │ --- ┆ --- │
4433
+ # # │ u32 ┆ datetime[ns] │
4434
+ # # ╞═══════╪═════════════════════╡
4435
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4436
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4437
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4438
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4439
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4440
+ # # │ … ┆ … │
4441
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4442
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4443
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4444
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4445
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4446
+ # # └───────┴─────────────────────┘
4447
+ #
4448
+ # @example Compute the rolling std with the temporal windows closed on the right (default)
4449
+ # df_temporal.with_columns(
4450
+ # rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
4451
+ # )
4452
+ # # =>
4453
+ # # shape: (25, 3)
4454
+ # # ┌───────┬─────────────────────┬─────────────────┐
4455
+ # # │ index ┆ date ┆ rolling_row_std │
4456
+ # # │ --- ┆ --- ┆ --- │
4457
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4458
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4459
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4460
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4461
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.707107 │
4462
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.707107 │
4463
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.707107 │
4464
+ # # │ … ┆ … ┆ … │
4465
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.707107 │
4466
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.707107 │
4467
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.707107 │
4468
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.707107 │
4469
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.707107 │
4470
+ # # └───────┴─────────────────────┴─────────────────┘
4471
+ #
4472
+ # @example Compute the rolling std with the closure of windows on both sides
4473
+ # df_temporal.with_columns(
4474
+ # rolling_row_std: Polars.col("index").rolling_std_by(
4475
+ # "date", "2h", closed: "both"
4476
+ # )
4477
+ # )
4478
+ # # =>
4479
+ # # shape: (25, 3)
4480
+ # # ┌───────┬─────────────────────┬─────────────────┐
4481
+ # # │ index ┆ date ┆ rolling_row_std │
4482
+ # # │ --- ┆ --- ┆ --- │
4483
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4484
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4485
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4486
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4487
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4488
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4489
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4490
+ # # │ … ┆ … ┆ … │
4491
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4492
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4493
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4494
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4495
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4496
+ # # └───────┴─────────────────────┴─────────────────┘
4497
+ def rolling_std_by(
4498
+ by,
4499
+ window_size,
4500
+ min_periods: 1,
4501
+ closed: "right",
4502
+ ddof: 1,
4503
+ warn_if_unsorted: nil
4504
+ )
4505
+ window_size = _prepare_rolling_by_window_args(window_size)
4506
+ by = Utils.parse_as_expression(by)
4507
+ _from_rbexpr(
4508
+ _rbexpr.rolling_std_by(
4509
+ by,
4510
+ window_size,
4511
+ min_periods,
4512
+ closed,
4513
+ ddof
4514
+ )
4515
+ )
4516
+ end
4517
+
4518
+ # Compute a rolling variance based on another column.
4519
+ #
4520
+ # @param by [String]
4521
+ # This column must be of dtype Datetime or Date.
4522
+ # @param window_size [String]
4523
+ # The length of the window. Can be a dynamic temporal
4524
+ # size indicated by a timedelta or the following string language:
4525
+ #
4526
+ # - 1ns (1 nanosecond)
4527
+ # - 1us (1 microsecond)
4528
+ # - 1ms (1 millisecond)
4529
+ # - 1s (1 second)
4530
+ # - 1m (1 minute)
4531
+ # - 1h (1 hour)
4532
+ # - 1d (1 calendar day)
4533
+ # - 1w (1 calendar week)
4534
+ # - 1mo (1 calendar month)
4535
+ # - 1q (1 calendar quarter)
4536
+ # - 1y (1 calendar year)
4537
+ #
4538
+ # By "calendar day", we mean the corresponding time on the next day
4539
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4540
+ # "calendar week", "calendar month", "calendar quarter", and
4541
+ # "calendar year".
4542
+ # @param min_periods [Integer]
4543
+ # The number of values in the window that should be non-null before computing
4544
+ # a result.
4545
+ # @param closed ['left', 'right', 'both', 'none']
4546
+ # Define which sides of the temporal interval are closed (inclusive),
4547
+ # defaults to `'right'`.
4548
+ # @param ddof [Integer]
4549
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4550
+ # @param warn_if_unsorted [Boolean]
4551
+ # Warn if data is not known to be sorted by `by` column. (Accepted, but not currently read or forwarded by this method.)
4552
+ #
4553
+ # @return [Expr]
4554
+ #
4555
+ # @note
4556
+ # If you want to compute multiple aggregation statistics over the same dynamic
4557
+ # window, consider using `rolling` - this method can cache the window size
4558
+ # computation.
4559
+ #
4560
+ # @example Create a DataFrame with a datetime column and a row number column
4561
+ # start = DateTime.new(2001, 1, 1)
4562
+ # stop = DateTime.new(2001, 1, 2)
4563
+ # df_temporal = Polars::DataFrame.new(
4564
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4565
+ # ).with_row_index
4566
+ # # =>
4567
+ # # shape: (25, 2)
4568
+ # # ┌───────┬─────────────────────┐
4569
+ # # │ index ┆ date │
4570
+ # # │ --- ┆ --- │
4571
+ # # │ u32 ┆ datetime[ns] │
4572
+ # # ╞═══════╪═════════════════════╡
4573
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4574
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4575
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4576
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4577
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4578
+ # # │ … ┆ … │
4579
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4580
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4581
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4582
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4583
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4584
+ # # └───────┴─────────────────────┘
4585
+ #
4586
+ # @example Compute the rolling var with the temporal windows closed on the right (default)
4587
+ # df_temporal.with_columns(
4588
+ # rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
4589
+ # )
4590
+ # # =>
4591
+ # # shape: (25, 3)
4592
+ # # ┌───────┬─────────────────────┬─────────────────┐
4593
+ # # │ index ┆ date ┆ rolling_row_var │
4594
+ # # │ --- ┆ --- ┆ --- │
4595
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4596
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4597
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4598
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4599
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.5 │
4600
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.5 │
4601
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.5 │
4602
+ # # │ … ┆ … ┆ … │
4603
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.5 │
4604
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.5 │
4605
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.5 │
4606
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.5 │
4607
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.5 │
4608
+ # # └───────┴─────────────────────┴─────────────────┘
4609
+ #
4610
+ # @example Compute the rolling var with the closure of windows on both sides
4611
+ # df_temporal.with_columns(
4612
+ # rolling_row_var: Polars.col("index").rolling_var_by(
4613
+ # "date", "2h", closed: "both"
4614
+ # )
4615
+ # )
4616
+ # # =>
4617
+ # # shape: (25, 3)
4618
+ # # ┌───────┬─────────────────────┬─────────────────┐
4619
+ # # │ index ┆ date ┆ rolling_row_var │
4620
+ # # │ --- ┆ --- ┆ --- │
4621
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4622
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4623
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4624
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4625
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4626
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4627
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4628
+ # # │ … ┆ … ┆ … │
4629
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4630
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4631
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4632
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4633
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4634
+ # # └───────┴─────────────────────┴─────────────────┘
4635
+ def rolling_var_by( # builds an expression around the native rolling_var_by call
4636
+ by,
4637
+ window_size,
4638
+ min_periods: 1,
4639
+ closed: "right",
4640
+ ddof: 1,
4641
+ warn_if_unsorted: nil # accepted in the signature but never passed to the native call below
4642
+ )
4643
+ window_size = _prepare_rolling_by_window_args(window_size) # currently a pass-through: the helper returns its argument unchanged
4644
+ by = Utils.parse_as_expression(by) # `by` is converted by Utils.parse_as_expression (defined elsewhere) before being forwarded
4645
+ _from_rbexpr( # the native result is wrapped by _from_rbexpr and becomes the return value
4646
+ _rbexpr.rolling_var_by(
4647
+ by,
4648
+ window_size,
4649
+ min_periods,
4650
+ closed,
4651
+ ddof # last positional argument; warn_if_unsorted is intentionally absent here
4652
+ )
4653
+ )
4654
+ end
4655
+
4656
+ # Compute a rolling median based on another column.
4657
+ #
4658
+ # @param by [String]
4659
+ # This column must be of dtype Datetime or Date.
4660
+ # @param window_size [String]
4661
+ # The length of the window. Can be a dynamic temporal
4662
+ # size indicated by a timedelta or the following string language:
4663
+ #
4664
+ # - 1ns (1 nanosecond)
4665
+ # - 1us (1 microsecond)
4666
+ # - 1ms (1 millisecond)
4667
+ # - 1s (1 second)
4668
+ # - 1m (1 minute)
4669
+ # - 1h (1 hour)
4670
+ # - 1d (1 calendar day)
4671
+ # - 1w (1 calendar week)
4672
+ # - 1mo (1 calendar month)
4673
+ # - 1q (1 calendar quarter)
4674
+ # - 1y (1 calendar year)
4675
+ #
4676
+ # By "calendar day", we mean the corresponding time on the next day
4677
+ # (which may not be 24 hours, due to daylight saving time). Similarly for
4678
+ # "calendar week", "calendar month", "calendar quarter", and
4679
+ # "calendar year".
4680
+ # @param min_periods [Integer]
4681
+ # The number of values in the window that should be non-null before computing
4682
+ # a result.
4683
+ # @param closed ['left', 'right', 'both', 'none']
4684
+ # Define which sides of the temporal interval are closed (inclusive),
4685
+ # defaults to `'right'`.
4686
+ # @param warn_if_unsorted [Boolean]
4687
+ # Warn if data is not known to be sorted by `by` column.
4688
+ #
4689
+ # @return [Expr]
4690
+ #
4691
+ # @note
4692
+ # If you want to compute multiple aggregation statistics over the same dynamic
4693
+ # window, consider using `rolling` - this method can cache the window size
4694
+ # computation.
4695
+ #
4696
+ # @example Create a DataFrame with a datetime column and a row number column
4697
+ # start = DateTime.new(2001, 1, 1)
4698
+ # stop = DateTime.new(2001, 1, 2)
4699
+ # df_temporal = Polars::DataFrame.new(
4700
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4701
+ # ).with_row_index
4702
+ # # =>
4703
+ # # shape: (25, 2)
4704
+ # # ┌───────┬─────────────────────┐
4705
+ # # │ index ┆ date │
4706
+ # # │ --- ┆ --- │
4707
+ # # │ u32 ┆ datetime[ns] │
4708
+ # # ╞═══════╪═════════════════════╡
4709
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4710
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4711
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4712
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4713
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4714
+ # # │ … ┆ … │
4715
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4716
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4717
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4718
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4719
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4720
+ # # └───────┴─────────────────────┘
4721
+ #
4722
+ # @example Compute the rolling median with the temporal windows closed on the right:
4723
+ # df_temporal.with_columns(
4724
+ # rolling_row_median: Polars.col("index").rolling_median_by(
4725
+ # "date", "2h"
4726
+ # )
4727
+ # )
4728
+ # # =>
4729
+ # # shape: (25, 3)
4730
+ # # ┌───────┬─────────────────────┬────────────────────┐
4731
+ # # │ index ┆ date ┆ rolling_row_median │
4732
+ # # │ --- ┆ --- ┆ --- │
4733
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4734
+ # # ╞═══════╪═════════════════════╪════════════════════╡
4735
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4736
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4737
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4738
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4739
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4740
+ # # │ … ┆ … ┆ … │
4741
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4742
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4743
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4744
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4745
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4746
+ # # └───────┴─────────────────────┴────────────────────┘
4747
+ def rolling_median_by( # builds an expression around the native rolling_median_by call
4748
+ by,
4749
+ window_size,
4750
+ min_periods: 1,
4751
+ closed: "right",
4752
+ warn_if_unsorted: nil # accepted in the signature but never passed to the native call below
4753
+ )
4754
+ window_size = _prepare_rolling_by_window_args(window_size) # currently a pass-through: the helper returns its argument unchanged
4755
+ by = Utils.parse_as_expression(by) # `by` is converted by Utils.parse_as_expression (defined elsewhere) before being forwarded
4756
+ _from_rbexpr( # the native result is wrapped by _from_rbexpr and becomes the return value
4757
+ _rbexpr.rolling_median_by(by, window_size, min_periods, closed) # four positional arguments, in this order
4758
+ )
4759
+ end
4760
+
4761
+ # Compute a rolling quantile based on another column.
4762
+ #
4763
+ # @param by [String]
4764
+ # This column must be of dtype Datetime or Date.
4765
+ # @param quantile [Float]
4766
+ # Quantile between 0.0 and 1.0.
4767
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
4768
+ # Interpolation method.
4769
+ # @param window_size [String]
4770
+ # The length of the window. Can be a dynamic
4771
+ # temporal size indicated by a timedelta or the following string language:
4772
+ #
4773
+ # - 1ns (1 nanosecond)
4774
+ # - 1us (1 microsecond)
4775
+ # - 1ms (1 millisecond)
4776
+ # - 1s (1 second)
4777
+ # - 1m (1 minute)
4778
+ # - 1h (1 hour)
4779
+ # - 1d (1 calendar day)
4780
+ # - 1w (1 calendar week)
4781
+ # - 1mo (1 calendar month)
4782
+ # - 1q (1 calendar quarter)
4783
+ # - 1y (1 calendar year)
4784
+ #
4785
+ # By "calendar day", we mean the corresponding time on the next day
4786
+ # (which may not be 24 hours, due to daylight saving time). Similarly for
4787
+ # "calendar week", "calendar month", "calendar quarter", and
4788
+ # "calendar year".
4789
+ # @param min_periods [Integer]
4790
+ # The number of values in the window that should be non-null before computing
4791
+ # a result.
4792
+ # @param closed ['left', 'right', 'both', 'none']
4793
+ # Define which sides of the temporal interval are closed (inclusive),
4794
+ # defaults to `'right'`.
4795
+ # @param warn_if_unsorted [Boolean]
4796
+ # Warn if data is not known to be sorted by `by` column.
4797
+ #
4798
+ # @return [Expr]
4799
+ #
4800
+ # @note
4801
+ # If you want to compute multiple aggregation statistics over the same dynamic
4802
+ # window, consider using `rolling` - this method can cache the window size
4803
+ # computation.
4804
+ #
4805
+ # @example Create a DataFrame with a datetime column and a row number column
4806
+ # start = DateTime.new(2001, 1, 1)
4807
+ # stop = DateTime.new(2001, 1, 2)
4808
+ # df_temporal = Polars::DataFrame.new(
4809
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4810
+ # ).with_row_index
4811
+ # # =>
4812
+ # # shape: (25, 2)
4813
+ # # ┌───────┬─────────────────────┐
4814
+ # # │ index ┆ date │
4815
+ # # │ --- ┆ --- │
4816
+ # # │ u32 ┆ datetime[ns] │
4817
+ # # ╞═══════╪═════════════════════╡
4818
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4819
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4820
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4821
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4822
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4823
+ # # │ … ┆ … │
4824
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4825
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4826
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4827
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4828
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4829
+ # # └───────┴─────────────────────┘
4830
+ #
4831
+ # @example Compute the rolling quantile with the temporal windows closed on the right:
4832
+ # df_temporal.with_columns(
4833
+ # rolling_row_quantile: Polars.col("index").rolling_quantile_by(
4834
+ # "date", "2h", quantile: 0.3
4835
+ # )
4836
+ # )
4837
+ # # =>
4838
+ # # shape: (25, 3)
4839
+ # # ┌───────┬─────────────────────┬──────────────────────┐
4840
+ # # │ index ┆ date ┆ rolling_row_quantile │
4841
+ # # │ --- ┆ --- ┆ --- │
4842
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4843
+ # # ╞═══════╪═════════════════════╪══════════════════════╡
4844
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4845
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.0 │
4846
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4847
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4848
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4849
+ # # │ … ┆ … ┆ … │
4850
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4851
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4852
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4853
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4854
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4855
+ # # └───────┴─────────────────────┴──────────────────────┘
4856
+ def rolling_quantile_by( # builds an expression around the native rolling_quantile_by call
4857
+ by,
4858
+ window_size,
4859
+ quantile:, # required keyword: there is no default
4860
+ interpolation: "nearest",
4861
+ min_periods: 1,
4862
+ closed: "right",
4863
+ warn_if_unsorted: nil # accepted in the signature but never passed to the native call below
4864
+ )
4865
+ window_size = _prepare_rolling_by_window_args(window_size) # currently a pass-through: the helper returns its argument unchanged
4866
+ by = Utils.parse_as_expression(by) # `by` is converted by Utils.parse_as_expression (defined elsewhere) before being forwarded
4867
+ _from_rbexpr( # the native result is wrapped by _from_rbexpr and becomes the return value
4868
+ _rbexpr.rolling_quantile_by( # note: native argument order differs from this method's signature
4869
+ by,
4870
+ quantile, # quantile and interpolation are passed before window_size
4871
+ interpolation,
4872
+ window_size,
4873
+ min_periods,
4874
+ closed,
4875
+ )
4876
+ )
4877
+ end
4878
+
3860
4879
  # Apply a rolling min (moving min) over the values in this array.
3861
4880
  #
3862
4881
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3939,9 +4958,20 @@ module Polars
3939
4958
  window_size, min_periods = _prepare_rolling_window_args(
3940
4959
  window_size, min_periods
3941
4960
  )
4961
+ if !by.nil?
4962
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
4963
+ return rolling_min_by(
4964
+ by,
4965
+ window_size,
4966
+ min_periods: min_periods,
4967
+ closed: closed || "right",
4968
+ warn_if_unsorted: warn_if_unsorted
4969
+ )
4970
+ end
4971
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
3942
4972
  _from_rbexpr(
3943
4973
  _rbexpr.rolling_min(
3944
- window_size, weights, min_periods, center, by, closed
4974
+ window_size, weights, min_periods, center
3945
4975
  )
3946
4976
  )
3947
4977
  end
@@ -4028,9 +5058,20 @@ module Polars
4028
5058
  window_size, min_periods = _prepare_rolling_window_args(
4029
5059
  window_size, min_periods
4030
5060
  )
5061
+ if !by.nil?
5062
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5063
+ return rolling_max_by(
5064
+ by,
5065
+ window_size,
5066
+ min_periods: min_periods,
5067
+ closed: closed || "right",
5068
+ warn_if_unsorted: warn_if_unsorted
5069
+ )
5070
+ end
5071
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4031
5072
  _from_rbexpr(
4032
5073
  _rbexpr.rolling_max(
4033
- window_size, weights, min_periods, center, by, closed
5074
+ window_size, weights, min_periods, center
4034
5075
  )
4035
5076
  )
4036
5077
  end
@@ -4117,9 +5158,20 @@ module Polars
4117
5158
  window_size, min_periods = _prepare_rolling_window_args(
4118
5159
  window_size, min_periods
4119
5160
  )
5161
+ if !by.nil?
5162
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5163
+ return rolling_mean_by(
5164
+ by,
5165
+ window_size,
5166
+ min_periods: min_periods,
5167
+ closed: closed || "right",
5168
+ warn_if_unsorted: warn_if_unsorted
5169
+ )
5170
+ end
5171
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4120
5172
  _from_rbexpr(
4121
5173
  _rbexpr.rolling_mean(
4122
- window_size, weights, min_periods, center, by, closed
5174
+ window_size, weights, min_periods, center
4123
5175
  )
4124
5176
  )
4125
5177
  end
@@ -4206,9 +5258,20 @@ module Polars
4206
5258
  window_size, min_periods = _prepare_rolling_window_args(
4207
5259
  window_size, min_periods
4208
5260
  )
5261
+ if !by.nil?
5262
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5263
+ return rolling_sum_by(
5264
+ by,
5265
+ window_size,
5266
+ min_periods: min_periods,
5267
+ closed: closed || "right",
5268
+ warn_if_unsorted: warn_if_unsorted
5269
+ )
5270
+ end
5271
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4209
5272
  _from_rbexpr(
4210
5273
  _rbexpr.rolling_sum(
4211
- window_size, weights, min_periods, center, by, closed
5274
+ window_size, weights, min_periods, center
4212
5275
  )
4213
5276
  )
4214
5277
  end
@@ -4297,9 +5360,21 @@ module Polars
4297
5360
  window_size, min_periods = _prepare_rolling_window_args(
4298
5361
  window_size, min_periods
4299
5362
  )
5363
+ if !by.nil?
5364
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5365
+ return rolling_std_by(
5366
+ by,
5367
+ window_size,
5368
+ min_periods: min_periods,
5369
+ closed: closed || "right",
5370
+ ddof: ddof,
5371
+ warn_if_unsorted: warn_if_unsorted
5372
+ )
5373
+ end
5374
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4300
5375
  _from_rbexpr(
4301
5376
  _rbexpr.rolling_std(
4302
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5377
+ window_size, weights, min_periods, center, ddof
4303
5378
  )
4304
5379
  )
4305
5380
  end
@@ -4388,9 +5463,21 @@ module Polars
4388
5463
  window_size, min_periods = _prepare_rolling_window_args(
4389
5464
  window_size, min_periods
4390
5465
  )
5466
+ if !by.nil?
5467
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5468
+ return rolling_var_by(
5469
+ by,
5470
+ window_size,
5471
+ min_periods: min_periods,
5472
+ closed: closed || "right",
5473
+ ddof: ddof,
5474
+ warn_if_unsorted: warn_if_unsorted
5475
+ )
5476
+ end
5477
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4391
5478
  _from_rbexpr(
4392
5479
  _rbexpr.rolling_var(
4393
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5480
+ window_size, weights, min_periods, center, ddof
4394
5481
  )
4395
5482
  )
4396
5483
  end
@@ -4474,9 +5561,20 @@ module Polars
4474
5561
  window_size, min_periods = _prepare_rolling_window_args(
4475
5562
  window_size, min_periods
4476
5563
  )
5564
+ if !by.nil?
5565
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5566
+ return rolling_median_by(
5567
+ by,
5568
+ window_size,
5569
+ min_periods: min_periods,
5570
+ closed: closed || "right",
5571
+ warn_if_unsorted: warn_if_unsorted
5572
+ )
5573
+ end
5574
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4477
5575
  _from_rbexpr(
4478
5576
  _rbexpr.rolling_median(
4479
- window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5577
+ window_size, weights, min_periods, center
4480
5578
  )
4481
5579
  )
4482
5580
  end
@@ -4566,9 +5664,21 @@ module Polars
4566
5664
  window_size, min_periods = _prepare_rolling_window_args(
4567
5665
  window_size, min_periods
4568
5666
  )
5667
+ if !by.nil?
5668
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5669
+ return rolling_quantile_by(
5670
+ by,
5671
+ window_size,
5672
+ min_periods: min_periods,
5673
+ closed: closed || "right",
5674
+ warn_if_unsorted: warn_if_unsorted,
5675
+ quantile: quantile
5676
+ )
5677
+ end
5678
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4569
5679
  _from_rbexpr(
4570
5680
  _rbexpr.rolling_quantile(
4571
- quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5681
+ quantile, interpolation, window_size, weights, min_periods, center
4572
5682
  )
4573
5683
  )
4574
5684
  end
@@ -6101,5 +7211,9 @@ module Polars
6101
7211
  end
6102
7212
  [window_size, min_periods]
6103
7213
  end
7214
+
7215
+ def _prepare_rolling_by_window_args(window_size) # helper called by the rolling_*_by methods before the native call
7216
+ window_size # returned unchanged: no conversion or validation happens here
7217
+ end
6104
7218
  end
6105
7219
  end