polars-df 0.10.0-arm64-darwin → 0.11.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/expr.rb CHANGED
@@ -1345,9 +1345,9 @@ module Polars
1345
1345
  # # │ 3 ┆ 4 │
1346
1346
  # # │ 2 ┆ 98 │
1347
1347
  # # └───────┴──────────┘
1348
- def top_k(k: 5)
1348
+ def top_k(k: 5, nulls_last: false, multithreaded: true)
1349
1349
  k = Utils.parse_as_expression(k)
1350
- _from_rbexpr(_rbexpr.top_k(k))
1350
+ _from_rbexpr(_rbexpr.top_k(k, nulls_last, multithreaded))
1351
1351
  end
1352
1352
 
1353
1353
  # Return the `k` smallest elements.
@@ -1384,9 +1384,9 @@ module Polars
1384
1384
  # # │ 3 ┆ 4 │
1385
1385
  # # │ 2 ┆ 98 │
1386
1386
  # # └───────┴──────────┘
1387
- def bottom_k(k: 5)
1387
+ def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1388
1388
  k = Utils.parse_as_expression(k)
1389
- _from_rbexpr(_rbexpr.bottom_k(k))
1389
+ _from_rbexpr(_rbexpr.bottom_k(k, nulls_last, multithreaded))
1390
1390
  end
1391
1391
 
1392
1392
  # Get the index values that would sort this column.
@@ -2764,6 +2764,9 @@ module Polars
2764
2764
  # Dtype of the output Series.
2765
2765
  # @param agg_list [Boolean]
2766
2766
  # Aggregate list.
2767
+ # @param is_elementwise [Boolean]
2768
+ # If set to true this can run in the streaming engine, but may yield
2769
+ # incorrect results in group-by. Ensure you know what you are doing!
2767
2770
  #
2768
2771
  # @return [Expr]
2769
2772
  #
@@ -2784,12 +2787,21 @@ module Polars
2784
2787
  # # ╞══════╪════════╡
2785
2788
  # # │ 1 ┆ 0 │
2786
2789
  # # └──────┴────────┘
2787
- # def map(return_dtype: nil, agg_list: false, &f)
2790
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2788
2791
  # if !return_dtype.nil?
2789
2792
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2790
2793
  # end
2791
- # _from_rbexpr(_rbexpr.map(f, return_dtype, agg_list))
2794
+ # _from_rbexpr(
2795
+ # _rbexpr.map_batches(
2796
+ # # TODO _map_batches_wrapper
2797
+ # f,
2798
+ # return_dtype,
2799
+ # agg_list,
2800
+ # is_elementwise
2801
+ # )
2802
+ # )
2792
2803
  # end
2804
+ # alias_method :map, :map_batches
2793
2805
 
2794
2806
  # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
2795
2807
  #
@@ -2831,7 +2843,7 @@ module Polars
2831
2843
  #
2832
2844
  # @example In a selection context, the function is applied by row.
2833
2845
  # df.with_column(
2834
- # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
2846
+ # Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
2835
2847
  # )
2836
2848
  # # =>
2837
2849
  # # shape: (4, 3)
@@ -2851,7 +2863,7 @@ module Polars
2851
2863
  # .group_by("b", maintain_order: true)
2852
2864
  # .agg(
2853
2865
  # [
2854
- # Polars.col("a").apply { |x| x.sum }
2866
+ # Polars.col("a").map_elements { |x| x.sum }
2855
2867
  # ]
2856
2868
  # )
2857
2869
  # .collect
@@ -2866,12 +2878,23 @@ module Polars
2866
2878
  # # │ b ┆ 2 │
2867
2879
  # # │ c ┆ 4 │
2868
2880
  # # └─────┴─────┘
2869
- # def apply(return_dtype: nil, &f)
2870
- # wrap_f = lambda do |x|
2871
- # x.apply(return_dtype: return_dtype, &f)
2881
+ # def map_elements(
2882
+ # return_dtype: nil,
2883
+ # skip_nulls: true,
2884
+ # pass_name: false,
2885
+ # strategy: "thread_local",
2886
+ # &f
2887
+ # )
2888
+ # if pass_name
2889
+ # raise Todo
2890
+ # else
2891
+ # wrap_f = lambda do |x|
2892
+ # x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
2893
+ # end
2872
2894
  # end
2873
- # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
2895
+ # map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
2874
2896
  # end
2897
+ # alias_method :apply, :map_elements
2875
2898
 
2876
2899
  # Explode a list or utf8 Series. This means that every item is expanded to a new
2877
2900
  # row.
@@ -3857,6 +3880,1002 @@ module Polars
3857
3880
  _from_rbexpr(_rbexpr.interpolate(method))
3858
3881
  end
3859
3882
 
3883
+ # Apply a rolling min based on another column.
3884
+ #
3885
+ # @param by [String]
3886
+ # This column must be of dtype Datetime or Date.
3887
+ # @param window_size [String]
3888
+ # The length of the window. Can be a dynamic temporal
3889
+ # size indicated by a timedelta or the following string language:
3890
+ #
3891
+ # - 1ns (1 nanosecond)
3892
+ # - 1us (1 microsecond)
3893
+ # - 1ms (1 millisecond)
3894
+ # - 1s (1 second)
3895
+ # - 1m (1 minute)
3896
+ # - 1h (1 hour)
3897
+ # - 1d (1 calendar day)
3898
+ # - 1w (1 calendar week)
3899
+ # - 1mo (1 calendar month)
3900
+ # - 1q (1 calendar quarter)
3901
+ # - 1y (1 calendar year)
3902
+ #
3903
+ # By "calendar day", we mean the corresponding time on the next day
3904
+ # (which may not be 24 hours, due to daylight savings). Similarly for
3905
+ # "calendar week", "calendar month", "calendar quarter", and
3906
+ # "calendar year".
3907
+ # @param min_periods [Integer]
3908
+ # The number of values in the window that should be non-null before computing
3909
+ # a result.
3910
+ # @param closed ['left', 'right', 'both', 'none']
3911
+ # Define which sides of the temporal interval are closed (inclusive),
3912
+ # defaults to `'right'`.
3913
+ # @param warn_if_unsorted [Boolean]
3914
+ # Warn if data is not known to be sorted by `by` column. Currently accepted but unused: the value is not forwarded to the native call.
3915
+ #
3916
+ # @return [Expr]
3917
+ #
3918
+ # @note
3919
+ # If you want to compute multiple aggregation statistics over the same dynamic
3920
+ # window, consider using `rolling` - this method can cache the window size
3921
+ # computation.
3922
+ #
3923
+ # @example Create a DataFrame with a datetime column and a row number column
3924
+ # start = DateTime.new(2001, 1, 1)
3925
+ # stop = DateTime.new(2001, 1, 2)
3926
+ # df_temporal = Polars::DataFrame.new(
3927
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
3928
+ # ).with_row_index
3929
+ # # =>
3930
+ # # shape: (25, 2)
3931
+ # # ┌───────┬─────────────────────┐
3932
+ # # │ index ┆ date │
3933
+ # # │ --- ┆ --- │
3934
+ # # │ u32 ┆ datetime[ns] │
3935
+ # # ╞═══════╪═════════════════════╡
3936
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
3937
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
3938
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
3939
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
3940
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
3941
+ # # │ … ┆ … │
3942
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
3943
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
3944
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
3945
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
3946
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
3947
+ # # └───────┴─────────────────────┘
3948
+ #
3949
+ # @example Compute the rolling min with the temporal windows closed on the right (default)
3950
+ # df_temporal.with_columns(
3951
+ # rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
3952
+ # )
3953
+ # # =>
3954
+ # # shape: (25, 3)
3955
+ # # ┌───────┬─────────────────────┬─────────────────┐
3956
+ # # │ index ┆ date ┆ rolling_row_min │
3957
+ # # │ --- ┆ --- ┆ --- │
3958
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
3959
+ # # ╞═══════╪═════════════════════╪═════════════════╡
3960
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
3961
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │
3962
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │
3963
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │
3964
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │
3965
+ # # │ … ┆ … ┆ … │
3966
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │
3967
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │
3968
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │
3969
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │
3970
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │
3971
+ # # └───────┴─────────────────────┴─────────────────┘
3972
+ def rolling_min_by(
3973
+ by,
3974
+ window_size,
3975
+ min_periods: 1,
3976
+ closed: "right",
3977
+ warn_if_unsorted: nil
3978
+ )
3979
+ window_size = _prepare_rolling_by_window_args(window_size)
3980
+ by = Utils.parse_as_expression(by)
3981
+ _from_rbexpr(
3982
+ _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
3983
+ )
3984
+ end
3985
+
3986
+ # Apply a rolling max based on another column.
3987
+ #
3988
+ # @param by [String]
3989
+ # This column must be of dtype Datetime or Date.
3990
+ # @param window_size [String]
3991
+ # The length of the window. Can be a dynamic temporal
3992
+ # size indicated by a timedelta or the following string language:
3993
+ #
3994
+ # - 1ns (1 nanosecond)
3995
+ # - 1us (1 microsecond)
3996
+ # - 1ms (1 millisecond)
3997
+ # - 1s (1 second)
3998
+ # - 1m (1 minute)
3999
+ # - 1h (1 hour)
4000
+ # - 1d (1 calendar day)
4001
+ # - 1w (1 calendar week)
4002
+ # - 1mo (1 calendar month)
4003
+ # - 1q (1 calendar quarter)
4004
+ # - 1y (1 calendar year)
4005
+ #
4006
+ # By "calendar day", we mean the corresponding time on the next day
4007
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4008
+ # "calendar week", "calendar month", "calendar quarter", and
4009
+ # "calendar year".
4010
+ # @param min_periods [Integer]
4011
+ # The number of values in the window that should be non-null before computing
4012
+ # a result.
4013
+ # @param closed ['left', 'right', 'both', 'none']
4014
+ # Define which sides of the temporal interval are closed (inclusive),
4015
+ # defaults to `'right'`.
4016
+ # @param warn_if_unsorted [Boolean]
4017
+ # Warn if data is not known to be sorted by `by` column. Currently accepted but unused: the value is not forwarded to the native call.
4018
+ #
4019
+ # @return [Expr]
4020
+ #
4021
+ # @note
4022
+ # If you want to compute multiple aggregation statistics over the same dynamic
4023
+ # window, consider using `rolling` - this method can cache the window size
4024
+ # computation.
4025
+ #
4026
+ # @example Create a DataFrame with a datetime column and a row number column
4027
+ # start = DateTime.new(2001, 1, 1)
4028
+ # stop = DateTime.new(2001, 1, 2)
4029
+ # df_temporal = Polars::DataFrame.new(
4030
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4031
+ # ).with_row_index
4032
+ # # =>
4033
+ # # shape: (25, 2)
4034
+ # # ┌───────┬─────────────────────┐
4035
+ # # │ index ┆ date │
4036
+ # # │ --- ┆ --- │
4037
+ # # │ u32 ┆ datetime[ns] │
4038
+ # # ╞═══════╪═════════════════════╡
4039
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4040
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4041
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4042
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4043
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4044
+ # # │ … ┆ … │
4045
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4046
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4047
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4048
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4049
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4050
+ # # └───────┴─────────────────────┘
4051
+ #
4052
+ # @example Compute the rolling max with the temporal windows closed on the right (default)
4053
+ # df_temporal.with_columns(
4054
+ # rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
4055
+ # )
4056
+ # # =>
4057
+ # # shape: (25, 3)
4058
+ # # ┌───────┬─────────────────────┬─────────────────┐
4059
+ # # │ index ┆ date ┆ rolling_row_max │
4060
+ # # │ --- ┆ --- ┆ --- │
4061
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4062
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4063
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4064
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4065
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4066
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4067
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4068
+ # # │ … ┆ … ┆ … │
4069
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4070
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4071
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4072
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4073
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4074
+ # # └───────┴─────────────────────┴─────────────────┘
4075
+ #
4076
+ # @example Compute the rolling max with the closure of windows on both sides
4077
+ # df_temporal.with_columns(
4078
+ # rolling_row_max: Polars.col("index").rolling_max_by(
4079
+ # "date", "2h", closed: "both"
4080
+ # )
4081
+ # )
4082
+ # # =>
4083
+ # # shape: (25, 3)
4084
+ # # ┌───────┬─────────────────────┬─────────────────┐
4085
+ # # │ index ┆ date ┆ rolling_row_max │
4086
+ # # │ --- ┆ --- ┆ --- │
4087
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4088
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4089
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4090
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4091
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4092
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4093
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4094
+ # # │ … ┆ … ┆ … │
4095
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4096
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4097
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4098
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4099
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4100
+ # # └───────┴─────────────────────┴─────────────────┘
4101
+ def rolling_max_by(
4102
+ by,
4103
+ window_size,
4104
+ min_periods: 1,
4105
+ closed: "right",
4106
+ warn_if_unsorted: nil
4107
+ )
4108
+ window_size = _prepare_rolling_by_window_args(window_size)
4109
+ by = Utils.parse_as_expression(by)
4110
+ _from_rbexpr(
4111
+ _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
4112
+ )
4113
+ end
4114
+
4115
+ # Apply a rolling mean based on another column.
4116
+ #
4117
+ # @param by [String]
4118
+ # This column must be of dtype Datetime or Date.
4119
+ # @param window_size [String]
4120
+ # The length of the window. Can be a dynamic temporal
4121
+ # size indicated by a timedelta or the following string language:
4122
+ #
4123
+ # - 1ns (1 nanosecond)
4124
+ # - 1us (1 microsecond)
4125
+ # - 1ms (1 millisecond)
4126
+ # - 1s (1 second)
4127
+ # - 1m (1 minute)
4128
+ # - 1h (1 hour)
4129
+ # - 1d (1 calendar day)
4130
+ # - 1w (1 calendar week)
4131
+ # - 1mo (1 calendar month)
4132
+ # - 1q (1 calendar quarter)
4133
+ # - 1y (1 calendar year)
4134
+ #
4135
+ # By "calendar day", we mean the corresponding time on the next day
4136
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4137
+ # "calendar week", "calendar month", "calendar quarter", and
4138
+ # "calendar year".
4139
+ # @param min_periods [Integer]
4140
+ # The number of values in the window that should be non-null before computing
4141
+ # a result.
4142
+ # @param closed ['left', 'right', 'both', 'none']
4143
+ # Define which sides of the temporal interval are closed (inclusive),
4144
+ # defaults to `'right'`.
4145
+ # @param warn_if_unsorted [Boolean]
4146
+ # Warn if data is not known to be sorted by `by` column. Currently accepted but unused: the value is not forwarded to the native call.
4147
+ #
4148
+ # @return [Expr]
4149
+ #
4150
+ # @note
4151
+ # If you want to compute multiple aggregation statistics over the same dynamic
4152
+ # window, consider using `rolling` - this method can cache the window size
4153
+ # computation.
4154
+ #
4155
+ # @example Create a DataFrame with a datetime column and a row number column
4156
+ # start = DateTime.new(2001, 1, 1)
4157
+ # stop = DateTime.new(2001, 1, 2)
4158
+ # df_temporal = Polars::DataFrame.new(
4159
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4160
+ # ).with_row_index
4161
+ # # =>
4162
+ # # shape: (25, 2)
4163
+ # # ┌───────┬─────────────────────┐
4164
+ # # │ index ┆ date │
4165
+ # # │ --- ┆ --- │
4166
+ # # │ u32 ┆ datetime[ns] │
4167
+ # # ╞═══════╪═════════════════════╡
4168
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4169
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4170
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4171
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4172
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4173
+ # # │ … ┆ … │
4174
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4175
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4176
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4177
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4178
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4179
+ # # └───────┴─────────────────────┘
4180
+ #
4181
+ # @example Compute the rolling mean with the temporal windows closed on the right (default)
4182
+ # df_temporal.with_columns(
4183
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4184
+ # "date", "2h"
4185
+ # )
4186
+ # )
4187
+ # # =>
4188
+ # # shape: (25, 3)
4189
+ # # ┌───────┬─────────────────────┬──────────────────┐
4190
+ # # │ index ┆ date ┆ rolling_row_mean │
4191
+ # # │ --- ┆ --- ┆ --- │
4192
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4193
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4194
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4195
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4196
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4197
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4198
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4199
+ # # │ … ┆ … ┆ … │
4200
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4201
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4202
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4203
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4204
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4205
+ # # └───────┴─────────────────────┴──────────────────┘
4206
+ #
4207
+ # @example Compute the rolling mean with the closure of windows on both sides
4208
+ # df_temporal.with_columns(
4209
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4210
+ # "date", "2h", closed: "both"
4211
+ # )
4212
+ # )
4213
+ # # =>
4214
+ # # shape: (25, 3)
4215
+ # # ┌───────┬─────────────────────┬──────────────────┐
4216
+ # # │ index ┆ date ┆ rolling_row_mean │
4217
+ # # │ --- ┆ --- ┆ --- │
4218
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4219
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4220
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4221
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4222
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4223
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4224
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4225
+ # # │ … ┆ … ┆ … │
4226
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4227
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4228
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4229
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4230
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4231
+ # # └───────┴─────────────────────┴──────────────────┘
4232
+ def rolling_mean_by(
4233
+ by,
4234
+ window_size,
4235
+ min_periods: 1,
4236
+ closed: "right",
4237
+ warn_if_unsorted: nil
4238
+ )
4239
+ window_size = _prepare_rolling_by_window_args(window_size)
4240
+ by = Utils.parse_as_expression(by)
4241
+ _from_rbexpr(
4242
+ _rbexpr.rolling_mean_by(
4243
+ by,
4244
+ window_size,
4245
+ min_periods,
4246
+ closed
4247
+ )
4248
+ )
4249
+ end
4250
+
4251
+ # Apply a rolling sum based on another column.
4252
+ #
4253
+ # @param by [String]
4254
+ # This column must be of dtype Datetime or Date.
4255
+ # @param window_size [String]
4256
+ # The length of the window. Can be a dynamic temporal
4257
+ # size indicated by a timedelta or the following string language:
4258
+ #
4259
+ # - 1ns (1 nanosecond)
4260
+ # - 1us (1 microsecond)
4261
+ # - 1ms (1 millisecond)
4262
+ # - 1s (1 second)
4263
+ # - 1m (1 minute)
4264
+ # - 1h (1 hour)
4265
+ # - 1d (1 calendar day)
4266
+ # - 1w (1 calendar week)
4267
+ # - 1mo (1 calendar month)
4268
+ # - 1q (1 calendar quarter)
4269
+ # - 1y (1 calendar year)
4270
+ #
4271
+ # By "calendar day", we mean the corresponding time on the next day
4272
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4273
+ # "calendar week", "calendar month", "calendar quarter", and
4274
+ # "calendar year".
4275
+ # @param min_periods [Integer]
4276
+ # The number of values in the window that should be non-null before computing
4277
+ # a result.
4278
+ # @param closed ['left', 'right', 'both', 'none']
4279
+ # Define which sides of the temporal interval are closed (inclusive),
4280
+ # defaults to `'right'`.
4281
+ # @param warn_if_unsorted [Boolean]
4282
+ # Warn if data is not known to be sorted by `by` column. Currently accepted but unused: the value is not forwarded to the native call.
4283
+ #
4284
+ # @return [Expr]
4285
+ #
4286
+ # @note
4287
+ # If you want to compute multiple aggregation statistics over the same dynamic
4288
+ # window, consider using `rolling` - this method can cache the window size
4289
+ # computation.
4290
+ #
4291
+ # @example Create a DataFrame with a datetime column and a row number column
4292
+ # start = DateTime.new(2001, 1, 1)
4293
+ # stop = DateTime.new(2001, 1, 2)
4294
+ # df_temporal = Polars::DataFrame.new(
4295
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4296
+ # ).with_row_index
4297
+ # # =>
4298
+ # # shape: (25, 2)
4299
+ # # ┌───────┬─────────────────────┐
4300
+ # # │ index ┆ date │
4301
+ # # │ --- ┆ --- │
4302
+ # # │ u32 ┆ datetime[ns] │
4303
+ # # ╞═══════╪═════════════════════╡
4304
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4305
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4306
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4307
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4308
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4309
+ # # │ … ┆ … │
4310
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4311
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4312
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4313
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4314
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4315
+ # # └───────┴─────────────────────┘
4316
+ #
4317
+ # @example Compute the rolling sum with the temporal windows closed on the right (default)
4318
+ # df_temporal.with_columns(
4319
+ # rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
4320
+ # )
4321
+ # # =>
4322
+ # # shape: (25, 3)
4323
+ # # ┌───────┬─────────────────────┬─────────────────┐
4324
+ # # │ index ┆ date ┆ rolling_row_sum │
4325
+ # # │ --- ┆ --- ┆ --- │
4326
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4327
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4328
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4329
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4330
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4331
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 5 │
4332
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 7 │
4333
+ # # │ … ┆ … ┆ … │
4334
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 39 │
4335
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 41 │
4336
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 43 │
4337
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 45 │
4338
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 47 │
4339
+ # # └───────┴─────────────────────┴─────────────────┘
4340
+ #
4341
+ # @example Compute the rolling sum with the closure of windows on both sides
4342
+ # df_temporal.with_columns(
4343
+ # rolling_row_sum: Polars.col("index").rolling_sum_by(
4344
+ # "date", "2h", closed: "both"
4345
+ # )
4346
+ # )
4347
+ # # =>
4348
+ # # shape: (25, 3)
4349
+ # # ┌───────┬─────────────────────┬─────────────────┐
4350
+ # # │ index ┆ date ┆ rolling_row_sum │
4351
+ # # │ --- ┆ --- ┆ --- │
4352
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4353
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4354
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4355
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4356
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4357
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 6 │
4358
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 9 │
4359
+ # # │ … ┆ … ┆ … │
4360
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 57 │
4361
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 60 │
4362
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 63 │
4363
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 66 │
4364
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │
4365
+ # # └───────┴─────────────────────┴─────────────────┘
4366
+ def rolling_sum_by(
4367
+ by,
4368
+ window_size,
4369
+ min_periods: 1,
4370
+ closed: "right",
4371
+ warn_if_unsorted: nil
4372
+ )
4373
+ window_size = _prepare_rolling_by_window_args(window_size)
4374
+ by = Utils.parse_as_expression(by)
4375
+ _from_rbexpr(
4376
+ _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
4377
+ )
4378
+ end
4379
+
4380
+ # Compute a rolling standard deviation based on another column.
4381
+ #
4382
+ # @param by [String]
4383
+ # This column must be of dtype Datetime or Date.
4384
+ # @param window_size [String]
4385
+ # The length of the window. Can be a dynamic temporal
4386
+ # size indicated by a timedelta or the following string language:
4387
+ #
4388
+ # - 1ns (1 nanosecond)
4389
+ # - 1us (1 microsecond)
4390
+ # - 1ms (1 millisecond)
4391
+ # - 1s (1 second)
4392
+ # - 1m (1 minute)
4393
+ # - 1h (1 hour)
4394
+ # - 1d (1 calendar day)
4395
+ # - 1w (1 calendar week)
4396
+ # - 1mo (1 calendar month)
4397
+ # - 1q (1 calendar quarter)
4398
+ # - 1y (1 calendar year)
4399
+ #
4400
+ # By "calendar day", we mean the corresponding time on the next day
4401
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4402
+ # "calendar week", "calendar month", "calendar quarter", and
4403
+ # "calendar year".
4404
+ # @param min_periods [Integer]
4405
+ # The number of values in the window that should be non-null before computing
4406
+ # a result.
4407
+ # @param closed ['left', 'right', 'both', 'none']
4408
+ # Define which sides of the temporal interval are closed (inclusive),
4409
+ # defaults to `'right'`.
4410
+ # @param ddof [Integer]
4411
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4412
+ # @param warn_if_unsorted [Boolean]
4413
+ # Warn if data is not known to be sorted by `by` column. Currently accepted but unused: the value is not forwarded to the native call.
4414
+ #
4415
+ # @return [Expr]
4416
+ #
4417
+ # @note
4418
+ # If you want to compute multiple aggregation statistics over the same dynamic
4419
+ # window, consider using `rolling` - this method can cache the window size
4420
+ # computation.
4421
+ #
4422
+ # @example Create a DataFrame with a datetime column and a row number column
4423
+ # start = DateTime.new(2001, 1, 1)
4424
+ # stop = DateTime.new(2001, 1, 2)
4425
+ # df_temporal = Polars::DataFrame.new(
4426
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4427
+ # ).with_row_index
4428
+ # # =>
4429
+ # # shape: (25, 2)
4430
+ # # ┌───────┬─────────────────────┐
4431
+ # # │ index ┆ date │
4432
+ # # │ --- ┆ --- │
4433
+ # # │ u32 ┆ datetime[ns] │
4434
+ # # ╞═══════╪═════════════════════╡
4435
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4436
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4437
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4438
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4439
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4440
+ # # │ … ┆ … │
4441
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4442
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4443
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4444
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4445
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4446
+ # # └───────┴─────────────────────┘
4447
+ #
4448
+ # @example Compute the rolling std with the temporal windows closed on the right (default)
4449
+ # df_temporal.with_columns(
4450
+ # rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
4451
+ # )
4452
+ # # =>
4453
+ # # shape: (25, 3)
4454
+ # # ┌───────┬─────────────────────┬─────────────────┐
4455
+ # # │ index ┆ date ┆ rolling_row_std │
4456
+ # # │ --- ┆ --- ┆ --- │
4457
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4458
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4459
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4460
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4461
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.707107 │
4462
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.707107 │
4463
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.707107 │
4464
+ # # │ … ┆ … ┆ … │
4465
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.707107 │
4466
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.707107 │
4467
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.707107 │
4468
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.707107 │
4469
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.707107 │
4470
+ # # └───────┴─────────────────────┴─────────────────┘
4471
+ #
4472
+ # @example Compute the rolling std with the closure of windows on both sides
4473
+ # df_temporal.with_columns(
4474
+ # rolling_row_std: Polars.col("index").rolling_std_by(
4475
+ # "date", "2h", closed: "both"
4476
+ # )
4477
+ # )
4478
+ # # =>
4479
+ # # shape: (25, 3)
4480
+ # # ┌───────┬─────────────────────┬─────────────────┐
4481
+ # # │ index ┆ date ┆ rolling_row_std │
4482
+ # # │ --- ┆ --- ┆ --- │
4483
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4484
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4485
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4486
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4487
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4488
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4489
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4490
+ # # │ … ┆ … ┆ … │
4491
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4492
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4493
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4494
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4495
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4496
+ # # └───────┴─────────────────────┴─────────────────┘
4497
+ def rolling_std_by(
4498
+ by,
4499
+ window_size,
4500
+ min_periods: 1,
4501
+ closed: "right",
4502
+ ddof: 1,
4503
+ warn_if_unsorted: nil
4504
+ )
4505
+ window_size = _prepare_rolling_by_window_args(window_size)
4506
+ by = Utils.parse_as_expression(by)
4507
+ _from_rbexpr(
4508
+ _rbexpr.rolling_std_by(
4509
+ by,
4510
+ window_size,
4511
+ min_periods,
4512
+ closed,
4513
+ ddof
4514
+ )
4515
+ )
4516
+ end
4517
+
4518
+ # Compute a rolling variance based on another column.
4519
+ #
4520
+ # @param by [String]
4521
+ # This column must be of dtype Datetime or Date.
4522
+ # @param window_size [String]
4523
+ # The length of the window. Can be a dynamic temporal
4524
+ # size indicated by a timedelta or the following string language:
4525
+ #
4526
+ # - 1ns (1 nanosecond)
4527
+ # - 1us (1 microsecond)
4528
+ # - 1ms (1 millisecond)
4529
+ # - 1s (1 second)
4530
+ # - 1m (1 minute)
4531
+ # - 1h (1 hour)
4532
+ # - 1d (1 calendar day)
4533
+ # - 1w (1 calendar week)
4534
+ # - 1mo (1 calendar month)
4535
+ # - 1q (1 calendar quarter)
4536
+ # - 1y (1 calendar year)
4537
+ #
4538
+ # By "calendar day", we mean the corresponding time on the next day
4539
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4540
+ # "calendar week", "calendar month", "calendar quarter", and
4541
+ # "calendar year".
4542
+ # @param min_periods [Integer]
4543
+ # The number of values in the window that should be non-null before computing
4544
+ # a result.
4545
+ # @param closed ['left', 'right', 'both', 'none']
4546
+ # Define which sides of the temporal interval are closed (inclusive),
4547
+ # defaults to `'right'`.
4548
+ # @param ddof [Integer]
4549
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4550
+ # @param warn_if_unsorted [Boolean]
4551
+ # Warn if data is not known to be sorted by `by` column. Currently accepted but unused: the value is not forwarded to the native call.
4552
+ #
4553
+ # @return [Expr]
4554
+ #
4555
+ # @note
4556
+ # If you want to compute multiple aggregation statistics over the same dynamic
4557
+ # window, consider using `rolling` - this method can cache the window size
4558
+ # computation.
4559
+ #
4560
+ # @example Create a DataFrame with a datetime column and a row number column
4561
+ # start = DateTime.new(2001, 1, 1)
4562
+ # stop = DateTime.new(2001, 1, 2)
4563
+ # df_temporal = Polars::DataFrame.new(
4564
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4565
+ # ).with_row_index
4566
+ # # =>
4567
+ # # shape: (25, 2)
4568
+ # # ┌───────┬─────────────────────┐
4569
+ # # │ index ┆ date │
4570
+ # # │ --- ┆ --- │
4571
+ # # │ u32 ┆ datetime[ns] │
4572
+ # # ╞═══════╪═════════════════════╡
4573
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4574
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4575
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4576
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4577
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4578
+ # # │ … ┆ … │
4579
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4580
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4581
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4582
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4583
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4584
+ # # └───────┴─────────────────────┘
4585
+ #
4586
+ # @example Compute the rolling var with the temporal windows closed on the right (default)
4587
+ # df_temporal.with_columns(
4588
+ # rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
4589
+ # )
4590
+ # # =>
4591
+ # # shape: (25, 3)
4592
+ # # ┌───────┬─────────────────────┬─────────────────┐
4593
+ # # │ index ┆ date ┆ rolling_row_var │
4594
+ # # │ --- ┆ --- ┆ --- │
4595
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4596
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4597
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4598
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4599
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.5 │
4600
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.5 │
4601
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.5 │
4602
+ # # │ … ┆ … ┆ … │
4603
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.5 │
4604
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.5 │
4605
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.5 │
4606
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.5 │
4607
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.5 │
4608
+ # # └───────┴─────────────────────┴─────────────────┘
4609
+ #
4610
+ # @example Compute the rolling var with the closure of windows on both sides
4611
+ # df_temporal.with_columns(
4612
+ # rolling_row_var: Polars.col("index").rolling_var_by(
4613
+ # "date", "2h", closed: "both"
4614
+ # )
4615
+ # )
4616
+ # # =>
4617
+ # # shape: (25, 3)
4618
+ # # ┌───────┬─────────────────────┬─────────────────┐
4619
+ # # │ index ┆ date ┆ rolling_row_var │
4620
+ # # │ --- ┆ --- ┆ --- │
4621
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4622
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4623
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4624
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4625
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4626
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4627
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4628
+ # # │ … ┆ … ┆ … │
4629
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4630
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4631
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4632
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4633
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4634
+ # # └───────┴─────────────────────┴─────────────────┘
4635
def rolling_var_by(by, window_size, min_periods: 1, closed: "right", ddof: 1, warn_if_unsorted: nil)
  # Rolling variance of this expression over windows keyed on the `by` column.
  #
  # The window size is first routed through `_prepare_rolling_by_window_args`
  # and `by` is converted to an expression; both are then handed to the native
  # `rolling_var_by` together with `min_periods`, `closed` and `ddof`, and the
  # native result is wrapped back into an Expr.
  #
  # NOTE(review): `warn_if_unsorted` is accepted but not forwarded to the
  # native call here - confirm whether that is intentional.
  prepared_window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)
  native_result = _rbexpr.rolling_var_by(by_expr, prepared_window, min_periods, closed, ddof)
  _from_rbexpr(native_result)
end
4655
+
4656
+ # Compute a rolling median based on another column.
4657
+ #
4658
+ # @param by [String]
4659
+ # This column must be of dtype Datetime or Date.
4660
+ # @param window_size [String]
4661
+ # The length of the window. Can be a dynamic temporal
4662
+ # size indicated by a timedelta or the following string language:
4663
+ #
4664
+ # - 1ns (1 nanosecond)
4665
+ # - 1us (1 microsecond)
4666
+ # - 1ms (1 millisecond)
4667
+ # - 1s (1 second)
4668
+ # - 1m (1 minute)
4669
+ # - 1h (1 hour)
4670
+ # - 1d (1 calendar day)
4671
+ # - 1w (1 calendar week)
4672
+ # - 1mo (1 calendar month)
4673
+ # - 1q (1 calendar quarter)
4674
+ # - 1y (1 calendar year)
4675
+ #
4676
+ # By "calendar day", we mean the corresponding time on the next day
4677
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4678
+ # "calendar week", "calendar month", "calendar quarter", and
4679
+ # "calendar year".
4680
+ # @param min_periods [Integer]
4681
+ # The number of values in the window that should be non-null before computing
4682
+ # a result.
4683
+ # @param closed ['left', 'right', 'both', 'none']
4684
+ # Define which sides of the temporal interval are closed (inclusive),
4685
+ # defaults to `'right'`.
4686
+ # @param warn_if_unsorted [Boolean]
4687
+ # Warn if data is not known to be sorted by `by` column. Note: this method accepts the option but does not pass it on to the underlying native call.
4688
+ #
4689
+ # @return [Expr]
4690
+ #
4691
+ # @note
4692
+ # If you want to compute multiple aggregation statistics over the same dynamic
4693
+ # window, consider using `rolling` - this method can cache the window size
4694
+ # computation.
4695
+ #
4696
+ # @example Create a DataFrame with a datetime column and a row number column
4697
+ # start = DateTime.new(2001, 1, 1)
4698
+ # stop = DateTime.new(2001, 1, 2)
4699
+ # df_temporal = Polars::DataFrame.new(
4700
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4701
+ # ).with_row_index
4702
+ # # =>
4703
+ # # shape: (25, 2)
4704
+ # # ┌───────┬─────────────────────┐
4705
+ # # │ index ┆ date │
4706
+ # # │ --- ┆ --- │
4707
+ # # │ u32 ┆ datetime[ns] │
4708
+ # # ╞═══════╪═════════════════════╡
4709
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4710
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4711
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4712
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4713
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4714
+ # # │ … ┆ … │
4715
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4716
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4717
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4718
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4719
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4720
+ # # └───────┴─────────────────────┘
4721
+ #
4722
+ # @example Compute the rolling median with the temporal windows closed on the right:
4723
+ # df_temporal.with_columns(
4724
+ # rolling_row_median: Polars.col("index").rolling_median_by(
4725
+ # "date", "2h"
4726
+ # )
4727
+ # )
4728
+ # # =>
4729
+ # # shape: (25, 3)
4730
+ # # ┌───────┬─────────────────────┬────────────────────┐
4731
+ # # │ index ┆ date ┆ rolling_row_median │
4732
+ # # │ --- ┆ --- ┆ --- │
4733
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4734
+ # # ╞═══════╪═════════════════════╪════════════════════╡
4735
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4736
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4737
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4738
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4739
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4740
+ # # │ … ┆ … ┆ … │
4741
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4742
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4743
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4744
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4745
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4746
+ # # └───────┴─────────────────────┴────────────────────┘
4747
def rolling_median_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil)
  # Rolling median of this expression over windows keyed on the `by` column.
  #
  # The window size is first routed through `_prepare_rolling_by_window_args`
  # and `by` is converted to an expression; both are then handed to the native
  # `rolling_median_by` together with `min_periods` and `closed`, and the
  # native result is wrapped back into an Expr.
  #
  # NOTE(review): `warn_if_unsorted` is accepted but not forwarded to the
  # native call here - confirm whether that is intentional.
  prepared_window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)
  native_result = _rbexpr.rolling_median_by(by_expr, prepared_window, min_periods, closed)
  _from_rbexpr(native_result)
end
4760
+
4761
+ # Compute a rolling quantile based on another column.
4762
+ #
4763
+ # @param by [String]
4764
+ # This column must be of dtype Datetime or Date.
4765
+ # @param quantile [Float]
4766
+ # Quantile between 0.0 and 1.0.
4767
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
4768
+ # Interpolation method.
4769
+ # @param window_size [String]
4770
+ # The length of the window. Can be a dynamic
4771
+ # temporal size indicated by a timedelta or the following string language:
4772
+ #
4773
+ # - 1ns (1 nanosecond)
4774
+ # - 1us (1 microsecond)
4775
+ # - 1ms (1 millisecond)
4776
+ # - 1s (1 second)
4777
+ # - 1m (1 minute)
4778
+ # - 1h (1 hour)
4779
+ # - 1d (1 calendar day)
4780
+ # - 1w (1 calendar week)
4781
+ # - 1mo (1 calendar month)
4782
+ # - 1q (1 calendar quarter)
4783
+ # - 1y (1 calendar year)
4784
+ #
4785
+ # By "calendar day", we mean the corresponding time on the next day
4786
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4787
+ # "calendar week", "calendar month", "calendar quarter", and
4788
+ # "calendar year".
4789
+ # @param min_periods [Integer]
4790
+ # The number of values in the window that should be non-null before computing
4791
+ # a result.
4792
+ # @param closed ['left', 'right', 'both', 'none']
4793
+ # Define which sides of the temporal interval are closed (inclusive),
4794
+ # defaults to `'right'`.
4795
+ # @param warn_if_unsorted [Boolean]
4796
+ # Warn if data is not known to be sorted by `by` column. Note: this method accepts the option but does not pass it on to the underlying native call.
4797
+ #
4798
+ # @return [Expr]
4799
+ #
4800
+ # @note
4801
+ # If you want to compute multiple aggregation statistics over the same dynamic
4802
+ # window, consider using `rolling` - this method can cache the window size
4803
+ # computation.
4804
+ #
4805
+ # @example Create a DataFrame with a datetime column and a row number column
4806
+ # start = DateTime.new(2001, 1, 1)
4807
+ # stop = DateTime.new(2001, 1, 2)
4808
+ # df_temporal = Polars::DataFrame.new(
4809
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4810
+ # ).with_row_index
4811
+ # # =>
4812
+ # # shape: (25, 2)
4813
+ # # ┌───────┬─────────────────────┐
4814
+ # # │ index ┆ date │
4815
+ # # │ --- ┆ --- │
4816
+ # # │ u32 ┆ datetime[ns] │
4817
+ # # ╞═══════╪═════════════════════╡
4818
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4819
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4820
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4821
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4822
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4823
+ # # │ … ┆ … │
4824
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4825
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4826
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4827
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4828
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4829
+ # # └───────┴─────────────────────┘
4830
+ #
4831
+ # @example Compute the rolling quantile with the temporal windows closed on the right:
4832
+ # df_temporal.with_columns(
4833
+ # rolling_row_quantile: Polars.col("index").rolling_quantile_by(
4834
+ # "date", "2h", quantile: 0.3
4835
+ # )
4836
+ # )
4837
+ # # =>
4838
+ # # shape: (25, 3)
4839
+ # # ┌───────┬─────────────────────┬──────────────────────┐
4840
+ # # │ index ┆ date ┆ rolling_row_quantile │
4841
+ # # │ --- ┆ --- ┆ --- │
4842
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4843
+ # # ╞═══════╪═════════════════════╪══════════════════════╡
4844
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4845
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.0 │
4846
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4847
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4848
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4849
+ # # │ … ┆ … ┆ … │
4850
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4851
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4852
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4853
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4854
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4855
+ # # └───────┴─────────────────────┴──────────────────────┘
4856
def rolling_quantile_by(
  by,
  window_size,
  quantile:,
  interpolation: "nearest",
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  # Rolling quantile of this expression over windows keyed on the `by` column.
  #
  # The window size is first routed through `_prepare_rolling_by_window_args`
  # and `by` is converted to an expression. The native `rolling_quantile_by`
  # takes its positional arguments in the order
  # (by, quantile, interpolation, window_size, min_periods, closed), which
  # differs from this method's own parameter order.
  #
  # NOTE(review): `warn_if_unsorted` is accepted but not forwarded to the
  # native call here - confirm whether that is intentional.
  prepared_window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)
  native_args = [by_expr, quantile, interpolation, prepared_window, min_periods, closed]
  _from_rbexpr(_rbexpr.rolling_quantile_by(*native_args))
end
4878
+
3860
4879
  # Apply a rolling min (moving min) over the values in this array.
3861
4880
  #
3862
4881
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3939,9 +4958,20 @@ module Polars
3939
4958
  window_size, min_periods = _prepare_rolling_window_args(
3940
4959
  window_size, min_periods
3941
4960
  )
4961
+ if !by.nil?
4962
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
4963
+ return rolling_min_by(
4964
+ by,
4965
+ window_size,
4966
+ min_periods: min_periods,
4967
+ closed: closed || "right",
4968
+ warn_if_unsorted: warn_if_unsorted
4969
+ )
4970
+ end
4971
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
3942
4972
  _from_rbexpr(
3943
4973
  _rbexpr.rolling_min(
3944
- window_size, weights, min_periods, center, by, closed
4974
+ window_size, weights, min_periods, center
3945
4975
  )
3946
4976
  )
3947
4977
  end
@@ -4028,9 +5058,20 @@ module Polars
4028
5058
  window_size, min_periods = _prepare_rolling_window_args(
4029
5059
  window_size, min_periods
4030
5060
  )
5061
+ if !by.nil?
5062
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5063
+ return rolling_max_by(
5064
+ by,
5065
+ window_size,
5066
+ min_periods: min_periods,
5067
+ closed: closed || "right",
5068
+ warn_if_unsorted: warn_if_unsorted
5069
+ )
5070
+ end
5071
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4031
5072
  _from_rbexpr(
4032
5073
  _rbexpr.rolling_max(
4033
- window_size, weights, min_periods, center, by, closed
5074
+ window_size, weights, min_periods, center
4034
5075
  )
4035
5076
  )
4036
5077
  end
@@ -4117,9 +5158,20 @@ module Polars
4117
5158
  window_size, min_periods = _prepare_rolling_window_args(
4118
5159
  window_size, min_periods
4119
5160
  )
5161
+ if !by.nil?
5162
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5163
+ return rolling_mean_by(
5164
+ by,
5165
+ window_size,
5166
+ min_periods: min_periods,
5167
+ closed: closed || "right",
5168
+ warn_if_unsorted: warn_if_unsorted
5169
+ )
5170
+ end
5171
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4120
5172
  _from_rbexpr(
4121
5173
  _rbexpr.rolling_mean(
4122
- window_size, weights, min_periods, center, by, closed
5174
+ window_size, weights, min_periods, center
4123
5175
  )
4124
5176
  )
4125
5177
  end
@@ -4206,9 +5258,20 @@ module Polars
4206
5258
  window_size, min_periods = _prepare_rolling_window_args(
4207
5259
  window_size, min_periods
4208
5260
  )
5261
+ if !by.nil?
5262
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5263
+ return rolling_sum_by(
5264
+ by,
5265
+ window_size,
5266
+ min_periods: min_periods,
5267
+ closed: closed || "right",
5268
+ warn_if_unsorted: warn_if_unsorted
5269
+ )
5270
+ end
5271
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4209
5272
  _from_rbexpr(
4210
5273
  _rbexpr.rolling_sum(
4211
- window_size, weights, min_periods, center, by, closed
5274
+ window_size, weights, min_periods, center
4212
5275
  )
4213
5276
  )
4214
5277
  end
@@ -4297,9 +5360,21 @@ module Polars
4297
5360
  window_size, min_periods = _prepare_rolling_window_args(
4298
5361
  window_size, min_periods
4299
5362
  )
5363
+ if !by.nil?
5364
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5365
+ return rolling_std_by(
5366
+ by,
5367
+ window_size,
5368
+ min_periods: min_periods,
5369
+ closed: closed || "right",
5370
+ ddof: ddof,
5371
+ warn_if_unsorted: warn_if_unsorted
5372
+ )
5373
+ end
5374
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4300
5375
  _from_rbexpr(
4301
5376
  _rbexpr.rolling_std(
4302
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5377
+ window_size, weights, min_periods, center, ddof
4303
5378
  )
4304
5379
  )
4305
5380
  end
@@ -4388,9 +5463,21 @@ module Polars
4388
5463
  window_size, min_periods = _prepare_rolling_window_args(
4389
5464
  window_size, min_periods
4390
5465
  )
5466
+ if !by.nil?
5467
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5468
+ return rolling_var_by(
5469
+ by,
5470
+ window_size,
5471
+ min_periods: min_periods,
5472
+ closed: closed || "right",
5473
+ ddof: ddof,
5474
+ warn_if_unsorted: warn_if_unsorted
5475
+ )
5476
+ end
5477
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4391
5478
  _from_rbexpr(
4392
5479
  _rbexpr.rolling_var(
4393
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5480
+ window_size, weights, min_periods, center, ddof
4394
5481
  )
4395
5482
  )
4396
5483
  end
@@ -4474,9 +5561,20 @@ module Polars
4474
5561
  window_size, min_periods = _prepare_rolling_window_args(
4475
5562
  window_size, min_periods
4476
5563
  )
5564
+ if !by.nil?
5565
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5566
+ return rolling_median_by(
5567
+ by,
5568
+ window_size,
5569
+ min_periods: min_periods,
5570
+ closed: closed || "right",
5571
+ warn_if_unsorted: warn_if_unsorted
5572
+ )
5573
+ end
5574
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4477
5575
  _from_rbexpr(
4478
5576
  _rbexpr.rolling_median(
4479
- window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5577
+ window_size, weights, min_periods, center
4480
5578
  )
4481
5579
  )
4482
5580
  end
@@ -4566,9 +5664,21 @@ module Polars
4566
5664
  window_size, min_periods = _prepare_rolling_window_args(
4567
5665
  window_size, min_periods
4568
5666
  )
5667
+ if !by.nil?
5668
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5669
+ return rolling_quantile_by(
5670
+ by,
5671
+ window_size,
5672
+ min_periods: min_periods,
5673
+ closed: closed || "right",
5674
+ warn_if_unsorted: warn_if_unsorted,
5675
+ quantile: quantile
5676
+ )
5677
+ end
5678
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4569
5679
  _from_rbexpr(
4570
5680
  _rbexpr.rolling_quantile(
4571
- quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5681
+ quantile, interpolation, window_size, weights, min_periods, center
4572
5682
  )
4573
5683
  )
4574
5684
  end
@@ -6101,5 +7211,9 @@ module Polars
6101
7211
  end
6102
7212
  [window_size, min_periods]
6103
7213
  end
7214
+
7215
def _prepare_rolling_by_window_args(window_size)
  # Hook for normalising the window size used by the `rolling_*_by` methods.
  # At present it hands the value back untouched.
  return window_size
end
6104
7218
  end
6105
7219
  end