polars-df 0.10.0-aarch64-linux → 0.11.0-aarch64-linux
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/LICENSE-THIRD-PARTY.txt +152 -79
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +12 -4
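The new data/lib/polars/functions/range/* files back eager and lazy range helpers (date_range, datetime_range, time_range). The expr.rb documentation below builds its sample frame with Polars.datetime_range, so a minimal eager sketch is shown here; treating date_range and time_range as following the same calling pattern is an assumption:

require "date"

# 25 hourly timestamps from 2001-01-01 00:00 through 2001-01-02 00:00, returned eagerly as a Series
Polars.datetime_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 2), "1h", eager: true)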
data/lib/polars/expr.rb
CHANGED
@@ -1345,9 +1345,9 @@ module Polars
  # # │ 3 ┆ 4 │
  # # │ 2 ┆ 98 │
  # # └───────┴──────────┘
- def top_k(k: 5)
+ def top_k(k: 5, nulls_last: false, multithreaded: true)
  k = Utils.parse_as_expression(k)
- _from_rbexpr(_rbexpr.top_k(k))
+ _from_rbexpr(_rbexpr.top_k(k, nulls_last, multithreaded))
  end

  # Return the `k` smallest elements.
@@ -1384,9 +1384,9 @@ module Polars
  # # │ 3 ┆ 4 │
  # # │ 2 ┆ 98 │
  # # └───────┴──────────┘
- def bottom_k(k: 5)
+ def bottom_k(k: 5, nulls_last: false, multithreaded: true)
  k = Utils.parse_as_expression(k)
- _from_rbexpr(_rbexpr.bottom_k(k))
+ _from_rbexpr(_rbexpr.bottom_k(k, nulls_last, multithreaded))
  end

  # Get the index values that would sort this column.
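The extra top_k/bottom_k keywords shown above are assumed to behave like their upstream Polars counterparts: nulls_last pushes null values to the end of the result and multithreaded toggles parallel sorting. A minimal sketch with an illustrative column:

df = Polars::DataFrame.new({"a" => [6, 1, nil, 98, 2, 3]})
df.select(
  Polars.col("a").top_k(k: 3, nulls_last: true),                          # three largest values of "a"
  Polars.col("a").bottom_k(k: 3, multithreaded: false).alias("a_bottom")  # three smallest, single-threaded
)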
@@ -2764,6 +2764,9 @@ module Polars
  # Dtype of the output Series.
  # @param agg_list [Boolean]
  # Aggregate list.
+ # @param is_elementwise [Boolean]
+ # If set to true this can run in the streaming engine, but may yield
+ # incorrect results in group-by. Ensure you know what you are doing!
  #
  # @return [Expr]
  #
@@ -2784,12 +2787,21 @@ module Polars
  # # ╞══════╪════════╡
  # # │ 1 ┆ 0 │
  # # └──────┴────────┘
- # def
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
  # if !return_dtype.nil?
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
  # end
- # _from_rbexpr(
+ # _from_rbexpr(
+ # _rbexpr.map_batches(
+ # # TODO _map_batches_wrapper
+ # f,
+ # return_dtype,
+ # agg_list,
+ # is_elementwise
+ # )
+ # )
  # end
+ # alias_method :map, :map_batches

  # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
  #
@@ -2831,7 +2843,7 @@ module Polars
  #
  # @example In a selection context, the function is applied by row.
  # df.with_column(
- # Polars.col("a").
+ # Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
  # )
  # # =>
  # # shape: (4, 3)
@@ -2851,7 +2863,7 @@ module Polars
  # .group_by("b", maintain_order: true)
  # .agg(
  # [
- # Polars.col("a").
+ # Polars.col("a").map_elements { |x| x.sum }
  # ]
  # )
  # .collect
@@ -2866,12 +2878,23 @@ module Polars
  # # │ b ┆ 2 │
  # # │ c ┆ 4 │
  # # └─────┴─────┘
- # def
- #
- #
+ # def map_elements(
+ # return_dtype: nil,
+ # skip_nulls: true,
+ # pass_name: false,
+ # strategy: "thread_local",
+ # &f
+ # )
+ # if pass_name
+ # raise Todo
+ # else
+ # wrap_f = lambda do |x|
+ # x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
+ # end
  # end
- #
+ # map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
  # end
+ # alias_method :apply, :map_elements

  # Explode a list or utf8 Series. This means that every item is expanded to a new
  # row.
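Note that both map_batches and map_elements still appear commented out in this diff, so the expression-level UDF API may not be callable in 0.11.0; the sketch below only mirrors the call shape implied by those commented definitions and is an assumption, not a confirmed public API:

# Element-wise UDF: double every value of "a" (per the commented map_elements example)
df.with_column(
  Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
)

# Batch UDF: the block would receive a whole Series at once (per the commented map_batches definition)
df.select(
  Polars.col("a").map_batches(return_dtype: Polars::Float64) { |s| s / s.sum }
)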
@@ -3857,6 +3880,1002 @@ module Polars
  _from_rbexpr(_rbexpr.interpolate(method))
  end

+ # Apply a rolling min based on another column.
+ #
+ # @param by [String]
+ # This column must be of dtype Datetime or Date.
+ # @param window_size [String]
+ # The length of the window. Can be a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 calendar day)
+ # - 1w (1 calendar week)
+ # - 1mo (1 calendar month)
+ # - 1q (1 calendar quarter)
+ # - 1y (1 calendar year)
+ #
+ # By "calendar day", we mean the corresponding time on the next day
+ # (which may not be 24 hours, due to daylight savings). Similarly for
+ # "calendar week", "calendar month", "calendar quarter", and
+ # "calendar year".
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result.
+ # @param closed ['left', 'right', 'both', 'none']
+ # Define which sides of the temporal interval are closed (inclusive),
+ # defaults to `'right'`.
+ # @param warn_if_unsorted [Boolean]
+ # Warn if data is not known to be sorted by `by` column.
+ #
+ # @return [Expr]
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `rolling` - this method can cache the window size
+ # computation.
+ #
+ # @example Create a DataFrame with a datetime column and a row number column
+ # start = DateTime.new(2001, 1, 1)
+ # stop = DateTime.new(2001, 1, 2)
+ # df_temporal = Polars::DataFrame.new(
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
+ # ).with_row_index
+ # # =>
+ # # shape: (25, 2)
+ # # ┌───────┬─────────────────────┐
+ # # │ index ┆ date │
+ # # │ --- ┆ --- │
+ # # │ u32 ┆ datetime[ns] │
+ # # ╞═══════╪═════════════════════╡
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
+ # # │ … ┆ … │
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
+ # # └───────┴─────────────────────┘
+ #
+ # @example Compute the rolling min with the temporal windows closed on the right (default)
+ # df_temporal.with_columns(
+ # rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
+ # )
+ # # =>
+ # # shape: (25, 3)
+ # # ┌───────┬─────────────────────┬─────────────────┐
+ # # │ index ┆ date ┆ rolling_row_min │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
+ # # ╞═══════╪═════════════════════╪═════════════════╡
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │
+ # # │ … ┆ … ┆ … │
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │
+ # # └───────┴─────────────────────┴─────────────────┘
+ def rolling_min_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
+ )
+ end
+
+ # Apply a rolling max based on another column.
…
+ def rolling_max_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
+ )
+ end
+
+ # Apply a rolling mean based on another column.
…
+ def rolling_mean_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_mean_by(
+ by,
+ window_size,
+ min_periods,
+ closed
+ )
+ )
+ end
+
+ # Apply a rolling sum based on another column.
…
+ def rolling_sum_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
+ )
+ end
+
+ # Compute a rolling standard deviation based on another column.
…
+ def rolling_std_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ ddof: 1,
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_std_by(
+ by,
+ window_size,
+ min_periods,
+ closed,
+ ddof
+ )
+ )
+ end
+
+ # Compute a rolling variance based on another column.
…
+ def rolling_var_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ ddof: 1,
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_var_by(
+ by,
+ window_size,
+ min_periods,
+ closed,
+ ddof
+ )
+ )
+ end
+
+ # Compute a rolling median based on another column.
…
+ def rolling_median_by(
+ by,
+ window_size,
+ min_periods: 1,
+ closed: "right",
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_median_by(by, window_size, min_periods, closed)
+ )
+ end
+
+ # Compute a rolling quantile based on another column.
…
+ def rolling_quantile_by(
+ by,
+ window_size,
+ quantile:,
+ interpolation: "nearest",
+ min_periods: 1,
+ closed: "right",
+ warn_if_unsorted: nil
+ )
+ window_size = _prepare_rolling_by_window_args(window_size)
+ by = Utils.parse_as_expression(by)
+ _from_rbexpr(
+ _rbexpr.rolling_quantile_by(
+ by,
+ quantile,
+ interpolation,
+ window_size,
+ min_periods,
+ closed,
+ )
+ )
+ end
+
  # Apply a rolling min (moving min) over the values in this array.
  #
  # A window of length `window_size` will traverse the array. The values that fill
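The rolling_*_by family added above computes windows driven by a second Date/Datetime column rather than a fixed row count. A condensed sketch built from the examples in the documentation above (all three calls appear there; combining them in one with_columns is the only liberty taken):

require "date"

df_temporal = Polars::DataFrame.new(
  {"date" => Polars.datetime_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 2), "1h", eager: true)}
).with_row_index

df_temporal.with_columns(
  rolling_row_min: Polars.col("index").rolling_min_by("date", "2h"),
  rolling_row_mean: Polars.col("index").rolling_mean_by("date", "2h", closed: "both"),
  rolling_row_quantile: Polars.col("index").rolling_quantile_by("date", "2h", quantile: 0.3)
)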
@@ -3939,9 +4958,20 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_min_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_min(
- window_size, weights, min_periods, center
+ window_size, weights, min_periods, center
  )
  )
  end
@@ -4028,9 +5058,20 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_max_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_max(
- window_size, weights, min_periods, center
+ window_size, weights, min_periods, center
  )
  )
  end
@@ -4117,9 +5158,20 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_mean_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_mean(
- window_size, weights, min_periods, center
+ window_size, weights, min_periods, center
  )
  )
  end
@@ -4206,9 +5258,20 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_sum_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_sum(
- window_size, weights, min_periods, center
+ window_size, weights, min_periods, center
  )
  )
  end
@@ -4297,9 +5360,21 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_std_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ ddof: ddof,
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_std(
- window_size, weights, min_periods, center,
+ window_size, weights, min_periods, center, ddof
  )
  )
  end
@@ -4388,9 +5463,21 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_var_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ ddof: ddof,
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_var(
- window_size, weights, min_periods, center,
+ window_size, weights, min_periods, center, ddof
  )
  )
  end
@@ -4474,9 +5561,20 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_median_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ warn_if_unsorted: warn_if_unsorted
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_median(
- window_size, weights, min_periods, center
+ window_size, weights, min_periods, center
  )
  )
  end
@@ -4566,9 +5664,21 @@ module Polars
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
+ if !by.nil?
+ Utils.validate_rolling_by_aggs_arguments(weights, center: center)
+ return rolling_quantile_by(
+ by,
+ window_size,
+ min_periods: min_periods,
+ closed: closed || "right",
+ warn_if_unsorted: warn_if_unsorted,
+ quantile: quantile
+ )
+ end
+ window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
  _from_rbexpr(
  _rbexpr.rolling_quantile(
- quantile, interpolation, window_size, weights, min_periods, center
+ quantile, interpolation, window_size, weights, min_periods, center
  )
  )
  end
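As the hunks above show, each fixed-window rolling aggregation now checks for a non-nil by argument and delegates to its *_by counterpart, so the older keyword style keeps working. A sketch of the intended equivalence (reusing df_temporal from the earlier example; identical output for both columns is the expectation implied by the dispatch, not something verified here):

df_temporal.with_columns(
  via_keyword: Polars.col("index").rolling_mean("2h", by: "date", closed: "right"),
  via_by_method: Polars.col("index").rolling_mean_by("date", "2h", closed: "right")
)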
@@ -6101,5 +7211,9 @@ module Polars
  end
  [window_size, min_periods]
  end
+
+ def _prepare_rolling_by_window_args(window_size)
+ window_size
+ end
  end
  end
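The new private helper _prepare_rolling_by_window_args simply passes the temporal window string ("2h", "1d", and so on) through to the native layer. A last sketch combining it with the ddof option on the standard-deviation variant; using ddof: 0 for a population statistic is an assumption based on the documented "N - ddof" divisor:

df_temporal.with_columns(
  sample_std: Polars.col("index").rolling_std_by("date", "4h"),            # default ddof: 1
  population_std: Polars.col("index").rolling_std_by("date", "4h", ddof: 0)
)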