polars-df 0.9.0-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/LICENSE-THIRD-PARTY.txt +629 -29
- data/README.md +7 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +13 -6
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/expr.rb
CHANGED
@@ -1345,9 +1345,9 @@ module Polars
|
|
1345
1345
|
# # │ 3 ┆ 4 │
|
1346
1346
|
# # │ 2 ┆ 98 │
|
1347
1347
|
# # └───────┴──────────┘
|
1348
|
-
def top_k(k: 5)
|
1348
|
+
def top_k(k: 5, nulls_last: false, multithreaded: true)
|
1349
1349
|
k = Utils.parse_as_expression(k)
|
1350
|
-
_from_rbexpr(_rbexpr.top_k(k))
|
1350
|
+
_from_rbexpr(_rbexpr.top_k(k, nulls_last, multithreaded))
|
1351
1351
|
end
|
1352
1352
|
|
1353
1353
|
# Return the `k` smallest elements.
|
@@ -1384,9 +1384,9 @@ module Polars
|
|
1384
1384
|
# # │ 3 ┆ 4 │
|
1385
1385
|
# # │ 2 ┆ 98 │
|
1386
1386
|
# # └───────┴──────────┘
|
1387
|
-
def bottom_k(k: 5)
|
1387
|
+
def bottom_k(k: 5, nulls_last: false, multithreaded: true)
|
1388
1388
|
k = Utils.parse_as_expression(k)
|
1389
|
-
_from_rbexpr(_rbexpr.bottom_k(k))
|
1389
|
+
_from_rbexpr(_rbexpr.bottom_k(k, nulls_last, multithreaded))
|
1390
1390
|
end
|
1391
1391
|
|
1392
1392
|
# Get the index values that would sort this column.
|
@@ -1544,16 +1544,14 @@ module Polars
|
|
1544
1544
|
# # │ one │
|
1545
1545
|
# # │ two │
|
1546
1546
|
# # └───────┘
|
1547
|
-
def sort_by(by, reverse: false)
|
1548
|
-
|
1549
|
-
by = [by]
|
1550
|
-
end
|
1547
|
+
def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
|
1548
|
+
by = Utils.parse_as_list_of_expressions(by, *more_by)
|
1551
1549
|
if !reverse.is_a?(::Array)
|
1552
1550
|
reverse = [reverse]
|
1551
|
+
elsif by.length != reverse.length
|
1552
|
+
raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
|
1553
1553
|
end
|
1554
|
-
by
|
1555
|
-
|
1556
|
-
_from_rbexpr(_rbexpr.sort_by(by, reverse))
|
1554
|
+
_from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
|
1557
1555
|
end
|
1558
1556
|
|
1559
1557
|
# Take values by index.
|
@@ -2766,6 +2764,9 @@ module Polars
|
|
2766
2764
|
# Dtype of the output Series.
|
2767
2765
|
# @param agg_list [Boolean]
|
2768
2766
|
# Aggregate list.
|
2767
|
+
# @param is_elementwise [Boolean]
|
2768
|
+
# If set to true this can run in the streaming engine, but may yield
|
2769
|
+
# incorrect results in group-by. Ensure you know what you are doing!
|
2769
2770
|
#
|
2770
2771
|
# @return [Expr]
|
2771
2772
|
#
|
@@ -2786,12 +2787,21 @@ module Polars
|
|
2786
2787
|
# # ╞══════╪════════╡
|
2787
2788
|
# # │ 1 ┆ 0 │
|
2788
2789
|
# # └──────┴────────┘
|
2789
|
-
# def
|
2790
|
+
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
|
2790
2791
|
# if !return_dtype.nil?
|
2791
2792
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2792
2793
|
# end
|
2793
|
-
# _from_rbexpr(
|
2794
|
+
# _from_rbexpr(
|
2795
|
+
# _rbexpr.map_batches(
|
2796
|
+
# # TODO _map_batches_wrapper
|
2797
|
+
# f,
|
2798
|
+
# return_dtype,
|
2799
|
+
# agg_list,
|
2800
|
+
# is_elementwise
|
2801
|
+
# )
|
2802
|
+
# )
|
2794
2803
|
# end
|
2804
|
+
# alias_method :map, :map_batches
|
2795
2805
|
|
2796
2806
|
# Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
|
2797
2807
|
#
|
@@ -2833,7 +2843,7 @@ module Polars
|
|
2833
2843
|
#
|
2834
2844
|
# @example In a selection context, the function is applied by row.
|
2835
2845
|
# df.with_column(
|
2836
|
-
# Polars.col("a").
|
2846
|
+
# Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
|
2837
2847
|
# )
|
2838
2848
|
# # =>
|
2839
2849
|
# # shape: (4, 3)
|
@@ -2853,7 +2863,7 @@ module Polars
|
|
2853
2863
|
# .group_by("b", maintain_order: true)
|
2854
2864
|
# .agg(
|
2855
2865
|
# [
|
2856
|
-
# Polars.col("a").
|
2866
|
+
# Polars.col("a").map_elements { |x| x.sum }
|
2857
2867
|
# ]
|
2858
2868
|
# )
|
2859
2869
|
# .collect
|
@@ -2868,12 +2878,23 @@ module Polars
|
|
2868
2878
|
# # │ b ┆ 2 │
|
2869
2879
|
# # │ c ┆ 4 │
|
2870
2880
|
# # └─────┴─────┘
|
2871
|
-
# def
|
2872
|
-
#
|
2873
|
-
#
|
2881
|
+
# def map_elements(
|
2882
|
+
# return_dtype: nil,
|
2883
|
+
# skip_nulls: true,
|
2884
|
+
# pass_name: false,
|
2885
|
+
# strategy: "thread_local",
|
2886
|
+
# &f
|
2887
|
+
# )
|
2888
|
+
# if pass_name
|
2889
|
+
# raise Todo
|
2890
|
+
# else
|
2891
|
+
# wrap_f = lambda do |x|
|
2892
|
+
# x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
|
2893
|
+
# end
|
2874
2894
|
# end
|
2875
|
-
#
|
2895
|
+
# map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
|
2876
2896
|
# end
|
2897
|
+
# alias_method :apply, :map_elements
|
2877
2898
|
|
2878
2899
|
# Explode a list or utf8 Series. This means that every item is expanded to a new
|
2879
2900
|
# row.
|
@@ -3515,20 +3536,23 @@ module Polars
|
|
3515
3536
|
# @return [Expr]
|
3516
3537
|
#
|
3517
3538
|
# @example
|
3518
|
-
# df = Polars::DataFrame.new({"
|
3519
|
-
# df.
|
3539
|
+
# df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]})
|
3540
|
+
# df.with_columns(
|
3541
|
+
# Polars.col("x").pow(3).alias("cube"),
|
3542
|
+
# Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2")
|
3543
|
+
# )
|
3520
3544
|
# # =>
|
3521
|
-
# # shape: (4,
|
3522
|
-
# #
|
3523
|
-
# # │
|
3524
|
-
# # │ --- │
|
3525
|
-
# # │ f64
|
3526
|
-
# #
|
3527
|
-
# # │ 1.0
|
3528
|
-
# # │ 8.0
|
3529
|
-
# # │
|
3530
|
-
# # │
|
3531
|
-
# #
|
3545
|
+
# # shape: (4, 3)
|
3546
|
+
# # ┌─────┬──────┬────────────┐
|
3547
|
+
# # │ x ┆ cube ┆ x ** xlog2 │
|
3548
|
+
# # │ --- ┆ --- ┆ --- │
|
3549
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
3550
|
+
# # ╞═════╪══════╪════════════╡
|
3551
|
+
# # │ 1 ┆ 1 ┆ 1.0 │
|
3552
|
+
# # │ 2 ┆ 8 ┆ 2.0 │
|
3553
|
+
# # │ 4 ┆ 64 ┆ 16.0 │
|
3554
|
+
# # │ 8 ┆ 512 ┆ 512.0 │
|
3555
|
+
# # └─────┴──────┴────────────┘
|
3532
3556
|
def pow(exponent)
|
3533
3557
|
self**exponent
|
3534
3558
|
end
|
@@ -3856,6 +3880,1002 @@ module Polars
|
|
3856
3880
|
_from_rbexpr(_rbexpr.interpolate(method))
|
3857
3881
|
end
|
3858
3882
|
|
3883
|
+
# Apply a rolling min based on another column.
|
3884
|
+
#
|
3885
|
+
# @param by [String]
|
3886
|
+
# This column must be of dtype Datetime or Date.
|
3887
|
+
# @param window_size [String]
|
3888
|
+
# The length of the window. Can be a dynamic temporal
|
3889
|
+
# size indicated by a timedelta or the following string language:
|
3890
|
+
#
|
3891
|
+
# - 1ns (1 nanosecond)
|
3892
|
+
# - 1us (1 microsecond)
|
3893
|
+
# - 1ms (1 millisecond)
|
3894
|
+
# - 1s (1 second)
|
3895
|
+
# - 1m (1 minute)
|
3896
|
+
# - 1h (1 hour)
|
3897
|
+
# - 1d (1 calendar day)
|
3898
|
+
# - 1w (1 calendar week)
|
3899
|
+
# - 1mo (1 calendar month)
|
3900
|
+
# - 1q (1 calendar quarter)
|
3901
|
+
# - 1y (1 calendar year)
|
3902
|
+
#
|
3903
|
+
# By "calendar day", we mean the corresponding time on the next day
|
3904
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
3905
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
3906
|
+
# "calendar year".
|
3907
|
+
# @param min_periods [Integer]
|
3908
|
+
# The number of values in the window that should be non-null before computing
|
3909
|
+
# a result.
|
3910
|
+
# @param closed ['left', 'right', 'both', 'none']
|
3911
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
3912
|
+
# defaults to `'right'`.
|
3913
|
+
# @param warn_if_unsorted [Boolean]
|
3914
|
+
# Warn if data is not known to be sorted by `by` column.
|
3915
|
+
#
|
3916
|
+
# @return [Expr]
|
3917
|
+
#
|
3918
|
+
# @note
|
3919
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3920
|
+
# window, consider using `rolling` - this method can cache the window size
|
3921
|
+
# computation.
|
3922
|
+
#
|
3923
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
3924
|
+
# start = DateTime.new(2001, 1, 1)
|
3925
|
+
# stop = DateTime.new(2001, 1, 2)
|
3926
|
+
# df_temporal = Polars::DataFrame.new(
|
3927
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
3928
|
+
# ).with_row_index
|
3929
|
+
# # =>
|
3930
|
+
# # shape: (25, 2)
|
3931
|
+
# # ┌───────┬─────────────────────┐
|
3932
|
+
# # │ index ┆ date │
|
3933
|
+
# # │ --- ┆ --- │
|
3934
|
+
# # │ u32 ┆ datetime[ns] │
|
3935
|
+
# # ╞═══════╪═════════════════════╡
|
3936
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
3937
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
3938
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
3939
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
3940
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
3941
|
+
# # │ … ┆ … │
|
3942
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
3943
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
3944
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
3945
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
3946
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
3947
|
+
# # └───────┴─────────────────────┘
|
3948
|
+
#
|
3949
|
+
# @example Compute the rolling min with the temporal windows closed on the right (default)
|
3950
|
+
# df_temporal.with_columns(
|
3951
|
+
# rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
|
3952
|
+
# )
|
3953
|
+
# # =>
|
3954
|
+
# # shape: (25, 3)
|
3955
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
3956
|
+
# # │ index ┆ date ┆ rolling_row_min │
|
3957
|
+
# # │ --- ┆ --- ┆ --- │
|
3958
|
+
# # │ u32 ┆ datetime[ns] ┆ u32 │
|
3959
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
3960
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
|
3961
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │
|
3962
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │
|
3963
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │
|
3964
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │
|
3965
|
+
# # │ … ┆ … ┆ … │
|
3966
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │
|
3967
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │
|
3968
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │
|
3969
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │
|
3970
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │
|
3971
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
3972
|
+
def rolling_min_by(
|
3973
|
+
by,
|
3974
|
+
window_size,
|
3975
|
+
min_periods: 1,
|
3976
|
+
closed: "right",
|
3977
|
+
warn_if_unsorted: nil
|
3978
|
+
)
|
3979
|
+
window_size = _prepare_rolling_by_window_args(window_size)
|
3980
|
+
by = Utils.parse_as_expression(by)
|
3981
|
+
_from_rbexpr(
|
3982
|
+
_rbexpr.rolling_min_by(by, window_size, min_periods, closed)
|
3983
|
+
)
|
3984
|
+
end
|
3985
|
+
|
3986
|
+
# Apply a rolling max based on another column.
|
3987
|
+
#
|
3988
|
+
# @param by [String]
|
3989
|
+
# This column must be of dtype Datetime or Date.
|
3990
|
+
# @param window_size [String]
|
3991
|
+
# The length of the window. Can be a dynamic temporal
|
3992
|
+
# size indicated by a timedelta or the following string language:
|
3993
|
+
#
|
3994
|
+
# - 1ns (1 nanosecond)
|
3995
|
+
# - 1us (1 microsecond)
|
3996
|
+
# - 1ms (1 millisecond)
|
3997
|
+
# - 1s (1 second)
|
3998
|
+
# - 1m (1 minute)
|
3999
|
+
# - 1h (1 hour)
|
4000
|
+
# - 1d (1 calendar day)
|
4001
|
+
# - 1w (1 calendar week)
|
4002
|
+
# - 1mo (1 calendar month)
|
4003
|
+
# - 1q (1 calendar quarter)
|
4004
|
+
# - 1y (1 calendar year)
|
4005
|
+
#
|
4006
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4007
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4008
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4009
|
+
# "calendar year".
|
4010
|
+
# @param min_periods [Integer]
|
4011
|
+
# The number of values in the window that should be non-null before computing
|
4012
|
+
# a result.
|
4013
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4014
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4015
|
+
# defaults to `'right'`.
|
4016
|
+
# @param warn_if_unsorted [Boolean]
|
4017
|
+
# Warn if data is not known to be sorted by `by` column.
|
4018
|
+
#
|
4019
|
+
# @return [Expr]
|
4020
|
+
#
|
4021
|
+
# @note
|
4022
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4023
|
+
# window, consider using `rolling` - this method can cache the window size
|
4024
|
+
# computation.
|
4025
|
+
#
|
4026
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4027
|
+
# start = DateTime.new(2001, 1, 1)
|
4028
|
+
# stop = DateTime.new(2001, 1, 2)
|
4029
|
+
# df_temporal = Polars::DataFrame.new(
|
4030
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4031
|
+
# ).with_row_index
|
4032
|
+
# # =>
|
4033
|
+
# # shape: (25, 2)
|
4034
|
+
# # ┌───────┬─────────────────────┐
|
4035
|
+
# # │ index ┆ date │
|
4036
|
+
# # │ --- ┆ --- │
|
4037
|
+
# # │ u32 ┆ datetime[ns] │
|
4038
|
+
# # ╞═══════╪═════════════════════╡
|
4039
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4040
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4041
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4042
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4043
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4044
|
+
# # │ … ┆ … │
|
4045
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4046
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4047
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4048
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4049
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4050
|
+
# # └───────┴─────────────────────┘
|
4051
|
+
#
|
4052
|
+
# @example Compute the rolling max with the temporal windows closed on the right (default)
|
4053
|
+
# df_temporal.with_columns(
|
4054
|
+
# rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
|
4055
|
+
# )
|
4056
|
+
# # =>
|
4057
|
+
# # shape: (25, 3)
|
4058
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4059
|
+
# # │ index ┆ date ┆ rolling_row_max │
|
4060
|
+
# # │ --- ┆ --- ┆ --- │
|
4061
|
+
# # │ u32 ┆ datetime[ns] ┆ u32 │
|
4062
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4063
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
|
4064
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
|
4065
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
|
4066
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
|
4067
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
|
4068
|
+
# # │ … ┆ … ┆ … │
|
4069
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
|
4070
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
|
4071
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
|
4072
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
|
4073
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
|
4074
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4075
|
+
#
|
4076
|
+
# @example Compute the rolling max with the closure of windows on both sides
|
4077
|
+
# df_temporal.with_columns(
|
4078
|
+
# rolling_row_max: Polars.col("index").rolling_max_by(
|
4079
|
+
# "date", "2h", closed: "both"
|
4080
|
+
# )
|
4081
|
+
# )
|
4082
|
+
# # =>
|
4083
|
+
# # shape: (25, 3)
|
4084
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4085
|
+
# # │ index ┆ date ┆ rolling_row_max │
|
4086
|
+
# # │ --- ┆ --- ┆ --- │
|
4087
|
+
# # │ u32 ┆ datetime[ns] ┆ u32 │
|
4088
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4089
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
|
4090
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
|
4091
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
|
4092
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
|
4093
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
|
4094
|
+
# # │ … ┆ … ┆ … │
|
4095
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
|
4096
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
|
4097
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
|
4098
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
|
4099
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
|
4100
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4101
|
+
def rolling_max_by(
|
4102
|
+
by,
|
4103
|
+
window_size,
|
4104
|
+
min_periods: 1,
|
4105
|
+
closed: "right",
|
4106
|
+
warn_if_unsorted: nil
|
4107
|
+
)
|
4108
|
+
window_size = _prepare_rolling_by_window_args(window_size)
|
4109
|
+
by = Utils.parse_as_expression(by)
|
4110
|
+
_from_rbexpr(
|
4111
|
+
_rbexpr.rolling_max_by(by, window_size, min_periods, closed)
|
4112
|
+
)
|
4113
|
+
end
|
4114
|
+
|
4115
|
+
# Apply a rolling mean based on another column.
|
4116
|
+
#
|
4117
|
+
# @param by [String]
|
4118
|
+
# This column must be of dtype Datetime or Date.
|
4119
|
+
# @param window_size [String]
|
4120
|
+
# The length of the window. Can be a dynamic temporal
|
4121
|
+
# size indicated by a timedelta or the following string language:
|
4122
|
+
#
|
4123
|
+
# - 1ns (1 nanosecond)
|
4124
|
+
# - 1us (1 microsecond)
|
4125
|
+
# - 1ms (1 millisecond)
|
4126
|
+
# - 1s (1 second)
|
4127
|
+
# - 1m (1 minute)
|
4128
|
+
# - 1h (1 hour)
|
4129
|
+
# - 1d (1 calendar day)
|
4130
|
+
# - 1w (1 calendar week)
|
4131
|
+
# - 1mo (1 calendar month)
|
4132
|
+
# - 1q (1 calendar quarter)
|
4133
|
+
# - 1y (1 calendar year)
|
4134
|
+
#
|
4135
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4136
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4137
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4138
|
+
# "calendar year".
|
4139
|
+
# @param min_periods [Integer]
|
4140
|
+
# The number of values in the window that should be non-null before computing
|
4141
|
+
# a result.
|
4142
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4143
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4144
|
+
# defaults to `'right'`.
|
4145
|
+
# @param warn_if_unsorted [Boolean]
|
4146
|
+
# Warn if data is not known to be sorted by `by` column.
|
4147
|
+
#
|
4148
|
+
# @return [Expr]
|
4149
|
+
#
|
4150
|
+
# @note
|
4151
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4152
|
+
# window, consider using `rolling` - this method can cache the window size
|
4153
|
+
# computation.
|
4154
|
+
#
|
4155
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4156
|
+
# start = DateTime.new(2001, 1, 1)
|
4157
|
+
# stop = DateTime.new(2001, 1, 2)
|
4158
|
+
# df_temporal = Polars::DataFrame.new(
|
4159
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4160
|
+
# ).with_row_index
|
4161
|
+
# # =>
|
4162
|
+
# # shape: (25, 2)
|
4163
|
+
# # ┌───────┬─────────────────────┐
|
4164
|
+
# # │ index ┆ date │
|
4165
|
+
# # │ --- ┆ --- │
|
4166
|
+
# # │ u32 ┆ datetime[ns] │
|
4167
|
+
# # ╞═══════╪═════════════════════╡
|
4168
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4169
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4170
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4171
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4172
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4173
|
+
# # │ … ┆ … │
|
4174
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4175
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4176
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4177
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4178
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4179
|
+
# # └───────┴─────────────────────┘
|
4180
|
+
#
|
4181
|
+
# @example Compute the rolling mean with the temporal windows closed on the right (default)
|
4182
|
+
# df_temporal.with_columns(
|
4183
|
+
# rolling_row_mean: Polars.col("index").rolling_mean_by(
|
4184
|
+
# "date", "2h"
|
4185
|
+
# )
|
4186
|
+
# )
|
4187
|
+
# # =>
|
4188
|
+
# # shape: (25, 3)
|
4189
|
+
# # ┌───────┬─────────────────────┬──────────────────┐
|
4190
|
+
# # │ index ┆ date ┆ rolling_row_mean │
|
4191
|
+
# # │ --- ┆ --- ┆ --- │
|
4192
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4193
|
+
# # ╞═══════╪═════════════════════╪══════════════════╡
|
4194
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
|
4195
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
|
4196
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
|
4197
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
|
4198
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
|
4199
|
+
# # │ … ┆ … ┆ … │
|
4200
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
|
4201
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
|
4202
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
|
4203
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
|
4204
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
|
4205
|
+
# # └───────┴─────────────────────┴──────────────────┘
|
4206
|
+
#
|
4207
|
+
# @example Compute the rolling mean with the closure of windows on both sides
|
4208
|
+
# df_temporal.with_columns(
|
4209
|
+
# rolling_row_mean: Polars.col("index").rolling_mean_by(
|
4210
|
+
# "date", "2h", closed: "both"
|
4211
|
+
# )
|
4212
|
+
# )
|
4213
|
+
# # =>
|
4214
|
+
# # shape: (25, 3)
|
4215
|
+
# # ┌───────┬─────────────────────┬──────────────────┐
|
4216
|
+
# # │ index ┆ date ┆ rolling_row_mean │
|
4217
|
+
# # │ --- ┆ --- ┆ --- │
|
4218
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4219
|
+
# # ╞═══════╪═════════════════════╪══════════════════╡
|
4220
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
|
4221
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
|
4222
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
|
4223
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
|
4224
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
|
4225
|
+
# # │ … ┆ … ┆ … │
|
4226
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
|
4227
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
|
4228
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
|
4229
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
|
4230
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
|
4231
|
+
# # └───────┴─────────────────────┴──────────────────┘
|
4232
|
+
def rolling_mean_by(
|
4233
|
+
by,
|
4234
|
+
window_size,
|
4235
|
+
min_periods: 1,
|
4236
|
+
closed: "right",
|
4237
|
+
warn_if_unsorted: nil
|
4238
|
+
)
|
4239
|
+
window_size = _prepare_rolling_by_window_args(window_size)
|
4240
|
+
by = Utils.parse_as_expression(by)
|
4241
|
+
_from_rbexpr(
|
4242
|
+
_rbexpr.rolling_mean_by(
|
4243
|
+
by,
|
4244
|
+
window_size,
|
4245
|
+
min_periods,
|
4246
|
+
closed
|
4247
|
+
)
|
4248
|
+
)
|
4249
|
+
end
|
4250
|
+
|
4251
|
+
# Apply a rolling sum based on another column.
|
4252
|
+
#
|
4253
|
+
# @param by [String]
|
4254
|
+
# This column must be of dtype Datetime or Date.
|
4255
|
+
# @param window_size [String]
|
4256
|
+
# The length of the window. Can be a dynamic temporal
|
4257
|
+
# size indicated by a timedelta or the following string language:
|
4258
|
+
#
|
4259
|
+
# - 1ns (1 nanosecond)
|
4260
|
+
# - 1us (1 microsecond)
|
4261
|
+
# - 1ms (1 millisecond)
|
4262
|
+
# - 1s (1 second)
|
4263
|
+
# - 1m (1 minute)
|
4264
|
+
# - 1h (1 hour)
|
4265
|
+
# - 1d (1 calendar day)
|
4266
|
+
# - 1w (1 calendar week)
|
4267
|
+
# - 1mo (1 calendar month)
|
4268
|
+
# - 1q (1 calendar quarter)
|
4269
|
+
# - 1y (1 calendar year)
|
4270
|
+
#
|
4271
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4272
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4273
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4274
|
+
# "calendar year".
|
4275
|
+
# @param min_periods [Integer]
|
4276
|
+
# The number of values in the window that should be non-null before computing
|
4277
|
+
# a result.
|
4278
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4279
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4280
|
+
# defaults to `'right'`.
|
4281
|
+
# @param warn_if_unsorted [Boolean]
|
4282
|
+
# Warn if data is not known to be sorted by `by` column.
|
4283
|
+
#
|
4284
|
+
# @return [Expr]
|
4285
|
+
#
|
4286
|
+
# @note
|
4287
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4288
|
+
# window, consider using `rolling` - this method can cache the window size
|
4289
|
+
# computation.
|
4290
|
+
#
|
4291
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4292
|
+
# start = DateTime.new(2001, 1, 1)
|
4293
|
+
# stop = DateTime.new(2001, 1, 2)
|
4294
|
+
# df_temporal = Polars::DataFrame.new(
|
4295
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4296
|
+
# ).with_row_index
|
4297
|
+
# # =>
|
4298
|
+
# # shape: (25, 2)
|
4299
|
+
# # ┌───────┬─────────────────────┐
|
4300
|
+
# # │ index ┆ date │
|
4301
|
+
# # │ --- ┆ --- │
|
4302
|
+
# # │ u32 ┆ datetime[ns] │
|
4303
|
+
# # ╞═══════╪═════════════════════╡
|
4304
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4305
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4306
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4307
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4308
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4309
|
+
# # │ … ┆ … │
|
4310
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4311
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4312
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4313
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4314
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4315
|
+
# # └───────┴─────────────────────┘
|
4316
|
+
#
|
4317
|
+
# @example Compute the rolling sum with the temporal windows closed on the right (default)
|
4318
|
+
# df_temporal.with_columns(
|
4319
|
+
# rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
|
4320
|
+
# )
|
4321
|
+
# # =>
|
4322
|
+
# # shape: (25, 3)
|
4323
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4324
|
+
# # │ index ┆ date ┆ rolling_row_sum │
|
4325
|
+
# # │ --- ┆ --- ┆ --- │
|
4326
|
+
# # │ u32 ┆ datetime[ns] ┆ u32 │
|
4327
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4328
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
|
4329
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
|
4330
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
|
4331
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 5 │
|
4332
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 7 │
|
4333
|
+
# # │ … ┆ … ┆ … │
|
4334
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 39 │
|
4335
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 41 │
|
4336
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 43 │
|
4337
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 45 │
|
4338
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 47 │
|
4339
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4340
|
+
#
|
4341
|
+
# @example Compute the rolling sum with the closure of windows on both sides
|
4342
|
+
# df_temporal.with_columns(
|
4343
|
+
# rolling_row_sum: Polars.col("index").rolling_sum_by(
|
4344
|
+
# "date", "2h", closed: "both"
|
4345
|
+
# )
|
4346
|
+
# )
|
4347
|
+
# # =>
|
4348
|
+
# # shape: (25, 3)
|
4349
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4350
|
+
# # │ index ┆ date ┆ rolling_row_sum │
|
4351
|
+
# # │ --- ┆ --- ┆ --- │
|
4352
|
+
# # │ u32 ┆ datetime[ns] ┆ u32 │
|
4353
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4354
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
|
4355
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
|
4356
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
|
4357
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 6 │
|
4358
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 9 │
|
4359
|
+
# # │ … ┆ … ┆ … │
|
4360
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 57 │
|
4361
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 60 │
|
4362
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 63 │
|
4363
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 66 │
|
4364
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │
|
4365
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4366
|
+
def rolling_sum_by(
|
4367
|
+
by,
|
4368
|
+
window_size,
|
4369
|
+
min_periods: 1,
|
4370
|
+
closed: "right",
|
4371
|
+
warn_if_unsorted: nil
|
4372
|
+
)
|
4373
|
+
window_size = _prepare_rolling_by_window_args(window_size)
|
4374
|
+
by = Utils.parse_as_expression(by)
|
4375
|
+
_from_rbexpr(
|
4376
|
+
_rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
|
4377
|
+
)
|
4378
|
+
end
|
4379
|
+
|
4380
|
+
# Compute a rolling standard deviation based on another column.
|
4381
|
+
#
|
4382
|
+
# @param by [String]
|
4383
|
+
# This column must be of dtype Datetime or Date.
|
4384
|
+
# @param window_size [String]
|
4385
|
+
# The length of the window. Can be a dynamic temporal
|
4386
|
+
# size indicated by a timedelta or the following string language:
|
4387
|
+
#
|
4388
|
+
# - 1ns (1 nanosecond)
|
4389
|
+
# - 1us (1 microsecond)
|
4390
|
+
# - 1ms (1 millisecond)
|
4391
|
+
# - 1s (1 second)
|
4392
|
+
# - 1m (1 minute)
|
4393
|
+
# - 1h (1 hour)
|
4394
|
+
# - 1d (1 calendar day)
|
4395
|
+
# - 1w (1 calendar week)
|
4396
|
+
# - 1mo (1 calendar month)
|
4397
|
+
# - 1q (1 calendar quarter)
|
4398
|
+
# - 1y (1 calendar year)
|
4399
|
+
#
|
4400
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4401
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4402
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4403
|
+
# "calendar year".
|
4404
|
+
# @param min_periods [Integer]
|
4405
|
+
# The number of values in the window that should be non-null before computing
|
4406
|
+
# a result.
|
4407
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4408
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4409
|
+
# defaults to `'right'`.
|
4410
|
+
# @param ddof [Integer]
|
4411
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
4412
|
+
# @param warn_if_unsorted [Boolean]
|
4413
|
+
# Warn if data is not known to be sorted by `by` column.
|
4414
|
+
#
|
4415
|
+
# @return [Expr]
|
4416
|
+
#
|
4417
|
+
# @note
|
4418
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4419
|
+
# window, consider using `rolling` - this method can cache the window size
|
4420
|
+
# computation.
|
4421
|
+
#
|
4422
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4423
|
+
# start = DateTime.new(2001, 1, 1)
|
4424
|
+
# stop = DateTime.new(2001, 1, 2)
|
4425
|
+
# df_temporal = Polars::DataFrame.new(
|
4426
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4427
|
+
# ).with_row_index
|
4428
|
+
# # =>
|
4429
|
+
# # shape: (25, 2)
|
4430
|
+
# # ┌───────┬─────────────────────┐
|
4431
|
+
# # │ index ┆ date │
|
4432
|
+
# # │ --- ┆ --- │
|
4433
|
+
# # │ u32 ┆ datetime[ns] │
|
4434
|
+
# # ╞═══════╪═════════════════════╡
|
4435
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4436
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4437
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4438
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4439
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4440
|
+
# # │ … ┆ … │
|
4441
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4442
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4443
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4444
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4445
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4446
|
+
# # └───────┴─────────────────────┘
|
4447
|
+
#
|
4448
|
+
# @example Compute the rolling std with the temporal windows closed on the right (default)
|
4449
|
+
# df_temporal.with_columns(
|
4450
|
+
# rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
|
4451
|
+
# )
|
4452
|
+
# # =>
|
4453
|
+
# # shape: (25, 3)
|
4454
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4455
|
+
# # │ index ┆ date ┆ rolling_row_std │
|
4456
|
+
# # │ --- ┆ --- ┆ --- │
|
4457
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4458
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4459
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
|
4460
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
|
4461
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.707107 │
|
4462
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.707107 │
|
4463
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.707107 │
|
4464
|
+
# # │ … ┆ … ┆ … │
|
4465
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.707107 │
|
4466
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.707107 │
|
4467
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.707107 │
|
4468
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.707107 │
|
4469
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.707107 │
|
4470
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4471
|
+
#
|
4472
|
+
# @example Compute the rolling std with the closure of windows on both sides
|
4473
|
+
# df_temporal.with_columns(
|
4474
|
+
# rolling_row_std: Polars.col("index").rolling_std_by(
|
4475
|
+
# "date", "2h", closed: "both"
|
4476
|
+
# )
|
4477
|
+
# )
|
4478
|
+
# # =>
|
4479
|
+
# # shape: (25, 3)
|
4480
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4481
|
+
# # │ index ┆ date ┆ rolling_row_std │
|
4482
|
+
# # │ --- ┆ --- ┆ --- │
|
4483
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4484
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4485
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
|
4486
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
|
4487
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
|
4488
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
|
4489
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
|
4490
|
+
# # │ … ┆ … ┆ … │
|
4491
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
|
4492
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
|
4493
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
|
4494
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
|
4495
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
|
4496
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4497
|
+
def rolling_std_by(by, window_size, min_periods: 1, closed: "right", ddof: 1, warn_if_unsorted: nil)
  # Pass the window through the shared helper first, then parse `by`,
  # in the same order as before so any raised error surfaces identically.
  window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)

  # NOTE: `warn_if_unsorted` is part of the signature but is not forwarded
  # to the native call below.
  native = _rbexpr.rolling_std_by(by_expr, window, min_periods, closed, ddof)
  _from_rbexpr(native)
end
|
4517
|
+
|
4518
|
+
# Compute a rolling variance based on another column.
|
4519
|
+
#
|
4520
|
+
# @param by [String]
|
4521
|
+
# This column must be of dtype Datetime or Date.
|
4522
|
+
# @param window_size [String]
|
4523
|
+
# The length of the window. Can be a dynamic temporal
|
4524
|
+
# size indicated by a timedelta or the following string language:
|
4525
|
+
#
|
4526
|
+
# - 1ns (1 nanosecond)
|
4527
|
+
# - 1us (1 microsecond)
|
4528
|
+
# - 1ms (1 millisecond)
|
4529
|
+
# - 1s (1 second)
|
4530
|
+
# - 1m (1 minute)
|
4531
|
+
# - 1h (1 hour)
|
4532
|
+
# - 1d (1 calendar day)
|
4533
|
+
# - 1w (1 calendar week)
|
4534
|
+
# - 1mo (1 calendar month)
|
4535
|
+
# - 1q (1 calendar quarter)
|
4536
|
+
# - 1y (1 calendar year)
|
4537
|
+
#
|
4538
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4539
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4540
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4541
|
+
# "calendar year".
|
4542
|
+
# @param min_periods [Integer]
|
4543
|
+
# The number of values in the window that should be non-null before computing
|
4544
|
+
# a result.
|
4545
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4546
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4547
|
+
# defaults to `'right'`.
|
4548
|
+
# @param ddof [Integer]
|
4549
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
4550
|
+
# @param warn_if_unsorted [Boolean]
|
4551
|
+
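# Warn if data is not known to be sorted by the `by` column. Note: this method currently accepts the option but does not forward it to the underlying expression.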
|
4552
|
+
#
|
4553
|
+
# @return [Expr]
|
4554
|
+
#
|
4555
|
+
# @note
|
4556
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4557
|
+
# window, consider using `rolling` - this method can cache the window size
|
4558
|
+
# computation.
|
4559
|
+
#
|
4560
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4561
|
+
# start = DateTime.new(2001, 1, 1)
|
4562
|
+
# stop = DateTime.new(2001, 1, 2)
|
4563
|
+
# df_temporal = Polars::DataFrame.new(
|
4564
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4565
|
+
# ).with_row_index
|
4566
|
+
# # =>
|
4567
|
+
# # shape: (25, 2)
|
4568
|
+
# # ┌───────┬─────────────────────┐
|
4569
|
+
# # │ index ┆ date │
|
4570
|
+
# # │ --- ┆ --- │
|
4571
|
+
# # │ u32 ┆ datetime[ns] │
|
4572
|
+
# # ╞═══════╪═════════════════════╡
|
4573
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4574
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4575
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4576
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4577
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4578
|
+
# # │ … ┆ … │
|
4579
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4580
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4581
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4582
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4583
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4584
|
+
# # └───────┴─────────────────────┘
|
4585
|
+
#
|
4586
|
+
# @example Compute the rolling var with the temporal windows closed on the right (default)
|
4587
|
+
# df_temporal.with_columns(
|
4588
|
+
# rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
|
4589
|
+
# )
|
4590
|
+
# # =>
|
4591
|
+
# # shape: (25, 3)
|
4592
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4593
|
+
# # │ index ┆ date ┆ rolling_row_var │
|
4594
|
+
# # │ --- ┆ --- ┆ --- │
|
4595
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4596
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4597
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
|
4598
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
|
4599
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.5 │
|
4600
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.5 │
|
4601
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.5 │
|
4602
|
+
# # │ … ┆ … ┆ … │
|
4603
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.5 │
|
4604
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.5 │
|
4605
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.5 │
|
4606
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.5 │
|
4607
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.5 │
|
4608
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4609
|
+
#
|
4610
|
+
# @example Compute the rolling var with the closure of windows on both sides
|
4611
|
+
# df_temporal.with_columns(
|
4612
|
+
# rolling_row_var: Polars.col("index").rolling_var_by(
|
4613
|
+
# "date", "2h", closed: "both"
|
4614
|
+
# )
|
4615
|
+
# )
|
4616
|
+
# # =>
|
4617
|
+
# # shape: (25, 3)
|
4618
|
+
# # ┌───────┬─────────────────────┬─────────────────┐
|
4619
|
+
# # │ index ┆ date ┆ rolling_row_var │
|
4620
|
+
# # │ --- ┆ --- ┆ --- │
|
4621
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4622
|
+
# # ╞═══════╪═════════════════════╪═════════════════╡
|
4623
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
|
4624
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
|
4625
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
|
4626
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
|
4627
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
|
4628
|
+
# # │ … ┆ … ┆ … │
|
4629
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
|
4630
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
|
4631
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
|
4632
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
|
4633
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
|
4634
|
+
# # └───────┴─────────────────────┴─────────────────┘
|
4635
|
+
def rolling_var_by(by, window_size, min_periods: 1, closed: "right", ddof: 1, warn_if_unsorted: nil)
  # Pass the window through the shared helper first, then parse `by`,
  # in the same order as before so any raised error surfaces identically.
  window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)

  # NOTE: `warn_if_unsorted` is part of the signature but is not forwarded
  # to the native call below.
  native = _rbexpr.rolling_var_by(by_expr, window, min_periods, closed, ddof)
  _from_rbexpr(native)
end
|
4655
|
+
|
4656
|
+
# Compute a rolling median based on another column.
|
4657
|
+
#
|
4658
|
+
# @param by [String]
|
4659
|
+
# This column must be of dtype Datetime or Date.
|
4660
|
+
# @param window_size [String]
|
4661
|
+
# The length of the window. Can be a dynamic temporal
|
4662
|
+
# size indicated by a timedelta or the following string language:
|
4663
|
+
#
|
4664
|
+
# - 1ns (1 nanosecond)
|
4665
|
+
# - 1us (1 microsecond)
|
4666
|
+
# - 1ms (1 millisecond)
|
4667
|
+
# - 1s (1 second)
|
4668
|
+
# - 1m (1 minute)
|
4669
|
+
# - 1h (1 hour)
|
4670
|
+
# - 1d (1 calendar day)
|
4671
|
+
# - 1w (1 calendar week)
|
4672
|
+
# - 1mo (1 calendar month)
|
4673
|
+
# - 1q (1 calendar quarter)
|
4674
|
+
# - 1y (1 calendar year)
|
4675
|
+
#
|
4676
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4677
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4678
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4679
|
+
# "calendar year".
|
4680
|
+
# @param min_periods [Integer]
|
4681
|
+
# The number of values in the window that should be non-null before computing
|
4682
|
+
# a result.
|
4683
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4684
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4685
|
+
# defaults to `'right'`.
|
4686
|
+
# @param warn_if_unsorted [Boolean]
|
4687
|
+
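# Warn if data is not known to be sorted by the `by` column. Note: this method currently accepts the option but does not forward it to the underlying expression.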
|
4688
|
+
#
|
4689
|
+
# @return [Expr]
|
4690
|
+
#
|
4691
|
+
# @note
|
4692
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4693
|
+
# window, consider using `rolling` - this method can cache the window size
|
4694
|
+
# computation.
|
4695
|
+
#
|
4696
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4697
|
+
# start = DateTime.new(2001, 1, 1)
|
4698
|
+
# stop = DateTime.new(2001, 1, 2)
|
4699
|
+
# df_temporal = Polars::DataFrame.new(
|
4700
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4701
|
+
# ).with_row_index
|
4702
|
+
# # =>
|
4703
|
+
# # shape: (25, 2)
|
4704
|
+
# # ┌───────┬─────────────────────┐
|
4705
|
+
# # │ index ┆ date │
|
4706
|
+
# # │ --- ┆ --- │
|
4707
|
+
# # │ u32 ┆ datetime[ns] │
|
4708
|
+
# # ╞═══════╪═════════════════════╡
|
4709
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4710
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4711
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4712
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4713
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4714
|
+
# # │ … ┆ … │
|
4715
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4716
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4717
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4718
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4719
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4720
|
+
# # └───────┴─────────────────────┘
|
4721
|
+
#
|
4722
|
+
# @example Compute the rolling median with the temporal windows closed on the right:
|
4723
|
+
# df_temporal.with_columns(
|
4724
|
+
# rolling_row_median: Polars.col("index").rolling_median_by(
|
4725
|
+
# "date", "2h"
|
4726
|
+
# )
|
4727
|
+
# )
|
4728
|
+
# # =>
|
4729
|
+
# # shape: (25, 3)
|
4730
|
+
# # ┌───────┬─────────────────────┬────────────────────┐
|
4731
|
+
# # │ index ┆ date ┆ rolling_row_median │
|
4732
|
+
# # │ --- ┆ --- ┆ --- │
|
4733
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4734
|
+
# # ╞═══════╪═════════════════════╪════════════════════╡
|
4735
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
|
4736
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
|
4737
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
|
4738
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
|
4739
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
|
4740
|
+
# # │ … ┆ … ┆ … │
|
4741
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
|
4742
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
|
4743
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
|
4744
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
|
4745
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
|
4746
|
+
# # └───────┴─────────────────────┴────────────────────┘
|
4747
|
+
def rolling_median_by(by, window_size, min_periods: 1, closed: "right", warn_if_unsorted: nil)
  # Pass the window through the shared helper first, then parse `by`,
  # in the same order as before so any raised error surfaces identically.
  window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)

  # NOTE: `warn_if_unsorted` is part of the signature but is not forwarded
  # to the native call below.
  native = _rbexpr.rolling_median_by(by_expr, window, min_periods, closed)
  _from_rbexpr(native)
end
|
4760
|
+
|
4761
|
+
# Compute a rolling quantile based on another column.
|
4762
|
+
#
|
4763
|
+
# @param by [String]
|
4764
|
+
# This column must be of dtype Datetime or Date.
|
4765
|
+
# @param quantile [Float]
|
4766
|
+
# Quantile between 0.0 and 1.0.
|
4767
|
+
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
4768
|
+
# Interpolation method.
|
4769
|
+
# @param window_size [String]
|
4770
|
+
# The length of the window. Can be a dynamic
|
4771
|
+
# temporal size indicated by a timedelta or the following string language:
|
4772
|
+
#
|
4773
|
+
# - 1ns (1 nanosecond)
|
4774
|
+
# - 1us (1 microsecond)
|
4775
|
+
# - 1ms (1 millisecond)
|
4776
|
+
# - 1s (1 second)
|
4777
|
+
# - 1m (1 minute)
|
4778
|
+
# - 1h (1 hour)
|
4779
|
+
# - 1d (1 calendar day)
|
4780
|
+
# - 1w (1 calendar week)
|
4781
|
+
# - 1mo (1 calendar month)
|
4782
|
+
# - 1q (1 calendar quarter)
|
4783
|
+
# - 1y (1 calendar year)
|
4784
|
+
#
|
4785
|
+
# By "calendar day", we mean the corresponding time on the next day
|
4786
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4787
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
4788
|
+
# "calendar year".
|
4789
|
+
# @param min_periods [Integer]
|
4790
|
+
# The number of values in the window that should be non-null before computing
|
4791
|
+
# a result.
|
4792
|
+
# @param closed ['left', 'right', 'both', 'none']
|
4793
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
4794
|
+
# defaults to `'right'`.
|
4795
|
+
# @param warn_if_unsorted [Boolean]
|
4796
|
+
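# Warn if data is not known to be sorted by the `by` column. Note: this method currently accepts the option but does not forward it to the underlying expression.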
|
4797
|
+
#
|
4798
|
+
# @return [Expr]
|
4799
|
+
#
|
4800
|
+
# @note
|
4801
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
4802
|
+
# window, consider using `rolling` - this method can cache the window size
|
4803
|
+
# computation.
|
4804
|
+
#
|
4805
|
+
# @example Create a DataFrame with a datetime column and a row number column
|
4806
|
+
# start = DateTime.new(2001, 1, 1)
|
4807
|
+
# stop = DateTime.new(2001, 1, 2)
|
4808
|
+
# df_temporal = Polars::DataFrame.new(
|
4809
|
+
# {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
|
4810
|
+
# ).with_row_index
|
4811
|
+
# # =>
|
4812
|
+
# # shape: (25, 2)
|
4813
|
+
# # ┌───────┬─────────────────────┐
|
4814
|
+
# # │ index ┆ date │
|
4815
|
+
# # │ --- ┆ --- │
|
4816
|
+
# # │ u32 ┆ datetime[ns] │
|
4817
|
+
# # ╞═══════╪═════════════════════╡
|
4818
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 │
|
4819
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 │
|
4820
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 │
|
4821
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 │
|
4822
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 │
|
4823
|
+
# # │ … ┆ … │
|
4824
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 │
|
4825
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 │
|
4826
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 │
|
4827
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 │
|
4828
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 │
|
4829
|
+
# # └───────┴─────────────────────┘
|
4830
|
+
#
|
4831
|
+
# @example Compute the rolling quantile with the temporal windows closed on the right:
|
4832
|
+
# df_temporal.with_columns(
|
4833
|
+
# rolling_row_quantile: Polars.col("index").rolling_quantile_by(
|
4834
|
+
# "date", "2h", quantile: 0.3
|
4835
|
+
# )
|
4836
|
+
# )
|
4837
|
+
# # =>
|
4838
|
+
# # shape: (25, 3)
|
4839
|
+
# # ┌───────┬─────────────────────┬──────────────────────┐
|
4840
|
+
# # │ index ┆ date ┆ rolling_row_quantile │
|
4841
|
+
# # │ --- ┆ --- ┆ --- │
|
4842
|
+
# # │ u32 ┆ datetime[ns] ┆ f64 │
|
4843
|
+
# # ╞═══════╪═════════════════════╪══════════════════════╡
|
4844
|
+
# # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
|
4845
|
+
# # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.0 │
|
4846
|
+
# # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
|
4847
|
+
# # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
|
4848
|
+
# # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
|
4849
|
+
# # │ … ┆ … ┆ … │
|
4850
|
+
# # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
|
4851
|
+
# # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
|
4852
|
+
# # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
|
4853
|
+
# # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
|
4854
|
+
# # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
|
4855
|
+
# # └───────┴─────────────────────┴──────────────────────┘
|
4856
|
+
def rolling_quantile_by(
  by,
  window_size,
  quantile:,
  interpolation: "nearest",
  min_periods: 1,
  closed: "right",
  warn_if_unsorted: nil
)
  # Pass the window through the shared helper first, then parse `by`,
  # in the same order as before so any raised error surfaces identically.
  window = _prepare_rolling_by_window_args(window_size)
  by_expr = Utils.parse_as_expression(by)

  # The native call takes quantile and interpolation *before* the window,
  # unlike this method's own positional order.
  # NOTE: `warn_if_unsorted` is part of the signature but is not forwarded.
  native = _rbexpr.rolling_quantile_by(by_expr, quantile, interpolation, window, min_periods, closed)
  _from_rbexpr(native)
end
|
4878
|
+
|
3859
4879
|
# Apply a rolling min (moving min) over the values in this array.
|
3860
4880
|
#
|
3861
4881
|
# A window of length `window_size` will traverse the array. The values that fill
|
@@ -3933,14 +4953,25 @@ module Polars
|
|
3933
4953
|
min_periods: nil,
|
3934
4954
|
center: false,
|
3935
4955
|
by: nil,
|
3936
|
-
closed:
|
4956
|
+
closed: nil
|
3937
4957
|
)
|
3938
4958
|
window_size, min_periods = _prepare_rolling_window_args(
|
3939
4959
|
window_size, min_periods
|
3940
4960
|
)
|
4961
|
+
if !by.nil?
|
4962
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
4963
|
+
return rolling_min_by(
|
4964
|
+
by,
|
4965
|
+
window_size,
|
4966
|
+
min_periods: min_periods,
|
4967
|
+
closed: closed || "right",
|
4968
|
+
warn_if_unsorted: warn_if_unsorted
|
4969
|
+
)
|
4970
|
+
end
|
4971
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
3941
4972
|
_from_rbexpr(
|
3942
4973
|
_rbexpr.rolling_min(
|
3943
|
-
window_size, weights, min_periods, center
|
4974
|
+
window_size, weights, min_periods, center
|
3944
4975
|
)
|
3945
4976
|
)
|
3946
4977
|
end
|
@@ -4022,14 +5053,25 @@ module Polars
|
|
4022
5053
|
min_periods: nil,
|
4023
5054
|
center: false,
|
4024
5055
|
by: nil,
|
4025
|
-
closed:
|
5056
|
+
closed: nil
|
4026
5057
|
)
|
4027
5058
|
window_size, min_periods = _prepare_rolling_window_args(
|
4028
5059
|
window_size, min_periods
|
4029
5060
|
)
|
5061
|
+
if !by.nil?
|
5062
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5063
|
+
return rolling_max_by(
|
5064
|
+
by,
|
5065
|
+
window_size,
|
5066
|
+
min_periods: min_periods,
|
5067
|
+
closed: closed || "right",
|
5068
|
+
warn_if_unsorted: warn_if_unsorted
|
5069
|
+
)
|
5070
|
+
end
|
5071
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4030
5072
|
_from_rbexpr(
|
4031
5073
|
_rbexpr.rolling_max(
|
4032
|
-
window_size, weights, min_periods, center
|
5074
|
+
window_size, weights, min_periods, center
|
4033
5075
|
)
|
4034
5076
|
)
|
4035
5077
|
end
|
@@ -4111,14 +5153,25 @@ module Polars
|
|
4111
5153
|
min_periods: nil,
|
4112
5154
|
center: false,
|
4113
5155
|
by: nil,
|
4114
|
-
closed:
|
5156
|
+
closed: nil
|
4115
5157
|
)
|
4116
5158
|
window_size, min_periods = _prepare_rolling_window_args(
|
4117
5159
|
window_size, min_periods
|
4118
5160
|
)
|
5161
|
+
if !by.nil?
|
5162
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5163
|
+
return rolling_mean_by(
|
5164
|
+
by,
|
5165
|
+
window_size,
|
5166
|
+
min_periods: min_periods,
|
5167
|
+
closed: closed || "right",
|
5168
|
+
warn_if_unsorted: warn_if_unsorted
|
5169
|
+
)
|
5170
|
+
end
|
5171
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4119
5172
|
_from_rbexpr(
|
4120
5173
|
_rbexpr.rolling_mean(
|
4121
|
-
window_size, weights, min_periods, center
|
5174
|
+
window_size, weights, min_periods, center
|
4122
5175
|
)
|
4123
5176
|
)
|
4124
5177
|
end
|
@@ -4200,14 +5253,25 @@ module Polars
|
|
4200
5253
|
min_periods: nil,
|
4201
5254
|
center: false,
|
4202
5255
|
by: nil,
|
4203
|
-
closed:
|
5256
|
+
closed: nil
|
4204
5257
|
)
|
4205
5258
|
window_size, min_periods = _prepare_rolling_window_args(
|
4206
5259
|
window_size, min_periods
|
4207
5260
|
)
|
5261
|
+
if !by.nil?
|
5262
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5263
|
+
return rolling_sum_by(
|
5264
|
+
by,
|
5265
|
+
window_size,
|
5266
|
+
min_periods: min_periods,
|
5267
|
+
closed: closed || "right",
|
5268
|
+
warn_if_unsorted: warn_if_unsorted
|
5269
|
+
)
|
5270
|
+
end
|
5271
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4208
5272
|
_from_rbexpr(
|
4209
5273
|
_rbexpr.rolling_sum(
|
4210
|
-
window_size, weights, min_periods, center
|
5274
|
+
window_size, weights, min_periods, center
|
4211
5275
|
)
|
4212
5276
|
)
|
4213
5277
|
end
|
@@ -4289,16 +5353,28 @@ module Polars
|
|
4289
5353
|
min_periods: nil,
|
4290
5354
|
center: false,
|
4291
5355
|
by: nil,
|
4292
|
-
closed:
|
5356
|
+
closed: nil,
|
4293
5357
|
ddof: 1,
|
4294
5358
|
warn_if_unsorted: true
|
4295
5359
|
)
|
4296
5360
|
window_size, min_periods = _prepare_rolling_window_args(
|
4297
5361
|
window_size, min_periods
|
4298
5362
|
)
|
5363
|
+
if !by.nil?
|
5364
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5365
|
+
return rolling_std_by(
|
5366
|
+
by,
|
5367
|
+
window_size,
|
5368
|
+
min_periods: min_periods,
|
5369
|
+
closed: closed || "right",
|
5370
|
+
ddof: ddof,
|
5371
|
+
warn_if_unsorted: warn_if_unsorted
|
5372
|
+
)
|
5373
|
+
end
|
5374
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4299
5375
|
_from_rbexpr(
|
4300
5376
|
_rbexpr.rolling_std(
|
4301
|
-
window_size, weights, min_periods, center,
|
5377
|
+
window_size, weights, min_periods, center, ddof
|
4302
5378
|
)
|
4303
5379
|
)
|
4304
5380
|
end
|
@@ -4380,16 +5456,28 @@ module Polars
|
|
4380
5456
|
min_periods: nil,
|
4381
5457
|
center: false,
|
4382
5458
|
by: nil,
|
4383
|
-
closed:
|
5459
|
+
closed: nil,
|
4384
5460
|
ddof: 1,
|
4385
5461
|
warn_if_unsorted: true
|
4386
5462
|
)
|
4387
5463
|
window_size, min_periods = _prepare_rolling_window_args(
|
4388
5464
|
window_size, min_periods
|
4389
5465
|
)
|
5466
|
+
if !by.nil?
|
5467
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5468
|
+
return rolling_var_by(
|
5469
|
+
by,
|
5470
|
+
window_size,
|
5471
|
+
min_periods: min_periods,
|
5472
|
+
closed: closed || "right",
|
5473
|
+
ddof: ddof,
|
5474
|
+
warn_if_unsorted: warn_if_unsorted
|
5475
|
+
)
|
5476
|
+
end
|
5477
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4390
5478
|
_from_rbexpr(
|
4391
5479
|
_rbexpr.rolling_var(
|
4392
|
-
window_size, weights, min_periods, center,
|
5480
|
+
window_size, weights, min_periods, center, ddof
|
4393
5481
|
)
|
4394
5482
|
)
|
4395
5483
|
end
|
@@ -4467,15 +5555,26 @@ module Polars
|
|
4467
5555
|
min_periods: nil,
|
4468
5556
|
center: false,
|
4469
5557
|
by: nil,
|
4470
|
-
closed:
|
5558
|
+
closed: nil,
|
4471
5559
|
warn_if_unsorted: true
|
4472
5560
|
)
|
4473
5561
|
window_size, min_periods = _prepare_rolling_window_args(
|
4474
5562
|
window_size, min_periods
|
4475
5563
|
)
|
5564
|
+
if !by.nil?
|
5565
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5566
|
+
return rolling_median_by(
|
5567
|
+
by,
|
5568
|
+
window_size,
|
5569
|
+
min_periods: min_periods,
|
5570
|
+
closed: closed || "right",
|
5571
|
+
warn_if_unsorted: warn_if_unsorted
|
5572
|
+
)
|
5573
|
+
end
|
5574
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4476
5575
|
_from_rbexpr(
|
4477
5576
|
_rbexpr.rolling_median(
|
4478
|
-
window_size, weights, min_periods, center
|
5577
|
+
window_size, weights, min_periods, center
|
4479
5578
|
)
|
4480
5579
|
)
|
4481
5580
|
end
|
@@ -4559,15 +5658,27 @@ module Polars
|
|
4559
5658
|
min_periods: nil,
|
4560
5659
|
center: false,
|
4561
5660
|
by: nil,
|
4562
|
-
closed:
|
5661
|
+
closed: nil,
|
4563
5662
|
warn_if_unsorted: true
|
4564
5663
|
)
|
4565
5664
|
window_size, min_periods = _prepare_rolling_window_args(
|
4566
5665
|
window_size, min_periods
|
4567
5666
|
)
|
5667
|
+
if !by.nil?
|
5668
|
+
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5669
|
+
return rolling_quantile_by(
|
5670
|
+
by,
|
5671
|
+
window_size,
|
5672
|
+
min_periods: min_periods,
|
5673
|
+
closed: closed || "right",
|
5674
|
+
warn_if_unsorted: warn_if_unsorted,
|
5675
|
+
quantile: quantile
|
5676
|
+
)
|
5677
|
+
end
|
5678
|
+
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4568
5679
|
_from_rbexpr(
|
4569
5680
|
_rbexpr.rolling_quantile(
|
4570
|
-
quantile, interpolation, window_size, weights, min_periods, center
|
5681
|
+
quantile, interpolation, window_size, weights, min_periods, center
|
4571
5682
|
)
|
4572
5683
|
)
|
4573
5684
|
end
|
@@ -4730,6 +5841,8 @@ module Polars
|
|
4730
5841
|
# on the order that the values occur in the Series.
|
4731
5842
|
# @param reverse [Boolean]
|
4732
5843
|
# Reverse the operation.
|
5844
|
+
# @param seed [Integer]
|
5845
|
+
# If `method: "random"`, use this as seed.
|
4733
5846
|
#
|
4734
5847
|
# @return [Expr]
|
4735
5848
|
#
|
@@ -5711,13 +6824,13 @@ module Polars
|
|
5711
6824
|
# # ┌────────┐
|
5712
6825
|
# # │ values │
|
5713
6826
|
# # │ --- │
|
5714
|
-
# # │
|
6827
|
+
# # │ i64 │
|
5715
6828
|
# # ╞════════╡
|
5716
|
-
# # │ 0
|
5717
|
-
# # │ -3
|
5718
|
-
# # │ -8
|
5719
|
-
# # │ -15
|
5720
|
-
# # │ -24
|
6829
|
+
# # │ 0 │
|
6830
|
+
# # │ -3 │
|
6831
|
+
# # │ -8 │
|
6832
|
+
# # │ -15 │
|
6833
|
+
# # │ -24 │
|
5721
6834
|
# # └────────┘
|
5722
6835
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
5723
6836
|
_from_rbexpr(
|
@@ -6098,5 +7211,9 @@ module Polars
|
|
6098
7211
|
end
|
6099
7212
|
[window_size, min_periods]
|
6100
7213
|
end
|
7214
|
+
|
7215
|
+
# Hook used by the `rolling_*_by` methods to prepare their window argument.
# Currently returns `window_size` unchanged.
def _prepare_rolling_by_window_args(window_size) = window_size
|
6101
7218
|
end
|
6102
7219
|
end
|