polars-df 0.10.0-arm64-darwin → 0.12.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/LICENSE-THIRD-PARTY.txt +1127 -867
  5. data/README.md +6 -6
  6. data/lib/polars/3.1/polars.bundle +0 -0
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/array_expr.rb +4 -4
  10. data/lib/polars/batched_csv_reader.rb +11 -5
  11. data/lib/polars/cat_expr.rb +0 -36
  12. data/lib/polars/cat_name_space.rb +0 -37
  13. data/lib/polars/convert.rb +6 -1
  14. data/lib/polars/data_frame.rb +176 -403
  15. data/lib/polars/data_types.rb +1 -1
  16. data/lib/polars/date_time_expr.rb +525 -572
  17. data/lib/polars/date_time_name_space.rb +263 -460
  18. data/lib/polars/dynamic_group_by.rb +5 -5
  19. data/lib/polars/exceptions.rb +7 -0
  20. data/lib/polars/expr.rb +1394 -243
  21. data/lib/polars/expr_dispatch.rb +1 -1
  22. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  23. data/lib/polars/functions/as_datatype.rb +63 -40
  24. data/lib/polars/functions/lazy.rb +63 -14
  25. data/lib/polars/functions/lit.rb +1 -1
  26. data/lib/polars/functions/range/date_range.rb +90 -57
  27. data/lib/polars/functions/range/datetime_range.rb +149 -0
  28. data/lib/polars/functions/range/int_range.rb +2 -2
  29. data/lib/polars/functions/range/time_range.rb +141 -0
  30. data/lib/polars/functions/repeat.rb +1 -1
  31. data/lib/polars/functions/whenthen.rb +1 -1
  32. data/lib/polars/group_by.rb +88 -23
  33. data/lib/polars/io/avro.rb +24 -0
  34. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  35. data/lib/polars/io/database.rb +73 -0
  36. data/lib/polars/io/ipc.rb +247 -0
  37. data/lib/polars/io/json.rb +29 -0
  38. data/lib/polars/io/ndjson.rb +80 -0
  39. data/lib/polars/io/parquet.rb +227 -0
  40. data/lib/polars/lazy_frame.rb +143 -272
  41. data/lib/polars/lazy_group_by.rb +100 -3
  42. data/lib/polars/list_expr.rb +11 -11
  43. data/lib/polars/list_name_space.rb +5 -1
  44. data/lib/polars/rolling_group_by.rb +7 -9
  45. data/lib/polars/series.rb +103 -187
  46. data/lib/polars/string_expr.rb +78 -102
  47. data/lib/polars/string_name_space.rb +5 -4
  48. data/lib/polars/testing.rb +2 -2
  49. data/lib/polars/utils/constants.rb +9 -0
  50. data/lib/polars/utils/convert.rb +97 -0
  51. data/lib/polars/utils/parse.rb +89 -0
  52. data/lib/polars/utils/various.rb +76 -0
  53. data/lib/polars/utils/wrap.rb +19 -0
  54. data/lib/polars/utils.rb +8 -300
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars/whenthen.rb +6 -6
  57. data/lib/polars.rb +20 -1
  58. metadata +17 -4
data/lib/polars/expr.rb CHANGED
@@ -82,8 +82,8 @@ module Polars
82
82
  #
83
83
  # @return [Expr]
84
84
  def **(power)
85
- exponent = Utils.expr_to_lit_or_expr(power)
86
- _from_rbexpr(_rbexpr.pow(exponent._rbexpr))
85
+ exponent = Utils.parse_into_expression(power)
86
+ _from_rbexpr(_rbexpr.pow(exponent))
87
87
  end
88
88
 
89
89
  # Greater than or equal.
@@ -811,8 +811,8 @@ module Polars
811
811
  # # │ 10 ┆ 4 │
812
812
  # # └─────┴──────┘
813
813
  def append(other, upcast: true)
814
- other = Utils.expr_to_lit_or_expr(other)
815
- _from_rbexpr(_rbexpr.append(other._rbexpr, upcast))
814
+ other = Utils.parse_into_expression(other)
815
+ _from_rbexpr(_rbexpr.append(other, upcast))
816
816
  end
817
817
 
818
818
  # Create a single chunk of memory for this Series.
@@ -1165,8 +1165,8 @@ module Polars
1165
1165
  # # │ 44 │
1166
1166
  # # └─────┘
1167
1167
  def dot(other)
1168
- other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
1169
- _from_rbexpr(_rbexpr.dot(other._rbexpr))
1168
+ other = Utils.parse_into_expression(other, str_as_lit: false)
1169
+ _from_rbexpr(_rbexpr.dot(other))
1170
1170
  end
1171
1171
 
1172
1172
  # Compute the most occurring value(s).
@@ -1252,12 +1252,12 @@ module Polars
1252
1252
  # df = Polars::DataFrame.new(
1253
1253
  # {
1254
1254
  # "group" => [
1255
- # "one",
1256
- # "one",
1257
- # "one",
1258
- # "two",
1259
- # "two",
1260
- # "two"
1255
+ # "one",
1256
+ # "one",
1257
+ # "one",
1258
+ # "two",
1259
+ # "two",
1260
+ # "two"
1261
1261
  # ],
1262
1262
  # "value" => [1, 98, 2, 3, 99, 4]
1263
1263
  # }
@@ -1346,7 +1346,7 @@ module Polars
1346
1346
  # # │ 2 ┆ 98 │
1347
1347
  # # └───────┴──────────┘
1348
1348
  def top_k(k: 5)
1349
- k = Utils.parse_as_expression(k)
1349
+ k = Utils.parse_into_expression(k)
1350
1350
  _from_rbexpr(_rbexpr.top_k(k))
1351
1351
  end
1352
1352
 
@@ -1385,7 +1385,7 @@ module Polars
1385
1385
  # # │ 2 ┆ 98 │
1386
1386
  # # └───────┴──────────┘
1387
1387
  def bottom_k(k: 5)
1388
- k = Utils.parse_as_expression(k)
1388
+ k = Utils.parse_into_expression(k)
1389
1389
  _from_rbexpr(_rbexpr.bottom_k(k))
1390
1390
  end
1391
1391
 
@@ -1498,8 +1498,8 @@ module Polars
1498
1498
  # # │ 0 ┆ 2 ┆ 4 │
1499
1499
  # # └──────┴───────┴─────┘
1500
1500
  def search_sorted(element, side: "any")
1501
- element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
1502
- _from_rbexpr(_rbexpr.search_sorted(element._rbexpr, side))
1501
+ element = Utils.parse_into_expression(element, str_as_lit: false)
1502
+ _from_rbexpr(_rbexpr.search_sorted(element, side))
1503
1503
  end
1504
1504
 
1505
1505
  # Sort this column by the ordering of another column, or multiple other columns.
@@ -1545,13 +1545,14 @@ module Polars
1545
1545
  # # │ two │
1546
1546
  # # └───────┘
1547
1547
  def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
1548
- by = Utils.parse_as_list_of_expressions(by, *more_by)
1549
- if !reverse.is_a?(::Array)
1550
- reverse = [reverse]
1551
- elsif by.length != reverse.length
1552
- raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
1553
- end
1554
- _from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
1548
+ by = Utils.parse_into_list_of_expressions(by, *more_by)
1549
+ reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
1550
+ nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
1551
+ _from_rbexpr(
1552
+ _rbexpr.sort_by(
1553
+ by, reverse, nulls_last, multithreaded, maintain_order
1554
+ )
1555
+ )
1555
1556
  end
1556
1557
 
1557
1558
  # Take values by index.
@@ -1588,14 +1589,51 @@ module Polars
1588
1589
  # # └───────┴───────────┘
1589
1590
  def gather(indices)
1590
1591
  if indices.is_a?(::Array)
1591
- indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1592
+ indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))._rbexpr
1592
1593
  else
1593
- indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1594
+ indices_lit = Utils.parse_into_expression(indices, str_as_lit: false)
1594
1595
  end
1595
- _from_rbexpr(_rbexpr.gather(indices_lit._rbexpr))
1596
+ _from_rbexpr(_rbexpr.gather(indices_lit))
1596
1597
  end
1597
1598
  alias_method :take, :gather
1598
1599
 
1600
+ # Return a single value by index.
1601
+ #
1602
+ # @param index [Object]
1603
+ # An expression that leads to a UInt32 index.
1604
+ #
1605
+ # @return [Expr]
1606
+ #
1607
+ # @example
1608
+ # df = Polars::DataFrame.new(
1609
+ # {
1610
+ # "group" => [
1611
+ # "one",
1612
+ # "one",
1613
+ # "one",
1614
+ # "two",
1615
+ # "two",
1616
+ # "two"
1617
+ # ],
1618
+ # "value" => [1, 98, 2, 3, 99, 4]
1619
+ # }
1620
+ # )
1621
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
1622
+ # # =>
1623
+ # # shape: (2, 2)
1624
+ # # ┌───────┬───────┐
1625
+ # # │ group ┆ value │
1626
+ # # │ --- ┆ --- │
1627
+ # # │ str ┆ i64 │
1628
+ # # ╞═══════╪═══════╡
1629
+ # # │ one ┆ 98 │
1630
+ # # │ two ┆ 99 │
1631
+ # # └───────┴───────┘
1632
+ def get(index)
1633
+ index_lit = Utils.parse_into_expression(index)
1634
+ _from_rbexpr(_rbexpr.get(index_lit))
1635
+ end
1636
+
1599
1637
  # Shift the values by a given period.
1600
1638
  #
1601
1639
  # @param n [Integer]
@@ -1622,9 +1660,9 @@ module Polars
1622
1660
  # # └──────┘
1623
1661
  def shift(n = 1, fill_value: nil)
1624
1662
  if !fill_value.nil?
1625
- fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1663
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1626
1664
  end
1627
- n = Utils.parse_as_expression(n)
1665
+ n = Utils.parse_into_expression(n)
1628
1666
  _from_rbexpr(_rbexpr.shift(n, fill_value))
1629
1667
  end
1630
1668
 
@@ -1727,8 +1765,8 @@ module Polars
1727
1765
  end
1728
1766
 
1729
1767
  if !value.nil?
1730
- value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
1731
- _from_rbexpr(_rbexpr.fill_null(value._rbexpr))
1768
+ value = Utils.parse_into_expression(value, str_as_lit: true)
1769
+ _from_rbexpr(_rbexpr.fill_null(value))
1732
1770
  else
1733
1771
  _from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
1734
1772
  end
@@ -1758,8 +1796,8 @@ module Polars
1758
1796
  # # │ zero ┆ 6.0 │
1759
1797
  # # └──────┴──────┘
1760
1798
  def fill_nan(fill_value)
1761
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
1762
- _from_rbexpr(_rbexpr.fill_nan(fill_value._rbexpr))
1799
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1800
+ _from_rbexpr(_rbexpr.fill_nan(fill_value))
1763
1801
  end
1764
1802
 
1765
1803
  # Fill missing values with the latest seen values.
@@ -2275,7 +2313,7 @@ module Polars
2275
2313
  # # │ 4 │
2276
2314
  # # └────────┘
2277
2315
  def over(expr)
2278
- rbexprs = Utils.selection_to_rbexpr_list(expr)
2316
+ rbexprs = Utils.parse_into_list_of_expressions(expr)
2279
2317
  _from_rbexpr(_rbexpr.over(rbexprs))
2280
2318
  end
2281
2319
 
@@ -2470,8 +2508,8 @@ module Polars
2470
2508
  # # │ 1.5 │
2471
2509
  # # └─────┘
2472
2510
  def quantile(quantile, interpolation: "nearest")
2473
- quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2474
- _from_rbexpr(_rbexpr.quantile(quantile._rbexpr, interpolation))
2511
+ quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
2512
+ _from_rbexpr(_rbexpr.quantile(quantile, interpolation))
2475
2513
  end
2476
2514
 
2477
2515
  # Bin continuous values into discrete categories.
@@ -2515,17 +2553,17 @@ module Polars
2515
2553
  # ).unnest("cut")
2516
2554
  # # =>
2517
2555
  # # shape: (5, 3)
2518
- # # ┌─────┬──────┬────────────┐
2519
- # # │ foo ┆ brk ┆ foo_bin │
2520
- # # │ --- ┆ --- ┆ --- │
2521
- # # │ i64 ┆ f64 ┆ cat │
2522
- # # ╞═════╪══════╪════════════╡
2523
- # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2524
- # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2525
- # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2526
- # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2527
- # # │ 2 ┆ inf ┆ (1, inf] │
2528
- # # └─────┴──────┴────────────┘
2556
+ # # ┌─────┬────────────┬────────────┐
2557
+ # # │ foo ┆ breakpoint ┆ category │
2558
+ # # │ --- ┆ --- ┆ --- │
2559
+ # # │ i64 ┆ f64 ┆ cat │
2560
+ # # ╞═════╪════════════╪════════════╡
2561
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2562
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2563
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2564
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2565
+ # # │ 2 ┆ inf ┆ (1, inf] │
2566
+ # # └─────┴────────────┴────────────┘
2529
2567
  def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
2530
2568
  _from_rbexpr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
2531
2569
  end
@@ -2596,17 +2634,17 @@ module Polars
2596
2634
  # ).unnest("qcut")
2597
2635
  # # =>
2598
2636
  # # shape: (5, 3)
2599
- # # ┌─────┬──────┬────────────┐
2600
- # # │ foo ┆ brk ┆ foo_bin │
2601
- # # │ --- ┆ --- ┆ --- │
2602
- # # │ i64 ┆ f64 ┆ cat │
2603
- # # ╞═════╪══════╪════════════╡
2604
- # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2605
- # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2606
- # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2607
- # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2608
- # # │ 2 ┆ inf ┆ (1, inf] │
2609
- # # └─────┴──────┴────────────┘
2637
+ # # ┌─────┬────────────┬────────────┐
2638
+ # # │ foo ┆ breakpoint ┆ category │
2639
+ # # │ --- ┆ --- ┆ --- │
2640
+ # # │ i64 ┆ f64 ┆ cat │
2641
+ # # ╞═════╪════════════╪════════════╡
2642
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2643
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2644
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2645
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2646
+ # # │ 2 ┆ inf ┆ (1, inf] │
2647
+ # # └─────┴────────────┴────────────┘
2610
2648
  def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
2611
2649
  if quantiles.is_a?(Integer)
2612
2650
  rbexpr = _rbexpr.qcut_uniform(
@@ -2630,18 +2668,18 @@ module Polars
2630
2668
  # df.select(Polars.col("s").rle).unnest("s")
2631
2669
  # # =>
2632
2670
  # # shape: (6, 2)
2633
- # # ┌─────────┬────────┐
2634
- # # │ lengths ┆ values │
2635
- # # │ --- ┆ --- │
2636
- # # │ i32 ┆ i64 │
2637
- # # ╞═════════╪════════╡
2638
- # # │ 2 ┆ 1 │
2639
- # # │ 1 ┆ 2 │
2640
- # # │ 1 ┆ 1 │
2641
- # # │ 1 ┆ null │
2642
- # # │ 1 ┆ 1 │
2643
- # # │ 2 ┆ 3 │
2644
- # # └─────────┴────────┘
2671
+ # # ┌─────┬───────┐
2672
+ # # │ len ┆ value │
2673
+ # # │ --- ┆ --- │
2674
+ # # │ u32 ┆ i64 │
2675
+ # # ╞═════╪═══════╡
2676
+ # # │ 2 ┆ 1 │
2677
+ # # │ 1 ┆ 2 │
2678
+ # # │ 1 ┆ 1 │
2679
+ # # │ 1 ┆ null │
2680
+ # # │ 1 ┆ 1 │
2681
+ # # │ 2 ┆ 3 │
2682
+ # # └─────┴───────┘
2645
2683
  def rle
2646
2684
  _from_rbexpr(_rbexpr.rle)
2647
2685
  end
@@ -2764,6 +2802,9 @@ module Polars
2764
2802
  # Dtype of the output Series.
2765
2803
  # @param agg_list [Boolean]
2766
2804
  # Aggregate list.
2805
+ # @param is_elementwise [Boolean]
2806
+ # If set to true this can run in the streaming engine, but may yield
2807
+ # incorrect results in group-by. Ensure you know what you are doing!
2767
2808
  #
2768
2809
  # @return [Expr]
2769
2810
  #
@@ -2784,12 +2825,21 @@ module Polars
2784
2825
  # # ╞══════╪════════╡
2785
2826
  # # │ 1 ┆ 0 │
2786
2827
  # # └──────┴────────┘
2787
- # def map(return_dtype: nil, agg_list: false, &f)
2828
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2788
2829
  # if !return_dtype.nil?
2789
2830
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2790
2831
  # end
2791
- # _from_rbexpr(_rbexpr.map(f, return_dtype, agg_list))
2832
+ # _from_rbexpr(
2833
+ # _rbexpr.map_batches(
2834
+ # # TODO _map_batches_wrapper
2835
+ # f,
2836
+ # return_dtype,
2837
+ # agg_list,
2838
+ # is_elementwise
2839
+ # )
2840
+ # )
2792
2841
  # end
2842
+ # alias_method :map, :map_batches
2793
2843
 
2794
2844
  # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
2795
2845
  #
@@ -2831,7 +2881,7 @@ module Polars
2831
2881
  #
2832
2882
  # @example In a selection context, the function is applied by row.
2833
2883
  # df.with_column(
2834
- # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
2884
+ # Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
2835
2885
  # )
2836
2886
  # # =>
2837
2887
  # # shape: (4, 3)
@@ -2851,7 +2901,7 @@ module Polars
2851
2901
  # .group_by("b", maintain_order: true)
2852
2902
  # .agg(
2853
2903
  # [
2854
- # Polars.col("a").apply { |x| x.sum }
2904
+ # Polars.col("a").map_elements { |x| x.sum }
2855
2905
  # ]
2856
2906
  # )
2857
2907
  # .collect
@@ -2866,12 +2916,23 @@ module Polars
2866
2916
  # # │ b ┆ 2 │
2867
2917
  # # │ c ┆ 4 │
2868
2918
  # # └─────┴─────┘
2869
- # def apply(return_dtype: nil, &f)
2870
- # wrap_f = lambda do |x|
2871
- # x.apply(return_dtype: return_dtype, &f)
2919
+ # def map_elements(
2920
+ # return_dtype: nil,
2921
+ # skip_nulls: true,
2922
+ # pass_name: false,
2923
+ # strategy: "thread_local",
2924
+ # &f
2925
+ # )
2926
+ # if pass_name
2927
+ # raise Todo
2928
+ # else
2929
+ # wrap_f = lambda do |x|
2930
+ # x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
2931
+ # end
2872
2932
  # end
2873
- # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
2933
+ # map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
2874
2934
  # end
2935
+ # alias_method :apply, :map_elements
2875
2936
 
2876
2937
  # Explode a list or utf8 Series. This means that every item is expanded to a new
2877
2938
  # row.
@@ -3081,7 +3142,7 @@ module Polars
3081
3142
  # # │ null ┆ null ┆ null ┆ true │
3082
3143
  # # └──────┴──────┴────────┴────────────────┘
3083
3144
  def eq_missing(other)
3084
- other = Utils.parse_as_expression(other, str_as_lit: true)
3145
+ other = Utils.parse_into_expression(other, str_as_lit: true)
3085
3146
  _from_rbexpr(_rbexpr.eq_missing(other))
3086
3147
  end
3087
3148
 
@@ -3285,7 +3346,7 @@ module Polars
3285
3346
  # # │ null ┆ null ┆ null ┆ false │
3286
3347
  # # └──────┴──────┴────────┴────────────────┘
3287
3348
  def ne_missing(other)
3288
- other = Utils.parse_as_expression(other, str_as_lit: true)
3349
+ other = Utils.parse_into_expression(other, str_as_lit: true)
3289
3350
  _from_rbexpr(_rbexpr.neq_missing(other))
3290
3351
  end
3291
3352
 
@@ -3588,14 +3649,14 @@ module Polars
3588
3649
  def is_in(other)
3589
3650
  if other.is_a?(::Array)
3590
3651
  if other.length == 0
3591
- other = Polars.lit(nil)
3652
+ other = Polars.lit(nil)._rbexpr
3592
3653
  else
3593
- other = Polars.lit(Series.new(other))
3654
+ other = Polars.lit(Series.new(other))._rbexpr
3594
3655
  end
3595
3656
  else
3596
- other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
3657
+ other = Utils.parse_into_expression(other, str_as_lit: false)
3597
3658
  end
3598
- _from_rbexpr(_rbexpr.is_in(other._rbexpr))
3659
+ _from_rbexpr(_rbexpr.is_in(other))
3599
3660
  end
3600
3661
  alias_method :in?, :is_in
3601
3662
 
@@ -3630,15 +3691,15 @@ module Polars
3630
3691
  # # │ ["z", "z", "z"] │
3631
3692
  # # └─────────────────┘
3632
3693
  def repeat_by(by)
3633
- by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
3634
- _from_rbexpr(_rbexpr.repeat_by(by._rbexpr))
3694
+ by = Utils.parse_into_expression(by, str_as_lit: false)
3695
+ _from_rbexpr(_rbexpr.repeat_by(by))
3635
3696
  end
3636
3697
 
3637
3698
  # Check if this expression is between start and end.
3638
3699
  #
3639
- # @param start [Object]
3700
+ # @param lower_bound [Object]
3640
3701
  # Lower bound as primitive type or datetime.
3641
- # @param _end [Object]
3702
+ # @param upper_bound [Object]
3642
3703
  # Upper bound as primitive type or datetime.
3643
3704
  # @param closed ["both", "left", "right", "none"]
3644
3705
  # Define which sides of the interval are closed (inclusive).
@@ -3700,22 +3761,13 @@ module Polars
3700
3761
  # # │ d ┆ false │
3701
3762
  # # │ e ┆ false │
3702
3763
  # # └─────┴────────────┘
3703
- def is_between(start, _end, closed: "both")
3704
- start = Utils.expr_to_lit_or_expr(start, str_to_lit: false)
3705
- _end = Utils.expr_to_lit_or_expr(_end, str_to_lit: false)
3706
-
3707
- case closed
3708
- when "none"
3709
- (self > start) & (self < _end)
3710
- when "both"
3711
- (self >= start) & (self <= _end)
3712
- when "right"
3713
- (self > start) & (self <= _end)
3714
- when "left"
3715
- (self >= start) & (self < _end)
3716
- else
3717
- raise ArgumentError, "closed must be one of 'left', 'right', 'both', or 'none'"
3718
- end
3764
+ def is_between(lower_bound, upper_bound, closed: "both")
3765
+ lower_bound = Utils.parse_into_expression(lower_bound)
3766
+ upper_bound = Utils.parse_into_expression(upper_bound)
3767
+
3768
+ _from_rbexpr(
3769
+ _rbexpr.is_between(lower_bound, upper_bound, closed)
3770
+ )
3719
3771
  end
3720
3772
 
3721
3773
  # Hash the elements in the selection.
@@ -3857,6 +3909,1002 @@ module Polars
3857
3909
  _from_rbexpr(_rbexpr.interpolate(method))
3858
3910
  end
3859
3911
 
3912
+ # Apply a rolling min based on another column.
3913
+ #
3914
+ # @param by [String]
3915
+ # This column must be of dtype Datetime or Date.
3916
+ # @param window_size [String]
3917
+ # The length of the window. Can be a dynamic temporal
3918
+ # size indicated by a timedelta or the following string language:
3919
+ #
3920
+ # - 1ns (1 nanosecond)
3921
+ # - 1us (1 microsecond)
3922
+ # - 1ms (1 millisecond)
3923
+ # - 1s (1 second)
3924
+ # - 1m (1 minute)
3925
+ # - 1h (1 hour)
3926
+ # - 1d (1 calendar day)
3927
+ # - 1w (1 calendar week)
3928
+ # - 1mo (1 calendar month)
3929
+ # - 1q (1 calendar quarter)
3930
+ # - 1y (1 calendar year)
3931
+ #
3932
+ # By "calendar day", we mean the corresponding time on the next day
3933
+ # (which may not be 24 hours, due to daylight savings). Similarly for
3934
+ # "calendar week", "calendar month", "calendar quarter", and
3935
+ # "calendar year".
3936
+ # @param min_periods [Integer]
3937
+ # The number of values in the window that should be non-null before computing
3938
+ # a result.
3939
+ # @param closed ['left', 'right', 'both', 'none']
3940
+ # Define which sides of the temporal interval are closed (inclusive),
3941
+ # defaults to `'right'`.
3942
+ # @param warn_if_unsorted [Boolean]
3943
+ # Warn if data is not known to be sorted by `by` column.
3944
+ #
3945
+ # @return [Expr]
3946
+ #
3947
+ # @note
3948
+ # If you want to compute multiple aggregation statistics over the same dynamic
3949
+ # window, consider using `rolling` - this method can cache the window size
3950
+ # computation.
3951
+ #
3952
+ # @example Create a DataFrame with a datetime column and a row number column
3953
+ # start = DateTime.new(2001, 1, 1)
3954
+ # stop = DateTime.new(2001, 1, 2)
3955
+ # df_temporal = Polars::DataFrame.new(
3956
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
3957
+ # ).with_row_index
3958
+ # # =>
3959
+ # # shape: (25, 2)
3960
+ # # ┌───────┬─────────────────────┐
3961
+ # # │ index ┆ date │
3962
+ # # │ --- ┆ --- │
3963
+ # # │ u32 ┆ datetime[ns] │
3964
+ # # ╞═══════╪═════════════════════╡
3965
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
3966
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
3967
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
3968
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
3969
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
3970
+ # # │ … ┆ … │
3971
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
3972
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
3973
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
3974
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
3975
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
3976
+ # # └───────┴─────────────────────┘
3977
+ #
3978
+ # @example Compute the rolling min with the temporal windows closed on the right (default)
3979
+ # df_temporal.with_columns(
3980
+ # rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
3981
+ # )
3982
+ # # =>
3983
+ # # shape: (25, 3)
3984
+ # # ┌───────┬─────────────────────┬─────────────────┐
3985
+ # # │ index ┆ date ┆ rolling_row_min │
3986
+ # # │ --- ┆ --- ┆ --- │
3987
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
3988
+ # # ╞═══════╪═════════════════════╪═════════════════╡
3989
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
3990
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │
3991
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │
3992
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │
3993
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │
3994
+ # # │ … ┆ … ┆ … │
3995
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │
3996
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │
3997
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │
3998
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │
3999
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │
4000
+ # # └───────┴─────────────────────┴─────────────────┘
4001
+ def rolling_min_by(
4002
+ by,
4003
+ window_size,
4004
+ min_periods: 1,
4005
+ closed: "right",
4006
+ warn_if_unsorted: nil
4007
+ )
4008
+ window_size = _prepare_rolling_by_window_args(window_size)
4009
+ by = Utils.parse_into_expression(by)
4010
+ _from_rbexpr(
4011
+ _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
4012
+ )
4013
+ end
4014
+
4015
+ # Apply a rolling max based on another column.
4016
+ #
4017
+ # @param by [String]
4018
+ # This column must be of dtype Datetime or Date.
4019
+ # @param window_size [String]
4020
+ # The length of the window. Can be a dynamic temporal
4021
+ # size indicated by a timedelta or the following string language:
4022
+ #
4023
+ # - 1ns (1 nanosecond)
4024
+ # - 1us (1 microsecond)
4025
+ # - 1ms (1 millisecond)
4026
+ # - 1s (1 second)
4027
+ # - 1m (1 minute)
4028
+ # - 1h (1 hour)
4029
+ # - 1d (1 calendar day)
4030
+ # - 1w (1 calendar week)
4031
+ # - 1mo (1 calendar month)
4032
+ # - 1q (1 calendar quarter)
4033
+ # - 1y (1 calendar year)
4034
+ #
4035
+ # By "calendar day", we mean the corresponding time on the next day
4036
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4037
+ # "calendar week", "calendar month", "calendar quarter", and
4038
+ # "calendar year".
4039
+ # @param min_periods [Integer]
4040
+ # The number of values in the window that should be non-null before computing
4041
+ # a result.
4042
+ # @param closed ['left', 'right', 'both', 'none']
4043
+ # Define which sides of the temporal interval are closed (inclusive),
4044
+ # defaults to `'right'`.
4045
+ # @param warn_if_unsorted [Boolean]
4046
+ # Warn if data is not known to be sorted by `by` column.
4047
+ #
4048
+ # @return [Expr]
4049
+ #
4050
+ # @note
4051
+ # If you want to compute multiple aggregation statistics over the same dynamic
4052
+ # window, consider using `rolling` - this method can cache the window size
4053
+ # computation.
4054
+ #
4055
+ # @example Create a DataFrame with a datetime column and a row number column
4056
+ # start = DateTime.new(2001, 1, 1)
4057
+ # stop = DateTime.new(2001, 1, 2)
4058
+ # df_temporal = Polars::DataFrame.new(
4059
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4060
+ # ).with_row_index
4061
+ # # =>
4062
+ # # shape: (25, 2)
4063
+ # # ┌───────┬─────────────────────┐
4064
+ # # │ index ┆ date │
4065
+ # # │ --- ┆ --- │
4066
+ # # │ u32 ┆ datetime[ns] │
4067
+ # # ╞═══════╪═════════════════════╡
4068
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4069
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4070
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4071
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4072
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4073
+ # # │ … ┆ … │
4074
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4075
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4076
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4077
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4078
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4079
+ # # └───────┴─────────────────────┘
4080
+ #
4081
+ # @example Compute the rolling max with the temporal windows closed on the right (default)
4082
+ # df_temporal.with_columns(
4083
+ # rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
4084
+ # )
4085
+ # # =>
4086
+ # # shape: (25, 3)
4087
+ # # ┌───────┬─────────────────────┬─────────────────┐
4088
+ # # │ index ┆ date ┆ rolling_row_max │
4089
+ # # │ --- ┆ --- ┆ --- │
4090
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4091
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4092
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4093
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4094
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4095
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4096
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4097
+ # # │ … ┆ … ┆ … │
4098
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4099
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4100
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4101
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4102
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4103
+ # # └───────┴─────────────────────┴─────────────────┘
4104
+ #
4105
+ # @example Compute the rolling max with the closure of windows on both sides
4106
+ # df_temporal.with_columns(
4107
+ # rolling_row_max: Polars.col("index").rolling_max_by(
4108
+ # "date", "2h", closed: "both"
4109
+ # )
4110
+ # )
4111
+ # # =>
4112
+ # # shape: (25, 3)
4113
+ # # ┌───────┬─────────────────────┬─────────────────┐
4114
+ # # │ index ┆ date ┆ rolling_row_max │
4115
+ # # │ --- ┆ --- ┆ --- │
4116
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4117
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4118
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4119
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4120
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4121
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4122
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4123
+ # # │ … ┆ … ┆ … │
4124
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4125
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4126
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4127
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4128
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4129
+ # # └───────┴─────────────────────┴─────────────────┘
4130
+ def rolling_max_by(
4131
+ by,
4132
+ window_size,
4133
+ min_periods: 1,
4134
+ closed: "right",
4135
+ warn_if_unsorted: nil
4136
+ )
4137
+ window_size = _prepare_rolling_by_window_args(window_size)
4138
+ by = Utils.parse_into_expression(by)
4139
+ _from_rbexpr(
4140
+ _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
4141
+ )
4142
+ end
4143
+
4144
+ # Apply a rolling mean based on another column.
4145
+ #
4146
+ # @param by [String]
4147
+ # This column must be of dtype Datetime or Date.
4148
+ # @param window_size [String]
4149
+ # The length of the window. Can be a dynamic temporal
4150
+ # size indicated by a timedelta or the following string language:
4151
+ #
4152
+ # - 1ns (1 nanosecond)
4153
+ # - 1us (1 microsecond)
4154
+ # - 1ms (1 millisecond)
4155
+ # - 1s (1 second)
4156
+ # - 1m (1 minute)
4157
+ # - 1h (1 hour)
4158
+ # - 1d (1 calendar day)
4159
+ # - 1w (1 calendar week)
4160
+ # - 1mo (1 calendar month)
4161
+ # - 1q (1 calendar quarter)
4162
+ # - 1y (1 calendar year)
4163
+ #
4164
+ # By "calendar day", we mean the corresponding time on the next day
4165
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4166
+ # "calendar week", "calendar month", "calendar quarter", and
4167
+ # "calendar year".
4168
+ # @param min_periods [Integer]
4169
+ # The number of values in the window that should be non-null before computing
4170
+ # a result.
4171
+ # @param closed ['left', 'right', 'both', 'none']
4172
+ # Define which sides of the temporal interval are closed (inclusive),
4173
+ # defaults to `'right'`.
4174
+ # @param warn_if_unsorted [Boolean]
4175
+ # Warn if data is not known to be sorted by `by` column.
4176
+ #
4177
+ # @return [Expr]
4178
+ #
4179
+ # @note
4180
+ # If you want to compute multiple aggregation statistics over the same dynamic
4181
+ # window, consider using `rolling` - this method can cache the window size
4182
+ # computation.
4183
+ #
4184
+ # @example Create a DataFrame with a datetime column and a row number column
4185
+ # start = DateTime.new(2001, 1, 1)
4186
+ # stop = DateTime.new(2001, 1, 2)
4187
+ # df_temporal = Polars::DataFrame.new(
4188
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4189
+ # ).with_row_index
4190
+ # # =>
4191
+ # # shape: (25, 2)
4192
+ # # ┌───────┬─────────────────────┐
4193
+ # # │ index ┆ date │
4194
+ # # │ --- ┆ --- │
4195
+ # # │ u32 ┆ datetime[ns] │
4196
+ # # ╞═══════╪═════════════════════╡
4197
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4198
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4199
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4200
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4201
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4202
+ # # │ … ┆ … │
4203
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4204
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4205
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4206
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4207
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4208
+ # # └───────┴─────────────────────┘
4209
+ #
4210
+ # @example Compute the rolling mean with the temporal windows closed on the right (default)
4211
+ # df_temporal.with_columns(
4212
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4213
+ # "date", "2h"
4214
+ # )
4215
+ # )
4216
+ # # =>
4217
+ # # shape: (25, 3)
4218
+ # # ┌───────┬─────────────────────┬──────────────────┐
4219
+ # # │ index ┆ date ┆ rolling_row_mean │
4220
+ # # │ --- ┆ --- ┆ --- │
4221
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4222
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4223
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4224
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4225
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4226
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4227
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4228
+ # # │ … ┆ … ┆ … │
4229
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4230
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4231
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4232
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4233
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4234
+ # # └───────┴─────────────────────┴──────────────────┘
4235
+ #
4236
+ # @example Compute the rolling mean with the closure of windows on both sides
4237
+ # df_temporal.with_columns(
4238
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4239
+ # "date", "2h", closed: "both"
4240
+ # )
4241
+ # )
4242
+ # # =>
4243
+ # # shape: (25, 3)
4244
+ # # ┌───────┬─────────────────────┬──────────────────┐
4245
+ # # │ index ┆ date ┆ rolling_row_mean │
4246
+ # # │ --- ┆ --- ┆ --- │
4247
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4248
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4249
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4250
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4251
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4252
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4253
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4254
+ # # │ … ┆ … ┆ … │
4255
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4256
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4257
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4258
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4259
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4260
+ # # └───────┴─────────────────────┴──────────────────┘
4261
+ def rolling_mean_by(
4262
+ by,
4263
+ window_size,
4264
+ min_periods: 1,
4265
+ closed: "right",
4266
+ warn_if_unsorted: nil
4267
+ )
4268
+ window_size = _prepare_rolling_by_window_args(window_size)
4269
+ by = Utils.parse_into_expression(by)
4270
+ _from_rbexpr(
4271
+ _rbexpr.rolling_mean_by(
4272
+ by,
4273
+ window_size,
4274
+ min_periods,
4275
+ closed
4276
+ )
4277
+ )
4278
+ end
4279
+
4280
+ # Apply a rolling sum based on another column.
4281
+ #
4282
+ # @param by [String]
4283
+ # This column must be of dtype `{Date, Datetime}`
4284
+ # @param window_size [String]
4285
+ # The length of the window. Can be a dynamic temporal
4286
+ # size indicated by a timedelta or the following string language:
4287
+ #
4288
+ # - 1ns (1 nanosecond)
4289
+ # - 1us (1 microsecond)
4290
+ # - 1ms (1 millisecond)
4291
+ # - 1s (1 second)
4292
+ # - 1m (1 minute)
4293
+ # - 1h (1 hour)
4294
+ # - 1d (1 calendar day)
4295
+ # - 1w (1 calendar week)
4296
+ # - 1mo (1 calendar month)
4297
+ # - 1q (1 calendar quarter)
4298
+ # - 1y (1 calendar year)
4299
+ #
4300
+ # By "calendar day", we mean the corresponding time on the next day
4301
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4302
+ # "calendar week", "calendar month", "calendar quarter", and
4303
+ # "calendar year".
4304
+ # @param min_periods [Integer]
4305
+ # The number of values in the window that should be non-null before computing
4306
+ # a result.
4307
+ # @param closed ['left', 'right', 'both', 'none']
4308
+ # Define which sides of the temporal interval are closed (inclusive),
4309
+ # defaults to `'right'`.
4310
+ # @param warn_if_unsorted [Boolean]
4311
+ # Warn if data is not known to be sorted by `by` column.
4312
+ #
4313
+ # @return [Expr]
4314
+ #
4315
+ # @note
4316
+ # If you want to compute multiple aggregation statistics over the same dynamic
4317
+ # window, consider using `rolling` - this method can cache the window size
4318
+ # computation.
4319
+ #
4320
+ # @example Create a DataFrame with a datetime column and a row number column
4321
+ # start = DateTime.new(2001, 1, 1)
4322
+ # stop = DateTime.new(2001, 1, 2)
4323
+ # df_temporal = Polars::DataFrame.new(
4324
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4325
+ # ).with_row_index
4326
+ # # =>
4327
+ # # shape: (25, 2)
4328
+ # # ┌───────┬─────────────────────┐
4329
+ # # │ index ┆ date │
4330
+ # # │ --- ┆ --- │
4331
+ # # │ u32 ┆ datetime[ns] │
4332
+ # # ╞═══════╪═════════════════════╡
4333
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4334
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4335
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4336
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4337
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4338
+ # # │ … ┆ … │
4339
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4340
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4341
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4342
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4343
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4344
+ # # └───────┴─────────────────────┘
4345
+ #
4346
+ # @example Compute the rolling sum with the temporal windows closed on the right (default)
4347
+ # df_temporal.with_columns(
4348
+ # rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
4349
+ # )
4350
+ # # =>
4351
+ # # shape: (25, 3)
4352
+ # # ┌───────┬─────────────────────┬─────────────────┐
4353
+ # # │ index ┆ date ┆ rolling_row_sum │
4354
+ # # │ --- ┆ --- ┆ --- │
4355
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4356
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4357
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4358
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4359
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4360
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 5 │
4361
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 7 │
4362
+ # # │ … ┆ … ┆ … │
4363
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 39 │
4364
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 41 │
4365
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 43 │
4366
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 45 │
4367
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 47 │
4368
+ # # └───────┴─────────────────────┴─────────────────┘
4369
+ #
4370
+ # @example Compute the rolling sum with the closure of windows on both sides
4371
+ # df_temporal.with_columns(
4372
+ # rolling_row_sum: Polars.col("index").rolling_sum_by(
4373
+ # "date", "2h", closed: "both"
4374
+ # )
4375
+ # )
4376
+ # # =>
4377
+ # # shape: (25, 3)
4378
+ # # ┌───────┬─────────────────────┬─────────────────┐
4379
+ # # │ index ┆ date ┆ rolling_row_sum │
4380
+ # # │ --- ┆ --- ┆ --- │
4381
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4382
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4383
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4384
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4385
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4386
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 6 │
4387
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 9 │
4388
+ # # │ … ┆ … ┆ … │
4389
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 57 │
4390
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 60 │
4391
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 63 │
4392
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 66 │
4393
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │
4394
+ # # └───────┴─────────────────────┴─────────────────┘
4395
+ def rolling_sum_by(
4396
+ by,
4397
+ window_size,
4398
+ min_periods: 1,
4399
+ closed: "right",
4400
+ warn_if_unsorted: nil
4401
+ )
4402
+ window_size = _prepare_rolling_by_window_args(window_size)
4403
+ by = Utils.parse_into_expression(by)
4404
+ _from_rbexpr(
4405
+ _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
4406
+ )
4407
+ end
4408
+
4409
+ # Compute a rolling standard deviation based on another column.
4410
+ #
4411
+ # @param by [String]
4412
+ # This column must be of dtype Datetime or Date.
4413
+ # @param window_size [String]
4414
+ # The length of the window. Can be a dynamic temporal
4415
+ # size indicated by a timedelta or the following string language:
4416
+ #
4417
+ # - 1ns (1 nanosecond)
4418
+ # - 1us (1 microsecond)
4419
+ # - 1ms (1 millisecond)
4420
+ # - 1s (1 second)
4421
+ # - 1m (1 minute)
4422
+ # - 1h (1 hour)
4423
+ # - 1d (1 calendar day)
4424
+ # - 1w (1 calendar week)
4425
+ # - 1mo (1 calendar month)
4426
+ # - 1q (1 calendar quarter)
4427
+ # - 1y (1 calendar year)
4428
+ #
4429
+ # By "calendar day", we mean the corresponding time on the next day
4430
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4431
+ # "calendar week", "calendar month", "calendar quarter", and
4432
+ # "calendar year".
4433
+ # @param min_periods [Integer]
4434
+ # The number of values in the window that should be non-null before computing
4435
+ # a result.
4436
+ # @param closed ['left', 'right', 'both', 'none']
4437
+ # Define which sides of the temporal interval are closed (inclusive),
4438
+ # defaults to `'right'`.
4439
+ # @param ddof [Integer]
4440
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4441
+ # @param warn_if_unsorted [Boolean]
4442
+ # Warn if data is not known to be sorted by `by` column.
4443
+ #
4444
+ # @return [Expr]
4445
+ #
4446
+ # @note
4447
+ # If you want to compute multiple aggregation statistics over the same dynamic
4448
+ # window, consider using `rolling` - this method can cache the window size
4449
+ # computation.
4450
+ #
4451
+ # @example Create a DataFrame with a datetime column and a row number column
4452
+ # start = DateTime.new(2001, 1, 1)
4453
+ # stop = DateTime.new(2001, 1, 2)
4454
+ # df_temporal = Polars::DataFrame.new(
4455
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4456
+ # ).with_row_index
4457
+ # # =>
4458
+ # # shape: (25, 2)
4459
+ # # ┌───────┬─────────────────────┐
4460
+ # # │ index ┆ date │
4461
+ # # │ --- ┆ --- │
4462
+ # # │ u32 ┆ datetime[ns] │
4463
+ # # ╞═══════╪═════════════════════╡
4464
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4465
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4466
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4467
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4468
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4469
+ # # │ … ┆ … │
4470
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4471
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4472
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4473
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4474
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4475
+ # # └───────┴─────────────────────┘
4476
+ #
4477
+ # @example Compute the rolling std with the temporal windows closed on the right (default)
4478
+ # df_temporal.with_columns(
4479
+ # rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
4480
+ # )
4481
+ # # =>
4482
+ # # shape: (25, 3)
4483
+ # # ┌───────┬─────────────────────┬─────────────────┐
4484
+ # # │ index ┆ date ┆ rolling_row_std │
4485
+ # # │ --- ┆ --- ┆ --- │
4486
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4487
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4488
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4489
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4490
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.707107 │
4491
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.707107 │
4492
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.707107 │
4493
+ # # │ … ┆ … ┆ … │
4494
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.707107 │
4495
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.707107 │
4496
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.707107 │
4497
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.707107 │
4498
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.707107 │
4499
+ # # └───────┴─────────────────────┴─────────────────┘
4500
+ #
4501
+ # @example Compute the rolling std with the closure of windows on both sides
4502
+ # df_temporal.with_columns(
4503
+ # rolling_row_std: Polars.col("index").rolling_std_by(
4504
+ # "date", "2h", closed: "both"
4505
+ # )
4506
+ # )
4507
+ # # =>
4508
+ # # shape: (25, 3)
4509
+ # # ┌───────┬─────────────────────┬─────────────────┐
4510
+ # # │ index ┆ date ┆ rolling_row_std │
4511
+ # # │ --- ┆ --- ┆ --- │
4512
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4513
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4514
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4515
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4516
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4517
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4518
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4519
+ # # │ … ┆ … ┆ … │
4520
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4521
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4522
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4523
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4524
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4525
+ # # └───────┴─────────────────────┴─────────────────┘
4526
+ def rolling_std_by(
4527
+ by,
4528
+ window_size,
4529
+ min_periods: 1,
4530
+ closed: "right",
4531
+ ddof: 1,
4532
+ warn_if_unsorted: nil
4533
+ )
4534
+ window_size = _prepare_rolling_by_window_args(window_size)
4535
+ by = Utils.parse_into_expression(by)
4536
+ _from_rbexpr(
4537
+ _rbexpr.rolling_std_by(
4538
+ by,
4539
+ window_size,
4540
+ min_periods,
4541
+ closed,
4542
+ ddof
4543
+ )
4544
+ )
4545
+ end
4546
+
4547
+ # Compute a rolling variance based on another column.
4548
+ #
4549
+ # @param by [String]
4550
+ # This column must be of dtype Datetime or Date.
4551
+ # @param window_size [String]
4552
+ # The length of the window. Can be a dynamic temporal
4553
+ # size indicated by a timedelta or the following string language:
4554
+ #
4555
+ # - 1ns (1 nanosecond)
4556
+ # - 1us (1 microsecond)
4557
+ # - 1ms (1 millisecond)
4558
+ # - 1s (1 second)
4559
+ # - 1m (1 minute)
4560
+ # - 1h (1 hour)
4561
+ # - 1d (1 calendar day)
4562
+ # - 1w (1 calendar week)
4563
+ # - 1mo (1 calendar month)
4564
+ # - 1q (1 calendar quarter)
4565
+ # - 1y (1 calendar year)
4566
+ #
4567
+ # By "calendar day", we mean the corresponding time on the next day
4568
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4569
+ # "calendar week", "calendar month", "calendar quarter", and
4570
+ # "calendar year".
4571
+ # @param min_periods [Integer]
4572
+ # The number of values in the window that should be non-null before computing
4573
+ # a result.
4574
+ # @param closed ['left', 'right', 'both', 'none']
4575
+ # Define which sides of the temporal interval are closed (inclusive),
4576
+ # defaults to `'right'`.
4577
+ # @param ddof [Integer]
4578
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4579
+ # @param warn_if_unsorted [Boolean]
4580
+ # Warn if data is not known to be sorted by `by` column.
4581
+ #
4582
+ # @return [Expr]
4583
+ #
4584
+ # @note
4585
+ # If you want to compute multiple aggregation statistics over the same dynamic
4586
+ # window, consider using `rolling` - this method can cache the window size
4587
+ # computation.
4588
+ #
4589
+ # @example Create a DataFrame with a datetime column and a row number column
4590
+ # start = DateTime.new(2001, 1, 1)
4591
+ # stop = DateTime.new(2001, 1, 2)
4592
+ # df_temporal = Polars::DataFrame.new(
4593
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4594
+ # ).with_row_index
4595
+ # # =>
4596
+ # # shape: (25, 2)
4597
+ # # ┌───────┬─────────────────────┐
4598
+ # # │ index ┆ date │
4599
+ # # │ --- ┆ --- │
4600
+ # # │ u32 ┆ datetime[ns] │
4601
+ # # ╞═══════╪═════════════════════╡
4602
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4603
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4604
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4605
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4606
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4607
+ # # │ … ┆ … │
4608
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4609
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4610
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4611
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4612
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4613
+ # # └───────┴─────────────────────┘
4614
+ #
4615
+ # @example Compute the rolling var with the temporal windows closed on the right (default)
4616
+ # df_temporal.with_columns(
4617
+ # rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
4618
+ # )
4619
+ # # =>
4620
+ # # shape: (25, 3)
4621
+ # # ┌───────┬─────────────────────┬─────────────────┐
4622
+ # # │ index ┆ date ┆ rolling_row_var │
4623
+ # # │ --- ┆ --- ┆ --- │
4624
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4625
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4626
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4627
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4628
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.5 │
4629
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.5 │
4630
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.5 │
4631
+ # # │ … ┆ … ┆ … │
4632
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.5 │
4633
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.5 │
4634
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.5 │
4635
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.5 │
4636
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.5 │
4637
+ # # └───────┴─────────────────────┴─────────────────┘
4638
+ #
4639
+ # @example Compute the rolling var with the closure of windows on both sides
4640
+ # df_temporal.with_columns(
4641
+ # rolling_row_var: Polars.col("index").rolling_var_by(
4642
+ # "date", "2h", closed: "both"
4643
+ # )
4644
+ # )
4645
+ # # =>
4646
+ # # shape: (25, 3)
4647
+ # # ┌───────┬─────────────────────┬─────────────────┐
4648
+ # # │ index ┆ date ┆ rolling_row_var │
4649
+ # # │ --- ┆ --- ┆ --- │
4650
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4651
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4652
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4653
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4654
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4655
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4656
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4657
+ # # │ … ┆ … ┆ … │
4658
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4659
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4660
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4661
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4662
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4663
+ # # └───────┴─────────────────────┴─────────────────┘
4664
+ def rolling_var_by(
4665
+ by,
4666
+ window_size,
4667
+ min_periods: 1,
4668
+ closed: "right",
4669
+ ddof: 1,
4670
+ warn_if_unsorted: nil
4671
+ )
4672
+ window_size = _prepare_rolling_by_window_args(window_size)
4673
+ by = Utils.parse_into_expression(by)
4674
+ _from_rbexpr(
4675
+ _rbexpr.rolling_var_by(
4676
+ by,
4677
+ window_size,
4678
+ min_periods,
4679
+ closed,
4680
+ ddof
4681
+ )
4682
+ )
4683
+ end
4684
+
4685
+ # Compute a rolling median based on another column.
4686
+ #
4687
+ # @param by [String]
4688
+ # This column must be of dtype Datetime or Date.
4689
+ # @param window_size [String]
4690
+ # The length of the window. Can be a dynamic temporal
4691
+ # size indicated by a timedelta or the following string language:
4692
+ #
4693
+ # - 1ns (1 nanosecond)
4694
+ # - 1us (1 microsecond)
4695
+ # - 1ms (1 millisecond)
4696
+ # - 1s (1 second)
4697
+ # - 1m (1 minute)
4698
+ # - 1h (1 hour)
4699
+ # - 1d (1 calendar day)
4700
+ # - 1w (1 calendar week)
4701
+ # - 1mo (1 calendar month)
4702
+ # - 1q (1 calendar quarter)
4703
+ # - 1y (1 calendar year)
4704
+ #
4705
+ # By "calendar day", we mean the corresponding time on the next day
4706
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4707
+ # "calendar week", "calendar month", "calendar quarter", and
4708
+ # "calendar year".
4709
+ # @param min_periods [Integer]
4710
+ # The number of values in the window that should be non-null before computing
4711
+ # a result.
4712
+ # @param closed ['left', 'right', 'both', 'none']
4713
+ # Define which sides of the temporal interval are closed (inclusive),
4714
+ # defaults to `'right'`.
4715
+ # @param warn_if_unsorted [Boolean]
4716
+ # Warn if data is not known to be sorted by `by` column.
4717
+ #
4718
+ # @return [Expr]
4719
+ #
4720
+ # @note
4721
+ # If you want to compute multiple aggregation statistics over the same dynamic
4722
+ # window, consider using `rolling` - this method can cache the window size
4723
+ # computation.
4724
+ #
4725
+ # @example Create a DataFrame with a datetime column and a row number column
4726
+ # start = DateTime.new(2001, 1, 1)
4727
+ # stop = DateTime.new(2001, 1, 2)
4728
+ # df_temporal = Polars::DataFrame.new(
4729
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4730
+ # ).with_row_index
4731
+ # # =>
4732
+ # # shape: (25, 2)
4733
+ # # ┌───────┬─────────────────────┐
4734
+ # # │ index ┆ date │
4735
+ # # │ --- ┆ --- │
4736
+ # # │ u32 ┆ datetime[ns] │
4737
+ # # ╞═══════╪═════════════════════╡
4738
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4739
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4740
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4741
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4742
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4743
+ # # │ … ┆ … │
4744
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4745
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4746
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4747
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4748
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4749
+ # # └───────┴─────────────────────┘
4750
+ #
4751
+ # @example Compute the rolling median with the temporal windows closed on the right:
4752
+ # df_temporal.with_columns(
4753
+ # rolling_row_median: Polars.col("index").rolling_median_by(
4754
+ # "date", "2h"
4755
+ # )
4756
+ # )
4757
+ # # =>
4758
+ # # shape: (25, 3)
4759
+ # # ┌───────┬─────────────────────┬────────────────────┐
4760
+ # # │ index ┆ date ┆ rolling_row_median │
4761
+ # # │ --- ┆ --- ┆ --- │
4762
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4763
+ # # ╞═══════╪═════════════════════╪════════════════════╡
4764
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4765
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4766
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4767
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4768
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4769
+ # # │ … ┆ … ┆ … │
4770
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4771
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4772
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4773
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4774
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4775
+ # # └───────┴─────────────────────┴────────────────────┘
4776
+ def rolling_median_by(
4777
+ by,
4778
+ window_size,
4779
+ min_periods: 1,
4780
+ closed: "right",
4781
+ warn_if_unsorted: nil
4782
+ )
4783
+ window_size = _prepare_rolling_by_window_args(window_size)
4784
+ by = Utils.parse_into_expression(by)
4785
+ _from_rbexpr(
4786
+ _rbexpr.rolling_median_by(by, window_size, min_periods, closed)
4787
+ )
4788
+ end
4789
+
4790
+ # Compute a rolling quantile based on another column.
4791
+ #
4792
+ # @param by [String]
4793
+ # This column must be of dtype Datetime or Date.
4794
+ # @param quantile [Float]
4795
+ # Quantile between 0.0 and 1.0.
4796
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
4797
+ # Interpolation method.
4798
+ # @param window_size [String]
4799
+ # The length of the window. Can be a dynamic
4800
+ # temporal size indicated by a timedelta or the following string language:
4801
+ #
4802
+ # - 1ns (1 nanosecond)
4803
+ # - 1us (1 microsecond)
4804
+ # - 1ms (1 millisecond)
4805
+ # - 1s (1 second)
4806
+ # - 1m (1 minute)
4807
+ # - 1h (1 hour)
4808
+ # - 1d (1 calendar day)
4809
+ # - 1w (1 calendar week)
4810
+ # - 1mo (1 calendar month)
4811
+ # - 1q (1 calendar quarter)
4812
+ # - 1y (1 calendar year)
4813
+ #
4814
+ # By "calendar day", we mean the corresponding time on the next day
4815
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4816
+ # "calendar week", "calendar month", "calendar quarter", and
4817
+ # "calendar year".
4818
+ # @param min_periods [Integer]
4819
+ # The number of values in the window that should be non-null before computing
4820
+ # a result.
4821
+ # @param closed ['left', 'right', 'both', 'none']
4822
+ # Define which sides of the temporal interval are closed (inclusive),
4823
+ # defaults to `'right'`.
4824
+ # @param warn_if_unsorted [Boolean]
4825
+ # Warn if data is not known to be sorted by `by` column.
4826
+ #
4827
+ # @return [Expr]
4828
+ #
4829
+ # @note
4830
+ # If you want to compute multiple aggregation statistics over the same dynamic
4831
+ # window, consider using `rolling` - this method can cache the window size
4832
+ # computation.
4833
+ #
4834
+ # @example Create a DataFrame with a datetime column and a row number column
4835
+ # start = DateTime.new(2001, 1, 1)
4836
+ # stop = DateTime.new(2001, 1, 2)
4837
+ # df_temporal = Polars::DataFrame.new(
4838
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4839
+ # ).with_row_index
4840
+ # # =>
4841
+ # # shape: (25, 2)
4842
+ # # ┌───────┬─────────────────────┐
4843
+ # # │ index ┆ date │
4844
+ # # │ --- ┆ --- │
4845
+ # # │ u32 ┆ datetime[ns] │
4846
+ # # ╞═══════╪═════════════════════╡
4847
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4848
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4849
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4850
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4851
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4852
+ # # │ … ┆ … │
4853
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4854
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4855
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4856
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4857
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4858
+ # # └───────┴─────────────────────┘
4859
+ #
4860
+ # @example Compute the rolling quantile with the temporal windows closed on the right:
4861
+ # df_temporal.with_columns(
4862
+ # rolling_row_quantile: Polars.col("index").rolling_quantile_by(
4863
+ # "date", "2h", quantile: 0.3
4864
+ # )
4865
+ # )
4866
+ # # =>
4867
+ # # shape: (25, 3)
4868
+ # # ┌───────┬─────────────────────┬──────────────────────┐
4869
+ # # │ index ┆ date ┆ rolling_row_quantile │
4870
+ # # │ --- ┆ --- ┆ --- │
4871
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4872
+ # # ╞═══════╪═════════════════════╪══════════════════════╡
4873
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4874
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.0 │
4875
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4876
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4877
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4878
+ # # │ … ┆ … ┆ … │
4879
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4880
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4881
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4882
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4883
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4884
+ # # └───────┴─────────────────────┴──────────────────────┘
4885
+ def rolling_quantile_by(
4886
+ by,
4887
+ window_size,
4888
+ quantile:,
4889
+ interpolation: "nearest",
4890
+ min_periods: 1,
4891
+ closed: "right",
4892
+ warn_if_unsorted: nil
4893
+ )
4894
+ window_size = _prepare_rolling_by_window_args(window_size)
4895
+ by = Utils.parse_into_expression(by)
4896
+ _from_rbexpr(
4897
+ _rbexpr.rolling_quantile_by(
4898
+ by,
4899
+ quantile,
4900
+ interpolation,
4901
+ window_size,
4902
+ min_periods,
4903
+ closed,
4904
+ )
4905
+ )
4906
+ end
4907
+
3860
4908
  # Apply a rolling min (moving min) over the values in this array.
3861
4909
  #
3862
4910
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3889,12 +4937,6 @@ module Polars
3889
4937
  # a result. If None, it will be set equal to window size.
3890
4938
  # @param center [Boolean]
3891
4939
  # Set the labels at the center of the window
3892
- # @param by [String]
3893
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
3894
- # set the column that will be used to determine the windows. This column must
3895
- # be of dtype `{Date, Datetime}`
3896
- # @param closed ["left", "right", "both", "none"]
3897
- # Define whether the temporal window interval is closed or not.
3898
4940
  #
3899
4941
  # @note
3900
4942
  # This functionality is experimental and may change without it being considered a
@@ -3932,16 +4974,11 @@ module Polars
3932
4974
  window_size,
3933
4975
  weights: nil,
3934
4976
  min_periods: nil,
3935
- center: false,
3936
- by: nil,
3937
- closed: nil
4977
+ center: false
3938
4978
  )
3939
- window_size, min_periods = _prepare_rolling_window_args(
3940
- window_size, min_periods
3941
- )
3942
4979
  _from_rbexpr(
3943
4980
  _rbexpr.rolling_min(
3944
- window_size, weights, min_periods, center, by, closed
4981
+ window_size, weights, min_periods, center
3945
4982
  )
3946
4983
  )
3947
4984
  end
@@ -3978,12 +5015,6 @@ module Polars
3978
5015
  # a result. If None, it will be set equal to window size.
3979
5016
  # @param center [Boolean]
3980
5017
  # Set the labels at the center of the window
3981
- # @param by [String]
3982
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
3983
- # set the column that will be used to determine the windows. This column must
3984
- # be of dtype `{Date, Datetime}`
3985
- # @param closed ["left", "right", "both", "none"]
3986
- # Define whether the temporal window interval is closed or not.
3987
5018
  #
3988
5019
  # @note
3989
5020
  # This functionality is experimental and may change without it being considered a
@@ -4021,16 +5052,11 @@ module Polars
4021
5052
  window_size,
4022
5053
  weights: nil,
4023
5054
  min_periods: nil,
4024
- center: false,
4025
- by: nil,
4026
- closed: nil
5055
+ center: false
4027
5056
  )
4028
- window_size, min_periods = _prepare_rolling_window_args(
4029
- window_size, min_periods
4030
- )
4031
5057
  _from_rbexpr(
4032
5058
  _rbexpr.rolling_max(
4033
- window_size, weights, min_periods, center, by, closed
5059
+ window_size, weights, min_periods, center
4034
5060
  )
4035
5061
  )
4036
5062
  end
@@ -4067,12 +5093,6 @@ module Polars
4067
5093
  # a result. If None, it will be set equal to window size.
4068
5094
  # @param center [Boolean]
4069
5095
  # Set the labels at the center of the window
4070
- # @param by [String]
4071
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4072
- # set the column that will be used to determine the windows. This column must
4073
- # be of dtype `{Date, Datetime}`
4074
- # @param closed ["left", "right", "both", "none"]
4075
- # Define whether the temporal window interval is closed or not.
4076
5096
  #
4077
5097
  # @note
4078
5098
  # This functionality is experimental and may change without it being considered a
@@ -4110,16 +5130,11 @@ module Polars
4110
5130
  window_size,
4111
5131
  weights: nil,
4112
5132
  min_periods: nil,
4113
- center: false,
4114
- by: nil,
4115
- closed: nil
5133
+ center: false
4116
5134
  )
4117
- window_size, min_periods = _prepare_rolling_window_args(
4118
- window_size, min_periods
4119
- )
4120
5135
  _from_rbexpr(
4121
5136
  _rbexpr.rolling_mean(
4122
- window_size, weights, min_periods, center, by, closed
5137
+ window_size, weights, min_periods, center
4123
5138
  )
4124
5139
  )
4125
5140
  end
@@ -4156,12 +5171,6 @@ module Polars
4156
5171
  # a result. If None, it will be set equal to window size.
4157
5172
  # @param center [Boolean]
4158
5173
  # Set the labels at the center of the window
4159
- # @param by [String]
4160
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4161
- # set the column that will be used to determine the windows. This column must
4162
- # be of dtype `{Date, Datetime}`
4163
- # @param closed ["left", "right", "both", "none"]
4164
- # Define whether the temporal window interval is closed or not.
4165
5174
  #
4166
5175
  # @note
4167
5176
  # This functionality is experimental and may change without it being considered a
@@ -4199,16 +5208,11 @@ module Polars
4199
5208
  window_size,
4200
5209
  weights: nil,
4201
5210
  min_periods: nil,
4202
- center: false,
4203
- by: nil,
4204
- closed: nil
5211
+ center: false
4205
5212
  )
4206
- window_size, min_periods = _prepare_rolling_window_args(
4207
- window_size, min_periods
4208
- )
4209
5213
  _from_rbexpr(
4210
5214
  _rbexpr.rolling_sum(
4211
- window_size, weights, min_periods, center, by, closed
5215
+ window_size, weights, min_periods, center
4212
5216
  )
4213
5217
  )
4214
5218
  end
@@ -4245,12 +5249,6 @@ module Polars
4245
5249
  # a result. If None, it will be set equal to window size.
4246
5250
  # @param center [Boolean]
4247
5251
  # Set the labels at the center of the window
4248
- # @param by [String]
4249
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4250
- # set the column that will be used to determine the windows. This column must
4251
- # be of dtype `{Date, Datetime}`
4252
- # @param closed ["left", "right", "both", "none"]
4253
- # Define whether the temporal window interval is closed or not.
4254
5252
  #
4255
5253
  # @note
4256
5254
  # This functionality is experimental and may change without it being considered a
@@ -4289,17 +5287,11 @@ module Polars
4289
5287
  weights: nil,
4290
5288
  min_periods: nil,
4291
5289
  center: false,
4292
- by: nil,
4293
- closed: nil,
4294
- ddof: 1,
4295
- warn_if_unsorted: true
5290
+ ddof: 1
4296
5291
  )
4297
- window_size, min_periods = _prepare_rolling_window_args(
4298
- window_size, min_periods
4299
- )
4300
5292
  _from_rbexpr(
4301
5293
  _rbexpr.rolling_std(
4302
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5294
+ window_size, weights, min_periods, center, ddof
4303
5295
  )
4304
5296
  )
4305
5297
  end
@@ -4336,12 +5328,6 @@ module Polars
4336
5328
  # a result. If None, it will be set equal to window size.
4337
5329
  # @param center [Boolean]
4338
5330
  # Set the labels at the center of the window
4339
- # @param by [String]
4340
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4341
- # set the column that will be used to determine the windows. This column must
4342
- # be of dtype `{Date, Datetime}`
4343
- # @param closed ["left", "right", "both", "none"]
4344
- # Define whether the temporal window interval is closed or not.
4345
5331
  #
4346
5332
  # @note
4347
5333
  # This functionality is experimental and may change without it being considered a
@@ -4380,17 +5366,11 @@ module Polars
4380
5366
  weights: nil,
4381
5367
  min_periods: nil,
4382
5368
  center: false,
4383
- by: nil,
4384
- closed: nil,
4385
- ddof: 1,
4386
- warn_if_unsorted: true
5369
+ ddof: 1
4387
5370
  )
4388
- window_size, min_periods = _prepare_rolling_window_args(
4389
- window_size, min_periods
4390
- )
4391
5371
  _from_rbexpr(
4392
5372
  _rbexpr.rolling_var(
4393
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5373
+ window_size, weights, min_periods, center, ddof
4394
5374
  )
4395
5375
  )
4396
5376
  end
@@ -4423,12 +5403,6 @@ module Polars
4423
5403
  # a result. If None, it will be set equal to window size.
4424
5404
  # @param center [Boolean]
4425
5405
  # Set the labels at the center of the window
4426
- # @param by [String]
4427
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4428
- # set the column that will be used to determine the windows. This column must
4429
- # be of dtype `{Date, Datetime}`
4430
- # @param closed ["left", "right", "both", "none"]
4431
- # Define whether the temporal window interval is closed or not.
4432
5406
  #
4433
5407
  # @note
4434
5408
  # This functionality is experimental and may change without it being considered a
@@ -4466,17 +5440,11 @@ module Polars
4466
5440
  window_size,
4467
5441
  weights: nil,
4468
5442
  min_periods: nil,
4469
- center: false,
4470
- by: nil,
4471
- closed: nil,
4472
- warn_if_unsorted: true
5443
+ center: false
4473
5444
  )
4474
- window_size, min_periods = _prepare_rolling_window_args(
4475
- window_size, min_periods
4476
- )
4477
5445
  _from_rbexpr(
4478
5446
  _rbexpr.rolling_median(
4479
- window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5447
+ window_size, weights, min_periods, center
4480
5448
  )
4481
5449
  )
4482
5450
  end
@@ -4513,12 +5481,6 @@ module Polars
4513
5481
  # a result. If None, it will be set equal to window size.
4514
5482
  # @param center [Boolean]
4515
5483
  # Set the labels at the center of the window
4516
- # @param by [String]
4517
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4518
- # set the column that will be used to determine the windows. This column must
4519
- # be of dtype `{Date, Datetime}`
4520
- # @param closed ["left", "right", "both", "none"]
4521
- # Define whether the temporal window interval is closed or not.
4522
5484
  #
4523
5485
  # @note
4524
5486
  # This functionality is experimental and may change without it being considered a
@@ -4558,17 +5520,11 @@ module Polars
4558
5520
  window_size: 2,
4559
5521
  weights: nil,
4560
5522
  min_periods: nil,
4561
- center: false,
4562
- by: nil,
4563
- closed: nil,
4564
- warn_if_unsorted: true
5523
+ center: false
4565
5524
  )
4566
- window_size, min_periods = _prepare_rolling_window_args(
4567
- window_size, min_periods
4568
- )
4569
5525
  _from_rbexpr(
4570
5526
  _rbexpr.rolling_quantile(
4571
- quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5527
+ quantile, interpolation, window_size, weights, min_periods, center
4572
5528
  )
4573
5529
  )
4574
5530
  end
@@ -4837,7 +5793,7 @@ module Polars
4837
5793
  # # │ 12 ┆ 0.0 │
4838
5794
  # # └──────┴────────────┘
4839
5795
  def pct_change(n: 1)
4840
- n = Utils.parse_as_expression(n)
5796
+ n = Utils.parse_into_expression(n)
4841
5797
  _from_rbexpr(_rbexpr.pct_change(n))
4842
5798
  end
4843
5799
 
@@ -4929,12 +5885,12 @@ module Polars
4929
5885
  # # │ null ┆ null │
4930
5886
  # # │ 50 ┆ 10 │
4931
5887
  # # └──────┴─────────────┘
4932
- def clip(lower_bound, upper_bound)
5888
+ def clip(lower_bound = nil, upper_bound = nil)
4933
5889
  if !lower_bound.nil?
4934
- lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
5890
+ lower_bound = Utils.parse_into_expression(lower_bound)
4935
5891
  end
4936
5892
  if !upper_bound.nil?
4937
- upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
5893
+ upper_bound = Utils.parse_into_expression(upper_bound)
4938
5894
  end
4939
5895
  _from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
4940
5896
  end
@@ -5321,18 +6277,38 @@ module Polars
5321
6277
  #
5322
6278
  # @example
5323
6279
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
5324
- # df.select(Polars.col("foo").reshape([3, 3]))
6280
+ # square = df.select(Polars.col("foo").reshape([3, 3]))
5325
6281
  # # =>
5326
6282
  # # shape: (3, 1)
5327
- # # ┌───────────┐
5328
- # # │ foo
5329
- # # │ ---
5330
- # # │ list[i64] │
5331
- # # ╞═══════════╡
5332
- # # │ [1, 2, 3]
5333
- # # │ [4, 5, 6]
5334
- # # │ [7, 8, 9]
5335
- # # └───────────┘
6283
+ # # ┌───────────────┐
6284
+ # # │ foo
6285
+ # # │ ---
6286
+ # # │ array[i64, 3] │
6287
+ # # ╞═══════════════╡
6288
+ # # │ [1, 2, 3]
6289
+ # # │ [4, 5, 6]
6290
+ # # │ [7, 8, 9]
6291
+ # # └───────────────┘
6292
+ #
6293
+ # @example
6294
+ # square.select(Polars.col("foo").reshape([9]))
6295
+ # # =>
6296
+ # # shape: (9, 1)
6297
+ # # ┌─────┐
6298
+ # # │ foo │
6299
+ # # │ --- │
6300
+ # # │ i64 │
6301
+ # # ╞═════╡
6302
+ # # │ 1 │
6303
+ # # │ 2 │
6304
+ # # │ 3 │
6305
+ # # │ 4 │
6306
+ # # │ 5 │
6307
+ # # │ 6 │
6308
+ # # │ 7 │
6309
+ # # │ 8 │
6310
+ # # │ 9 │
6311
+ # # └─────┘
5336
6312
  def reshape(dims)
5337
6313
  _from_rbexpr(_rbexpr.reshape(dims))
5338
6314
  end
@@ -5408,14 +6384,14 @@ module Polars
5408
6384
  end
5409
6385
 
5410
6386
  if !n.nil? && frac.nil?
5411
- n = Utils.parse_as_expression(n)
6387
+ n = Utils.parse_into_expression(n)
5412
6388
  return _from_rbexpr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
5413
6389
  end
5414
6390
 
5415
6391
  if frac.nil?
5416
6392
  frac = 1.0
5417
6393
  end
5418
- frac = Utils.parse_as_expression(frac)
6394
+ frac = Utils.parse_into_expression(frac)
5419
6395
  _from_rbexpr(
5420
6396
  _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
5421
6397
  )
@@ -5548,11 +6524,17 @@ module Polars
5548
6524
 
5549
6525
  # Count all unique values and create a struct mapping value to count.
5550
6526
  #
5551
- # @param multithreaded [Boolean]
5552
- # Better to turn this off in the aggregation context, as it can lead to
5553
- # contention.
5554
6527
  # @param sort [Boolean]
5555
- # Ensure the output is sorted from most values to least.
6528
+ # Sort the output by count in descending order.
6529
+ # If set to `false` (default), the order of the output is random.
6530
+ # @param parallel [Boolean]
6531
+ # Execute the computation in parallel.
6532
+ # @param name [String]
6533
+ # Give the resulting count column a specific name;
6534
+ # if `normalize` is true defaults to "proportion",
6535
+ # otherwise defaults to "count".
6536
+ # @param normalize [Boolean]
6537
+ # If true, gives relative frequencies of the unique values.
5556
6538
  #
5557
6539
  # @return [Expr]
5558
6540
  #
@@ -5578,8 +6560,22 @@ module Polars
5578
6560
  # # │ {"b",2} │
5579
6561
  # # │ {"a",1} │
5580
6562
  # # └───────────┘
5581
- def value_counts(multithreaded: false, sort: false)
5582
- _from_rbexpr(_rbexpr.value_counts(multithreaded, sort))
6563
+ def value_counts(
6564
+ sort: false,
6565
+ parallel: false,
6566
+ name: nil,
6567
+ normalize: false
6568
+ )
6569
+ if name.nil?
6570
+ if normalize
6571
+ name = "proportion"
6572
+ else
6573
+ name = "count"
6574
+ end
6575
+ end
6576
+ _from_rbexpr(
6577
+ _rbexpr.value_counts(sort, parallel, name, normalize)
6578
+ )
5583
6579
  end
5584
6580
 
5585
6581
  # Return a count of the unique values in the order of appearance.
@@ -5954,6 +6950,10 @@ module Polars
5954
6950
  # # │ 3 ┆ 1.0 ┆ 10.0 │
5955
6951
  # # └─────┴─────┴──────────┘
5956
6952
  def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
6953
+ if !default.eql?(NO_DEFAULT)
6954
+ return replace_strict(old, new, default: default, return_dtype: return_dtype)
6955
+ end
6956
+
5957
6957
  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
5958
6958
  new = Series.new(old.values)
5959
6959
  old = Series.new(old.keys)
@@ -5966,17 +6966,164 @@ module Polars
5966
6966
  end
5967
6967
  end
5968
6968
 
5969
- old = Utils.parse_as_expression(old, str_as_lit: true)
5970
- new = Utils.parse_as_expression(new, str_as_lit: true)
6969
+ old = Utils.parse_into_expression(old, str_as_lit: true)
6970
+ new = Utils.parse_into_expression(new, str_as_lit: true)
5971
6971
 
5972
- default =
5973
- if default.eql?(NO_DEFAULT)
5974
- nil
5975
- else
5976
- Utils.parse_as_expression(default, str_as_lit: true)
5977
- end
6972
+ result = _from_rbexpr(_rbexpr.replace(old, new))
6973
+
6974
+ if !return_dtype.nil?
6975
+ result = result.cast(return_dtype)
6976
+ end
5978
6977
 
5979
- _from_rbexpr(_rbexpr.replace(old, new, default, return_dtype))
6978
+ result
6979
+ end
6980
+
6981
+ # Replace all values by different values.
6982
+ #
6983
+ # @param old [Object]
6984
+ # Value or sequence of values to replace.
6985
+ # Accepts expression input. Sequences are parsed as Series,
6986
+ # other non-expression inputs are parsed as literals.
6987
+ # Also accepts a mapping of values to their replacement as syntactic sugar for
6988
+ # `replace_strict(Series.new(mapping.keys), Series.new(mapping.values))`.
6989
+ # @param new [Object]
6990
+ # Value or sequence of values to replace by.
6991
+ # Accepts expression input. Sequences are parsed as Series,
6992
+ # other non-expression inputs are parsed as literals.
6993
+ # Length must match the length of `old` or have length 1.
6994
+ # @param default [Object]
6995
+ # Set values that were not replaced to this value. If no default is specified
6996
+ # (the default), an error is raised if any non-null values were not replaced.
6997
+ # Accepts expression input. Non-expression inputs are parsed as literals.
6998
+ # @param return_dtype [Object]
6999
+ # The data type of the resulting expression. If set to `nil` (default),
7000
+ # the data type is determined automatically based on the other inputs.
7001
+ #
7002
+ # @return [Expr]
7003
+ #
7004
+ # @note
7005
+ # The global string cache must be enabled when replacing categorical values.
7006
+ #
7007
+ # @example Replace values by passing sequences to the `old` and `new` parameters.
7008
+ # df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
7009
+ # df.with_columns(
7010
+ # replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
7011
+ # )
7012
+ # # =>
7013
+ # # shape: (4, 2)
7014
+ # # ┌─────┬──────────┐
7015
+ # # │ a ┆ replaced │
7016
+ # # │ --- ┆ --- │
7017
+ # # │ i64 ┆ i64 │
7018
+ # # ╞═════╪══════════╡
7019
+ # # │ 1 ┆ 100 │
7020
+ # # │ 2 ┆ 200 │
7021
+ # # │ 2 ┆ 200 │
7022
+ # # │ 3 ┆ 300 │
7023
+ # # └─────┴──────────┘
7024
+ #
7025
+ # @example By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.
7026
+ # mapping = {2 => 200, 3 => 300}
7027
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
7028
+ # # =>
7029
+ # # shape: (4, 2)
7030
+ # # ┌─────┬──────────┐
7031
+ # # │ a ┆ replaced │
7032
+ # # │ --- ┆ --- │
7033
+ # # │ i64 ┆ i64 │
7034
+ # # ╞═════╪══════════╡
7035
+ # # │ 1 ┆ -1 │
7036
+ # # │ 2 ┆ 200 │
7037
+ # # │ 2 ┆ 200 │
7038
+ # # │ 3 ┆ 300 │
7039
+ # # └─────┴──────────┘
7040
+ #
7041
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type.
7042
+ # df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
7043
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
7044
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
7045
+ # # =>
7046
+ # # shape: (3, 2)
7047
+ # # ┌─────┬──────────┐
7048
+ # # │ a ┆ replaced │
7049
+ # # │ --- ┆ --- │
7050
+ # # │ str ┆ i64 │
7051
+ # # ╞═════╪══════════╡
7052
+ # # │ x ┆ 1 │
7053
+ # # │ y ┆ 2 │
7054
+ # # │ z ┆ 3 │
7055
+ # # └─────┴──────────┘
7056
+ #
7057
+ # @example
7058
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
7059
+ # # =>
7060
+ # # shape: (3, 2)
7061
+ # # ┌─────┬──────────┐
7062
+ # # │ a ┆ replaced │
7063
+ # # │ --- ┆ --- │
7064
+ # # │ str ┆ str │
7065
+ # # ╞═════╪══════════╡
7066
+ # # │ x ┆ 1 │
7067
+ # # │ y ┆ 2 │
7068
+ # # │ z ┆ 3 │
7069
+ # # └─────┴──────────┘
7070
+ #
7071
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
7072
+ # df.with_columns(
7073
+ # replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
7074
+ # )
7075
+ # # =>
7076
+ # # shape: (3, 2)
7077
+ # # ┌─────┬──────────┐
7078
+ # # │ a ┆ replaced │
7079
+ # # │ --- ┆ --- │
7080
+ # # │ str ┆ u8 │
7081
+ # # ╞═════╪══════════╡
7082
+ # # │ x ┆ 1 │
7083
+ # # │ y ┆ 2 │
7084
+ # # │ z ┆ 3 │
7085
+ # # └─────┴──────────┘
7086
+ #
7087
+ # @example Expression input is supported for all parameters.
7088
+ # df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
7089
+ # df.with_columns(
7090
+ # replaced: Polars.col("a").replace_strict(
7091
+ # Polars.col("a").max,
7092
+ # Polars.col("b").sum,
7093
+ # default: Polars.col("b")
7094
+ # )
7095
+ # )
7096
+ # # =>
7097
+ # # shape: (4, 3)
7098
+ # # ┌─────┬─────┬──────────┐
7099
+ # # │ a ┆ b ┆ replaced │
7100
+ # # │ --- ┆ --- ┆ --- │
7101
+ # # │ i64 ┆ f64 ┆ f64 │
7102
+ # # ╞═════╪═════╪══════════╡
7103
+ # # │ 1 ┆ 1.5 ┆ 1.5 │
7104
+ # # │ 2 ┆ 2.5 ┆ 2.5 │
7105
+ # # │ 2 ┆ 5.0 ┆ 5.0 │
7106
+ # # │ 3 ┆ 1.0 ┆ 10.0 │
7107
+ # # └─────┴─────┴──────────┘
7108
+ def replace_strict(
7109
+ old,
7110
+ new = NO_DEFAULT,
7111
+ default: NO_DEFAULT,
7112
+ return_dtype: nil
7113
+ )
7114
+ if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
7115
+ new = Series.new(old.values)
7116
+ old = Series.new(old.keys)
7117
+ end
7118
+
7119
+ old = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
7120
+ new = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)
7121
+
7122
+ default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_into_expression(default, str_as_lit: true)
7123
+
7124
+ _from_rbexpr(
7125
+ _rbexpr.replace_strict(old, new, default, return_dtype)
7126
+ )
5980
7127
  end
5981
7128
 
5982
7129
  # Create an object namespace of all list related methods.
@@ -6053,7 +7200,7 @@ module Polars
6053
7200
  end
6054
7201
 
6055
7202
  def _to_expr(other)
6056
- other.is_a?(Expr) ? other : Utils.lit(other)
7203
+ other.is_a?(Expr) ? other : F.lit(other)
6057
7204
  end
6058
7205
 
6059
7206
  def _prepare_alpha(com, span, half_life, alpha)
@@ -6101,5 +7248,9 @@ module Polars
6101
7248
  end
6102
7249
  [window_size, min_periods]
6103
7250
  end
7251
+
7252
+ def _prepare_rolling_by_window_args(window_size)
7253
+ window_size
7254
+ end
6104
7255
  end
6105
7256
  end