polars-df 0.10.0-x86_64-darwin → 0.12.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/LICENSE-THIRD-PARTY.txt +1127 -867
  5. data/README.md +6 -6
  6. data/lib/polars/3.1/polars.bundle +0 -0
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/array_expr.rb +4 -4
  10. data/lib/polars/batched_csv_reader.rb +11 -5
  11. data/lib/polars/cat_expr.rb +0 -36
  12. data/lib/polars/cat_name_space.rb +0 -37
  13. data/lib/polars/convert.rb +6 -1
  14. data/lib/polars/data_frame.rb +176 -403
  15. data/lib/polars/data_types.rb +1 -1
  16. data/lib/polars/date_time_expr.rb +525 -572
  17. data/lib/polars/date_time_name_space.rb +263 -460
  18. data/lib/polars/dynamic_group_by.rb +5 -5
  19. data/lib/polars/exceptions.rb +7 -0
  20. data/lib/polars/expr.rb +1394 -243
  21. data/lib/polars/expr_dispatch.rb +1 -1
  22. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  23. data/lib/polars/functions/as_datatype.rb +63 -40
  24. data/lib/polars/functions/lazy.rb +63 -14
  25. data/lib/polars/functions/lit.rb +1 -1
  26. data/lib/polars/functions/range/date_range.rb +90 -57
  27. data/lib/polars/functions/range/datetime_range.rb +149 -0
  28. data/lib/polars/functions/range/int_range.rb +2 -2
  29. data/lib/polars/functions/range/time_range.rb +141 -0
  30. data/lib/polars/functions/repeat.rb +1 -1
  31. data/lib/polars/functions/whenthen.rb +1 -1
  32. data/lib/polars/group_by.rb +88 -23
  33. data/lib/polars/io/avro.rb +24 -0
  34. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  35. data/lib/polars/io/database.rb +73 -0
  36. data/lib/polars/io/ipc.rb +247 -0
  37. data/lib/polars/io/json.rb +29 -0
  38. data/lib/polars/io/ndjson.rb +80 -0
  39. data/lib/polars/io/parquet.rb +227 -0
  40. data/lib/polars/lazy_frame.rb +143 -272
  41. data/lib/polars/lazy_group_by.rb +100 -3
  42. data/lib/polars/list_expr.rb +11 -11
  43. data/lib/polars/list_name_space.rb +5 -1
  44. data/lib/polars/rolling_group_by.rb +7 -9
  45. data/lib/polars/series.rb +103 -187
  46. data/lib/polars/string_expr.rb +78 -102
  47. data/lib/polars/string_name_space.rb +5 -4
  48. data/lib/polars/testing.rb +2 -2
  49. data/lib/polars/utils/constants.rb +9 -0
  50. data/lib/polars/utils/convert.rb +97 -0
  51. data/lib/polars/utils/parse.rb +89 -0
  52. data/lib/polars/utils/various.rb +76 -0
  53. data/lib/polars/utils/wrap.rb +19 -0
  54. data/lib/polars/utils.rb +8 -300
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars/whenthen.rb +6 -6
  57. data/lib/polars.rb +20 -1
  58. metadata +17 -4
data/lib/polars/expr.rb CHANGED
@@ -82,8 +82,8 @@ module Polars
82
82
  #
83
83
  # @return [Expr]
84
84
  def **(power)
85
- exponent = Utils.expr_to_lit_or_expr(power)
86
- _from_rbexpr(_rbexpr.pow(exponent._rbexpr))
85
+ exponent = Utils.parse_into_expression(power)
86
+ _from_rbexpr(_rbexpr.pow(exponent))
87
87
  end
88
88
 
89
89
  # Greater than or equal.
@@ -811,8 +811,8 @@ module Polars
811
811
  # # │ 10 ┆ 4 │
812
812
  # # └─────┴──────┘
813
813
  def append(other, upcast: true)
814
- other = Utils.expr_to_lit_or_expr(other)
815
- _from_rbexpr(_rbexpr.append(other._rbexpr, upcast))
814
+ other = Utils.parse_into_expression(other)
815
+ _from_rbexpr(_rbexpr.append(other, upcast))
816
816
  end
817
817
 
818
818
  # Create a single chunk of memory for this Series.
@@ -1165,8 +1165,8 @@ module Polars
1165
1165
  # # │ 44 │
1166
1166
  # # └─────┘
1167
1167
  def dot(other)
1168
- other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
1169
- _from_rbexpr(_rbexpr.dot(other._rbexpr))
1168
+ other = Utils.parse_into_expression(other, str_as_lit: false)
1169
+ _from_rbexpr(_rbexpr.dot(other))
1170
1170
  end
1171
1171
 
1172
1172
  # Compute the most occurring value(s).
@@ -1252,12 +1252,12 @@ module Polars
1252
1252
  # df = Polars::DataFrame.new(
1253
1253
  # {
1254
1254
  # "group" => [
1255
- # "one",
1256
- # "one",
1257
- # "one",
1258
- # "two",
1259
- # "two",
1260
- # "two"
1255
+ # "one",
1256
+ # "one",
1257
+ # "one",
1258
+ # "two",
1259
+ # "two",
1260
+ # "two"
1261
1261
  # ],
1262
1262
  # "value" => [1, 98, 2, 3, 99, 4]
1263
1263
  # }
@@ -1346,7 +1346,7 @@ module Polars
1346
1346
  # # │ 2 ┆ 98 │
1347
1347
  # # └───────┴──────────┘
1348
1348
  def top_k(k: 5)
1349
- k = Utils.parse_as_expression(k)
1349
+ k = Utils.parse_into_expression(k)
1350
1350
  _from_rbexpr(_rbexpr.top_k(k))
1351
1351
  end
1352
1352
 
@@ -1385,7 +1385,7 @@ module Polars
1385
1385
  # # │ 2 ┆ 98 │
1386
1386
  # # └───────┴──────────┘
1387
1387
  def bottom_k(k: 5)
1388
- k = Utils.parse_as_expression(k)
1388
+ k = Utils.parse_into_expression(k)
1389
1389
  _from_rbexpr(_rbexpr.bottom_k(k))
1390
1390
  end
1391
1391
 
@@ -1498,8 +1498,8 @@ module Polars
1498
1498
  # # │ 0 ┆ 2 ┆ 4 │
1499
1499
  # # └──────┴───────┴─────┘
1500
1500
  def search_sorted(element, side: "any")
1501
- element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
1502
- _from_rbexpr(_rbexpr.search_sorted(element._rbexpr, side))
1501
+ element = Utils.parse_into_expression(element, str_as_lit: false)
1502
+ _from_rbexpr(_rbexpr.search_sorted(element, side))
1503
1503
  end
1504
1504
 
1505
1505
  # Sort this column by the ordering of another column, or multiple other columns.
@@ -1545,13 +1545,14 @@ module Polars
1545
1545
  # # │ two │
1546
1546
  # # └───────┘
1547
1547
  def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
1548
- by = Utils.parse_as_list_of_expressions(by, *more_by)
1549
- if !reverse.is_a?(::Array)
1550
- reverse = [reverse]
1551
- elsif by.length != reverse.length
1552
- raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
1553
- end
1554
- _from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
1548
+ by = Utils.parse_into_list_of_expressions(by, *more_by)
1549
+ reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
1550
+ nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
1551
+ _from_rbexpr(
1552
+ _rbexpr.sort_by(
1553
+ by, reverse, nulls_last, multithreaded, maintain_order
1554
+ )
1555
+ )
1555
1556
  end
1556
1557
 
1557
1558
  # Take values by index.
@@ -1588,14 +1589,51 @@ module Polars
1588
1589
  # # └───────┴───────────┘
1589
1590
  def gather(indices)
1590
1591
  if indices.is_a?(::Array)
1591
- indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1592
+ indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))._rbexpr
1592
1593
  else
1593
- indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1594
+ indices_lit = Utils.parse_into_expression(indices, str_as_lit: false)
1594
1595
  end
1595
- _from_rbexpr(_rbexpr.gather(indices_lit._rbexpr))
1596
+ _from_rbexpr(_rbexpr.gather(indices_lit))
1596
1597
  end
1597
1598
  alias_method :take, :gather
1598
1599
 
1600
+ # Return a single value by index.
1601
+ #
1602
+ # @param index [Object]
1603
+ # An expression that leads to a UInt32 index.
1604
+ #
1605
+ # @return [Expr]
1606
+ #
1607
+ # @example
1608
+ # df = Polars::DataFrame.new(
1609
+ # {
1610
+ # "group" => [
1611
+ # "one",
1612
+ # "one",
1613
+ # "one",
1614
+ # "two",
1615
+ # "two",
1616
+ # "two"
1617
+ # ],
1618
+ # "value" => [1, 98, 2, 3, 99, 4]
1619
+ # }
1620
+ # )
1621
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
1622
+ # # =>
1623
+ # # shape: (2, 2)
1624
+ # # ┌───────┬───────┐
1625
+ # # │ group ┆ value │
1626
+ # # │ --- ┆ --- │
1627
+ # # │ str ┆ i64 │
1628
+ # # ╞═══════╪═══════╡
1629
+ # # │ one ┆ 98 │
1630
+ # # │ two ┆ 99 │
1631
+ # # └───────┴───────┘
1632
+ def get(index)
1633
+ index_lit = Utils.parse_into_expression(index)
1634
+ _from_rbexpr(_rbexpr.get(index_lit))
1635
+ end
1636
+
1599
1637
  # Shift the values by a given period.
1600
1638
  #
1601
1639
  # @param n [Integer]
@@ -1622,9 +1660,9 @@ module Polars
1622
1660
  # # └──────┘
1623
1661
  def shift(n = 1, fill_value: nil)
1624
1662
  if !fill_value.nil?
1625
- fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1663
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1626
1664
  end
1627
- n = Utils.parse_as_expression(n)
1665
+ n = Utils.parse_into_expression(n)
1628
1666
  _from_rbexpr(_rbexpr.shift(n, fill_value))
1629
1667
  end
1630
1668
 
@@ -1727,8 +1765,8 @@ module Polars
1727
1765
  end
1728
1766
 
1729
1767
  if !value.nil?
1730
- value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
1731
- _from_rbexpr(_rbexpr.fill_null(value._rbexpr))
1768
+ value = Utils.parse_into_expression(value, str_as_lit: true)
1769
+ _from_rbexpr(_rbexpr.fill_null(value))
1732
1770
  else
1733
1771
  _from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
1734
1772
  end
@@ -1758,8 +1796,8 @@ module Polars
1758
1796
  # # │ zero ┆ 6.0 │
1759
1797
  # # └──────┴──────┘
1760
1798
  def fill_nan(fill_value)
1761
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
1762
- _from_rbexpr(_rbexpr.fill_nan(fill_value._rbexpr))
1799
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1800
+ _from_rbexpr(_rbexpr.fill_nan(fill_value))
1763
1801
  end
1764
1802
 
1765
1803
  # Fill missing values with the latest seen values.
@@ -2275,7 +2313,7 @@ module Polars
2275
2313
  # # │ 4 │
2276
2314
  # # └────────┘
2277
2315
  def over(expr)
2278
- rbexprs = Utils.selection_to_rbexpr_list(expr)
2316
+ rbexprs = Utils.parse_into_list_of_expressions(expr)
2279
2317
  _from_rbexpr(_rbexpr.over(rbexprs))
2280
2318
  end
2281
2319
 
@@ -2470,8 +2508,8 @@ module Polars
2470
2508
  # # │ 1.5 │
2471
2509
  # # └─────┘
2472
2510
  def quantile(quantile, interpolation: "nearest")
2473
- quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2474
- _from_rbexpr(_rbexpr.quantile(quantile._rbexpr, interpolation))
2511
+ quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
2512
+ _from_rbexpr(_rbexpr.quantile(quantile, interpolation))
2475
2513
  end
2476
2514
 
2477
2515
  # Bin continuous values into discrete categories.
@@ -2515,17 +2553,17 @@ module Polars
2515
2553
  # ).unnest("cut")
2516
2554
  # # =>
2517
2555
  # # shape: (5, 3)
2518
- # # ┌─────┬──────┬────────────┐
2519
- # # │ foo ┆ brk ┆ foo_bin │
2520
- # # │ --- ┆ --- ┆ --- │
2521
- # # │ i64 ┆ f64 ┆ cat │
2522
- # # ╞═════╪══════╪════════════╡
2523
- # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2524
- # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2525
- # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2526
- # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2527
- # # │ 2 ┆ inf ┆ (1, inf] │
2528
- # # └─────┴──────┴────────────┘
2556
+ # # ┌─────┬────────────┬────────────┐
2557
+ # # │ foo ┆ breakpoint ┆ category │
2558
+ # # │ --- ┆ --- ┆ --- │
2559
+ # # │ i64 ┆ f64 ┆ cat │
2560
+ # # ╞═════╪════════════╪════════════╡
2561
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2562
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2563
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2564
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2565
+ # # │ 2 ┆ inf ┆ (1, inf] │
2566
+ # # └─────┴────────────┴────────────┘
2529
2567
  def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
2530
2568
  _from_rbexpr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
2531
2569
  end
@@ -2596,17 +2634,17 @@ module Polars
2596
2634
  # ).unnest("qcut")
2597
2635
  # # =>
2598
2636
  # # shape: (5, 3)
2599
- # # ┌─────┬──────┬────────────┐
2600
- # # │ foo ┆ brk ┆ foo_bin │
2601
- # # │ --- ┆ --- ┆ --- │
2602
- # # │ i64 ┆ f64 ┆ cat │
2603
- # # ╞═════╪══════╪════════════╡
2604
- # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2605
- # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2606
- # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2607
- # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2608
- # # │ 2 ┆ inf ┆ (1, inf] │
2609
- # # └─────┴──────┴────────────┘
2637
+ # # ┌─────┬────────────┬────────────┐
2638
+ # # │ foo ┆ breakpoint ┆ category │
2639
+ # # │ --- ┆ --- ┆ --- │
2640
+ # # │ i64 ┆ f64 ┆ cat │
2641
+ # # ╞═════╪════════════╪════════════╡
2642
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2643
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2644
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2645
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2646
+ # # │ 2 ┆ inf ┆ (1, inf] │
2647
+ # # └─────┴────────────┴────────────┘
2610
2648
  def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
2611
2649
  if quantiles.is_a?(Integer)
2612
2650
  rbexpr = _rbexpr.qcut_uniform(
@@ -2630,18 +2668,18 @@ module Polars
2630
2668
  # df.select(Polars.col("s").rle).unnest("s")
2631
2669
  # # =>
2632
2670
  # # shape: (6, 2)
2633
- # # ┌─────────┬────────┐
2634
- # # │ lengths ┆ values │
2635
- # # │ --- ┆ --- │
2636
- # # │ i32 ┆ i64 │
2637
- # # ╞═════════╪════════╡
2638
- # # │ 2 ┆ 1 │
2639
- # # │ 1 ┆ 2 │
2640
- # # │ 1 ┆ 1 │
2641
- # # │ 1 ┆ null │
2642
- # # │ 1 ┆ 1 │
2643
- # # │ 2 ┆ 3 │
2644
- # # └─────────┴────────┘
2671
+ # # ┌─────┬───────┐
2672
+ # # │ len ┆ value │
2673
+ # # │ --- ┆ --- │
2674
+ # # │ u32 ┆ i64 │
2675
+ # # ╞═════╪═══════╡
2676
+ # # │ 2 ┆ 1 │
2677
+ # # │ 1 ┆ 2 │
2678
+ # # │ 1 ┆ 1 │
2679
+ # # │ 1 ┆ null │
2680
+ # # │ 1 ┆ 1 │
2681
+ # # │ 2 ┆ 3 │
2682
+ # # └─────┴───────┘
2645
2683
  def rle
2646
2684
  _from_rbexpr(_rbexpr.rle)
2647
2685
  end
@@ -2764,6 +2802,9 @@ module Polars
2764
2802
  # Dtype of the output Series.
2765
2803
  # @param agg_list [Boolean]
2766
2804
  # Aggregate list.
2805
+ # @param is_elementwise [Boolean]
2806
+ # If set to true this can run in the streaming engine, but may yield
2807
+ # incorrect results in group-by. Ensure you know what you are doing!
2767
2808
  #
2768
2809
  # @return [Expr]
2769
2810
  #
@@ -2784,12 +2825,21 @@ module Polars
2784
2825
  # # ╞══════╪════════╡
2785
2826
  # # │ 1 ┆ 0 │
2786
2827
  # # └──────┴────────┘
2787
- # def map(return_dtype: nil, agg_list: false, &f)
2828
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2788
2829
  # if !return_dtype.nil?
2789
2830
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2790
2831
  # end
2791
- # _from_rbexpr(_rbexpr.map(f, return_dtype, agg_list))
2832
+ # _from_rbexpr(
2833
+ # _rbexpr.map_batches(
2834
+ # # TODO _map_batches_wrapper
2835
+ # f,
2836
+ # return_dtype,
2837
+ # agg_list,
2838
+ # is_elementwise
2839
+ # )
2840
+ # )
2792
2841
  # end
2842
+ # alias_method :map, :map_batches
2793
2843
 
2794
2844
  # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
2795
2845
  #
@@ -2831,7 +2881,7 @@ module Polars
2831
2881
  #
2832
2882
  # @example In a selection context, the function is applied by row.
2833
2883
  # df.with_column(
2834
- # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
2884
+ # Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
2835
2885
  # )
2836
2886
  # # =>
2837
2887
  # # shape: (4, 3)
@@ -2851,7 +2901,7 @@ module Polars
2851
2901
  # .group_by("b", maintain_order: true)
2852
2902
  # .agg(
2853
2903
  # [
2854
- # Polars.col("a").apply { |x| x.sum }
2904
+ # Polars.col("a").map_elements { |x| x.sum }
2855
2905
  # ]
2856
2906
  # )
2857
2907
  # .collect
@@ -2866,12 +2916,23 @@ module Polars
2866
2916
  # # │ b ┆ 2 │
2867
2917
  # # │ c ┆ 4 │
2868
2918
  # # └─────┴─────┘
2869
- # def apply(return_dtype: nil, &f)
2870
- # wrap_f = lambda do |x|
2871
- # x.apply(return_dtype: return_dtype, &f)
2919
+ # def map_elements(
2920
+ # return_dtype: nil,
2921
+ # skip_nulls: true,
2922
+ # pass_name: false,
2923
+ # strategy: "thread_local",
2924
+ # &f
2925
+ # )
2926
+ # if pass_name
2927
+ # raise Todo
2928
+ # else
2929
+ # wrap_f = lambda do |x|
2930
+ # x.map_elements(return_dtype: return_dtype, skip_nulls: skip_nulls, &f)
2931
+ # end
2872
2932
  # end
2873
- # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
2933
+ # map_batches(agg_list: true, return_dtype: return_dtype, &wrap_f)
2874
2934
  # end
2935
+ # alias_method :apply, :map_elements
2875
2936
 
2876
2937
  # Explode a list or utf8 Series. This means that every item is expanded to a new
2877
2938
  # row.
@@ -3081,7 +3142,7 @@ module Polars
3081
3142
  # # │ null ┆ null ┆ null ┆ true │
3082
3143
  # # └──────┴──────┴────────┴────────────────┘
3083
3144
  def eq_missing(other)
3084
- other = Utils.parse_as_expression(other, str_as_lit: true)
3145
+ other = Utils.parse_into_expression(other, str_as_lit: true)
3085
3146
  _from_rbexpr(_rbexpr.eq_missing(other))
3086
3147
  end
3087
3148
 
@@ -3285,7 +3346,7 @@ module Polars
3285
3346
  # # │ null ┆ null ┆ null ┆ false │
3286
3347
  # # └──────┴──────┴────────┴────────────────┘
3287
3348
  def ne_missing(other)
3288
- other = Utils.parse_as_expression(other, str_as_lit: true)
3349
+ other = Utils.parse_into_expression(other, str_as_lit: true)
3289
3350
  _from_rbexpr(_rbexpr.neq_missing(other))
3290
3351
  end
3291
3352
 
@@ -3588,14 +3649,14 @@ module Polars
3588
3649
  def is_in(other)
3589
3650
  if other.is_a?(::Array)
3590
3651
  if other.length == 0
3591
- other = Polars.lit(nil)
3652
+ other = Polars.lit(nil)._rbexpr
3592
3653
  else
3593
- other = Polars.lit(Series.new(other))
3654
+ other = Polars.lit(Series.new(other))._rbexpr
3594
3655
  end
3595
3656
  else
3596
- other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
3657
+ other = Utils.parse_into_expression(other, str_as_lit: false)
3597
3658
  end
3598
- _from_rbexpr(_rbexpr.is_in(other._rbexpr))
3659
+ _from_rbexpr(_rbexpr.is_in(other))
3599
3660
  end
3600
3661
  alias_method :in?, :is_in
3601
3662
 
@@ -3630,15 +3691,15 @@ module Polars
3630
3691
  # # │ ["z", "z", "z"] │
3631
3692
  # # └─────────────────┘
3632
3693
  def repeat_by(by)
3633
- by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
3634
- _from_rbexpr(_rbexpr.repeat_by(by._rbexpr))
3694
+ by = Utils.parse_into_expression(by, str_as_lit: false)
3695
+ _from_rbexpr(_rbexpr.repeat_by(by))
3635
3696
  end
3636
3697
 
3637
3698
  # Check if this expression is between start and end.
3638
3699
  #
3639
- # @param start [Object]
3700
+ # @param lower_bound [Object]
3640
3701
  # Lower bound as primitive type or datetime.
3641
- # @param _end [Object]
3702
+ # @param upper_bound [Object]
3642
3703
  # Upper bound as primitive type or datetime.
3643
3704
  # @param closed ["both", "left", "right", "none"]
3644
3705
  # Define which sides of the interval are closed (inclusive).
@@ -3700,22 +3761,13 @@ module Polars
3700
3761
  # # │ d ┆ false │
3701
3762
  # # │ e ┆ false │
3702
3763
  # # └─────┴────────────┘
3703
- def is_between(start, _end, closed: "both")
3704
- start = Utils.expr_to_lit_or_expr(start, str_to_lit: false)
3705
- _end = Utils.expr_to_lit_or_expr(_end, str_to_lit: false)
3706
-
3707
- case closed
3708
- when "none"
3709
- (self > start) & (self < _end)
3710
- when "both"
3711
- (self >= start) & (self <= _end)
3712
- when "right"
3713
- (self > start) & (self <= _end)
3714
- when "left"
3715
- (self >= start) & (self < _end)
3716
- else
3717
- raise ArgumentError, "closed must be one of 'left', 'right', 'both', or 'none'"
3718
- end
3764
+ def is_between(lower_bound, upper_bound, closed: "both")
3765
+ lower_bound = Utils.parse_into_expression(lower_bound)
3766
+ upper_bound = Utils.parse_into_expression(upper_bound)
3767
+
3768
+ _from_rbexpr(
3769
+ _rbexpr.is_between(lower_bound, upper_bound, closed)
3770
+ )
3719
3771
  end
3720
3772
 
3721
3773
  # Hash the elements in the selection.
@@ -3857,6 +3909,1002 @@ module Polars
3857
3909
  _from_rbexpr(_rbexpr.interpolate(method))
3858
3910
  end
3859
3911
 
3912
+ # Apply a rolling min based on another column.
3913
+ #
3914
+ # @param by [String]
3915
+ # This column must be of dtype Datetime or Date.
3916
+ # @param window_size [String]
3917
+ # The length of the window. Can be a dynamic temporal
3918
+ # size indicated by a timedelta or the following string language:
3919
+ #
3920
+ # - 1ns (1 nanosecond)
3921
+ # - 1us (1 microsecond)
3922
+ # - 1ms (1 millisecond)
3923
+ # - 1s (1 second)
3924
+ # - 1m (1 minute)
3925
+ # - 1h (1 hour)
3926
+ # - 1d (1 calendar day)
3927
+ # - 1w (1 calendar week)
3928
+ # - 1mo (1 calendar month)
3929
+ # - 1q (1 calendar quarter)
3930
+ # - 1y (1 calendar year)
3931
+ #
3932
+ # By "calendar day", we mean the corresponding time on the next day
3933
+ # (which may not be 24 hours, due to daylight savings). Similarly for
3934
+ # "calendar week", "calendar month", "calendar quarter", and
3935
+ # "calendar year".
3936
+ # @param min_periods [Integer]
3937
+ # The number of values in the window that should be non-null before computing
3938
+ # a result.
3939
+ # @param closed ['left', 'right', 'both', 'none']
3940
+ # Define which sides of the temporal interval are closed (inclusive),
3941
+ # defaults to `'right'`.
3942
+ # @param warn_if_unsorted [Boolean]
3943
+ # Warn if data is not known to be sorted by `by` column.
3944
+ #
3945
+ # @return [Expr]
3946
+ #
3947
+ # @note
3948
+ # If you want to compute multiple aggregation statistics over the same dynamic
3949
+ # window, consider using `rolling` - this method can cache the window size
3950
+ # computation.
3951
+ #
3952
+ # @example Create a DataFrame with a datetime column and a row number column
3953
+ # start = DateTime.new(2001, 1, 1)
3954
+ # stop = DateTime.new(2001, 1, 2)
3955
+ # df_temporal = Polars::DataFrame.new(
3956
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
3957
+ # ).with_row_index
3958
+ # # =>
3959
+ # # shape: (25, 2)
3960
+ # # ┌───────┬─────────────────────┐
3961
+ # # │ index ┆ date │
3962
+ # # │ --- ┆ --- │
3963
+ # # │ u32 ┆ datetime[ns] │
3964
+ # # ╞═══════╪═════════════════════╡
3965
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
3966
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
3967
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
3968
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
3969
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
3970
+ # # │ … ┆ … │
3971
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
3972
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
3973
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
3974
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
3975
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
3976
+ # # └───────┴─────────────────────┘
3977
+ #
3978
+ # @example Compute the rolling min with the temporal windows closed on the right (default)
3979
+ # df_temporal.with_columns(
3980
+ # rolling_row_min: Polars.col("index").rolling_min_by("date", "2h")
3981
+ # )
3982
+ # # =>
3983
+ # # shape: (25, 3)
3984
+ # # ┌───────┬─────────────────────┬─────────────────┐
3985
+ # # │ index ┆ date ┆ rolling_row_min │
3986
+ # # │ --- ┆ --- ┆ --- │
3987
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
3988
+ # # ╞═══════╪═════════════════════╪═════════════════╡
3989
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
3990
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │
3991
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │
3992
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │
3993
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │
3994
+ # # │ … ┆ … ┆ … │
3995
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │
3996
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │
3997
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │
3998
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │
3999
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │
4000
+ # # └───────┴─────────────────────┴─────────────────┘
4001
+ def rolling_min_by(
4002
+ by,
4003
+ window_size,
4004
+ min_periods: 1,
4005
+ closed: "right",
4006
+ warn_if_unsorted: nil
4007
+ )
4008
+ window_size = _prepare_rolling_by_window_args(window_size)
4009
+ by = Utils.parse_into_expression(by)
4010
+ _from_rbexpr(
4011
+ _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
4012
+ )
4013
+ end
4014
+
4015
+ # Apply a rolling max based on another column.
4016
+ #
4017
+ # @param by [String]
4018
+ # This column must be of dtype Datetime or Date.
4019
+ # @param window_size [String]
4020
+ # The length of the window. Can be a dynamic temporal
4021
+ # size indicated by a timedelta or the following string language:
4022
+ #
4023
+ # - 1ns (1 nanosecond)
4024
+ # - 1us (1 microsecond)
4025
+ # - 1ms (1 millisecond)
4026
+ # - 1s (1 second)
4027
+ # - 1m (1 minute)
4028
+ # - 1h (1 hour)
4029
+ # - 1d (1 calendar day)
4030
+ # - 1w (1 calendar week)
4031
+ # - 1mo (1 calendar month)
4032
+ # - 1q (1 calendar quarter)
4033
+ # - 1y (1 calendar year)
4034
+ #
4035
+ # By "calendar day", we mean the corresponding time on the next day
4036
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4037
+ # "calendar week", "calendar month", "calendar quarter", and
4038
+ # "calendar year".
4039
+ # @param min_periods [Integer]
4040
+ # The number of values in the window that should be non-null before computing
4041
+ # a result.
4042
+ # @param closed ['left', 'right', 'both', 'none']
4043
+ # Define which sides of the temporal interval are closed (inclusive),
4044
+ # defaults to `'right'`.
4045
+ # @param warn_if_unsorted [Boolean]
4046
+ # Warn if data is not known to be sorted by `by` column.
4047
+ #
4048
+ # @return [Expr]
4049
+ #
4050
+ # @note
4051
+ # If you want to compute multiple aggregation statistics over the same dynamic
4052
+ # window, consider using `rolling` - this method can cache the window size
4053
+ # computation.
4054
+ #
4055
+ # @example Create a DataFrame with a datetime column and a row number column
4056
+ # start = DateTime.new(2001, 1, 1)
4057
+ # stop = DateTime.new(2001, 1, 2)
4058
+ # df_temporal = Polars::DataFrame.new(
4059
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4060
+ # ).with_row_index
4061
+ # # =>
4062
+ # # shape: (25, 2)
4063
+ # # ┌───────┬─────────────────────┐
4064
+ # # │ index ┆ date │
4065
+ # # │ --- ┆ --- │
4066
+ # # │ u32 ┆ datetime[ns] │
4067
+ # # ╞═══════╪═════════════════════╡
4068
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4069
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4070
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4071
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4072
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4073
+ # # │ … ┆ … │
4074
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4075
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4076
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4077
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4078
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4079
+ # # └───────┴─────────────────────┘
4080
+ #
4081
+ # @example Compute the rolling max with the temporal windows closed on the right (default)
4082
+ # df_temporal.with_columns(
4083
+ # rolling_row_max: Polars.col("index").rolling_max_by("date", "2h")
4084
+ # )
4085
+ # # =>
4086
+ # # shape: (25, 3)
4087
+ # # ┌───────┬─────────────────────┬─────────────────┐
4088
+ # # │ index ┆ date ┆ rolling_row_max │
4089
+ # # │ --- ┆ --- ┆ --- │
4090
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4091
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4092
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4093
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4094
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4095
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4096
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4097
+ # # │ … ┆ … ┆ … │
4098
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4099
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4100
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4101
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4102
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4103
+ # # └───────┴─────────────────────┴─────────────────┘
4104
+ #
4105
+ # @example Compute the rolling max with the closure of windows on both sides
4106
+ # df_temporal.with_columns(
4107
+ # rolling_row_max: Polars.col("index").rolling_max_by(
4108
+ # "date", "2h", closed: "both"
4109
+ # )
4110
+ # )
4111
+ # # =>
4112
+ # # shape: (25, 3)
4113
+ # # ┌───────┬─────────────────────┬─────────────────┐
4114
+ # # │ index ┆ date ┆ rolling_row_max │
4115
+ # # │ --- ┆ --- ┆ --- │
4116
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4117
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4118
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4119
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4120
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │
4121
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │
4122
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │
4123
+ # # │ … ┆ … ┆ … │
4124
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │
4125
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │
4126
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │
4127
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │
4128
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │
4129
+ # # └───────┴─────────────────────┴─────────────────┘
4130
+ def rolling_max_by(
4131
+ by,
4132
+ window_size,
4133
+ min_periods: 1,
4134
+ closed: "right",
4135
+ warn_if_unsorted: nil
4136
+ )
4137
+ window_size = _prepare_rolling_by_window_args(window_size)
4138
+ by = Utils.parse_into_expression(by)
4139
+ _from_rbexpr(
4140
+ _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
4141
+ )
4142
+ end
4143
+
4144
+ # Apply a rolling mean based on another column.
4145
+ #
4146
+ # @param by [String]
4147
+ # This column must be of dtype Datetime or Date.
4148
+ # @param window_size [String]
4149
+ # The length of the window. Can be a dynamic temporal
4150
+ # size indicated by a timedelta or the following string language:
4151
+ #
4152
+ # - 1ns (1 nanosecond)
4153
+ # - 1us (1 microsecond)
4154
+ # - 1ms (1 millisecond)
4155
+ # - 1s (1 second)
4156
+ # - 1m (1 minute)
4157
+ # - 1h (1 hour)
4158
+ # - 1d (1 calendar day)
4159
+ # - 1w (1 calendar week)
4160
+ # - 1mo (1 calendar month)
4161
+ # - 1q (1 calendar quarter)
4162
+ # - 1y (1 calendar year)
4163
+ #
4164
+ # By "calendar day", we mean the corresponding time on the next day
4165
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4166
+ # "calendar week", "calendar month", "calendar quarter", and
4167
+ # "calendar year".
4168
+ # @param min_periods [Integer]
4169
+ # The number of values in the window that should be non-null before computing
4170
+ # a result.
4171
+ # @param closed ['left', 'right', 'both', 'none']
4172
+ # Define which sides of the temporal interval are closed (inclusive),
4173
+ # defaults to `'right'`.
4174
+ # @param warn_if_unsorted [Boolean]
4175
+ # Warn if data is not known to be sorted by `by` column.
4176
+ #
4177
+ # @return [Expr]
4178
+ #
4179
+ # @note
4180
+ # If you want to compute multiple aggregation statistics over the same dynamic
4181
+ # window, consider using `rolling` - this method can cache the window size
4182
+ # computation.
4183
+ #
4184
+ # @example Create a DataFrame with a datetime column and a row number column
4185
+ # start = DateTime.new(2001, 1, 1)
4186
+ # stop = DateTime.new(2001, 1, 2)
4187
+ # df_temporal = Polars::DataFrame.new(
4188
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4189
+ # ).with_row_index
4190
+ # # =>
4191
+ # # shape: (25, 2)
4192
+ # # ┌───────┬─────────────────────┐
4193
+ # # │ index ┆ date │
4194
+ # # │ --- ┆ --- │
4195
+ # # │ u32 ┆ datetime[ns] │
4196
+ # # ╞═══════╪═════════════════════╡
4197
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4198
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4199
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4200
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4201
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4202
+ # # │ … ┆ … │
4203
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4204
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4205
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4206
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4207
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4208
+ # # └───────┴─────────────────────┘
4209
+ #
4210
+ # @example Compute the rolling mean with the temporal windows closed on the right (default)
4211
+ # df_temporal.with_columns(
4212
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4213
+ # "date", "2h"
4214
+ # )
4215
+ # )
4216
+ # # =>
4217
+ # # shape: (25, 3)
4218
+ # # ┌───────┬─────────────────────┬──────────────────┐
4219
+ # # │ index ┆ date ┆ rolling_row_mean │
4220
+ # # │ --- ┆ --- ┆ --- │
4221
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4222
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4223
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4224
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4225
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4226
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4227
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4228
+ # # │ … ┆ … ┆ … │
4229
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4230
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4231
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4232
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4233
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4234
+ # # └───────┴─────────────────────┴──────────────────┘
4235
+ #
4236
+ # @example Compute the rolling mean with the closure of windows on both sides
4237
+ # df_temporal.with_columns(
4238
+ # rolling_row_mean: Polars.col("index").rolling_mean_by(
4239
+ # "date", "2h", closed: "both"
4240
+ # )
4241
+ # )
4242
+ # # =>
4243
+ # # shape: (25, 3)
4244
+ # # ┌───────┬─────────────────────┬──────────────────┐
4245
+ # # │ index ┆ date ┆ rolling_row_mean │
4246
+ # # │ --- ┆ --- ┆ --- │
4247
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4248
+ # # ╞═══════╪═════════════════════╪══════════════════╡
4249
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4250
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4251
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4252
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4253
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4254
+ # # │ … ┆ … ┆ … │
4255
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4256
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4257
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4258
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4259
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4260
+ # # └───────┴─────────────────────┴──────────────────┘
4261
+ def rolling_mean_by(
4262
+ by,
4263
+ window_size,
4264
+ min_periods: 1,
4265
+ closed: "right",
4266
+ warn_if_unsorted: nil
4267
+ )
4268
+ window_size = _prepare_rolling_by_window_args(window_size)
4269
+ by = Utils.parse_into_expression(by)
4270
+ _from_rbexpr(
4271
+ _rbexpr.rolling_mean_by(
4272
+ by,
4273
+ window_size,
4274
+ min_periods,
4275
+ closed
4276
+ )
4277
+ )
4278
+ end
4279
+
4280
+ # Apply a rolling sum based on another column.
4281
+ #
4282
+ # @param by [String]
4283
+ # This column must be of dtype `{Date, Datetime}`
4284
+ # @param window_size [String]
4285
+ # The length of the window. Can be a dynamic temporal
4286
+ # size indicated by a timedelta or the following string language:
4287
+ #
4288
+ # - 1ns (1 nanosecond)
4289
+ # - 1us (1 microsecond)
4290
+ # - 1ms (1 millisecond)
4291
+ # - 1s (1 second)
4292
+ # - 1m (1 minute)
4293
+ # - 1h (1 hour)
4294
+ # - 1d (1 calendar day)
4295
+ # - 1w (1 calendar week)
4296
+ # - 1mo (1 calendar month)
4297
+ # - 1q (1 calendar quarter)
4298
+ # - 1y (1 calendar year)
4299
+ #
4300
+ # By "calendar day", we mean the corresponding time on the next day
4301
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4302
+ # "calendar week", "calendar month", "calendar quarter", and
4303
+ # "calendar year".
4304
+ # @param min_periods [Integer]
4305
+ # The number of values in the window that should be non-null before computing
4306
+ # a result.
4307
+ # @param closed ['left', 'right', 'both', 'none']
4308
+ # Define which sides of the temporal interval are closed (inclusive),
4309
+ # defaults to `'right'`.
4310
+ # @param warn_if_unsorted [Boolean]
4311
+ # Warn if data is not known to be sorted by `by` column.
4312
+ #
4313
+ # @return [Expr]
4314
+ #
4315
+ # @note
4316
+ # If you want to compute multiple aggregation statistics over the same dynamic
4317
+ # window, consider using `rolling` - this method can cache the window size
4318
+ # computation.
4319
+ #
4320
+ # @example Create a DataFrame with a datetime column and a row number column
4321
+ # start = DateTime.new(2001, 1, 1)
4322
+ # stop = DateTime.new(2001, 1, 2)
4323
+ # df_temporal = Polars::DataFrame.new(
4324
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4325
+ # ).with_row_index
4326
+ # # =>
4327
+ # # shape: (25, 2)
4328
+ # # ┌───────┬─────────────────────┐
4329
+ # # │ index ┆ date │
4330
+ # # │ --- ┆ --- │
4331
+ # # │ u32 ┆ datetime[ns] │
4332
+ # # ╞═══════╪═════════════════════╡
4333
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4334
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4335
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4336
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4337
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4338
+ # # │ … ┆ … │
4339
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4340
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4341
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4342
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4343
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4344
+ # # └───────┴─────────────────────┘
4345
+ #
4346
+ # @example Compute the rolling sum with the temporal windows closed on the right (default)
4347
+ # df_temporal.with_columns(
4348
+ # rolling_row_sum: Polars.col("index").rolling_sum_by("date", "2h")
4349
+ # )
4350
+ # # =>
4351
+ # # shape: (25, 3)
4352
+ # # ┌───────┬─────────────────────┬─────────────────┐
4353
+ # # │ index ┆ date ┆ rolling_row_sum │
4354
+ # # │ --- ┆ --- ┆ --- │
4355
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4356
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4357
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4358
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4359
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4360
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 5 │
4361
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 7 │
4362
+ # # │ … ┆ … ┆ … │
4363
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 39 │
4364
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 41 │
4365
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 43 │
4366
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 45 │
4367
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 47 │
4368
+ # # └───────┴─────────────────────┴─────────────────┘
4369
+ #
4370
+ # @example Compute the rolling sum with the closure of windows on both sides
4371
+ # df_temporal.with_columns(
4372
+ # rolling_row_sum: Polars.col("index").rolling_sum_by(
4373
+ # "date", "2h", closed: "both"
4374
+ # )
4375
+ # )
4376
+ # # =>
4377
+ # # shape: (25, 3)
4378
+ # # ┌───────┬─────────────────────┬─────────────────┐
4379
+ # # │ index ┆ date ┆ rolling_row_sum │
4380
+ # # │ --- ┆ --- ┆ --- │
4381
+ # # │ u32 ┆ datetime[ns] ┆ u32 │
4382
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4383
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │
4384
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │
4385
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │
4386
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 6 │
4387
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 9 │
4388
+ # # │ … ┆ … ┆ … │
4389
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 57 │
4390
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 60 │
4391
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 63 │
4392
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 66 │
4393
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │
4394
+ # # └───────┴─────────────────────┴─────────────────┘
4395
+ def rolling_sum_by(
4396
+ by,
4397
+ window_size,
4398
+ min_periods: 1,
4399
+ closed: "right",
4400
+ warn_if_unsorted: nil
4401
+ )
4402
+ window_size = _prepare_rolling_by_window_args(window_size)
4403
+ by = Utils.parse_into_expression(by)
4404
+ _from_rbexpr(
4405
+ _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
4406
+ )
4407
+ end
4408
+
4409
+ # Compute a rolling standard deviation based on another column.
4410
+ #
4411
+ # @param by [String]
4412
+ # This column must be of dtype Datetime or Date.
4413
+ # @param window_size [String]
4414
+ # The length of the window. Can be a dynamic temporal
4415
+ # size indicated by a timedelta or the following string language:
4416
+ #
4417
+ # - 1ns (1 nanosecond)
4418
+ # - 1us (1 microsecond)
4419
+ # - 1ms (1 millisecond)
4420
+ # - 1s (1 second)
4421
+ # - 1m (1 minute)
4422
+ # - 1h (1 hour)
4423
+ # - 1d (1 calendar day)
4424
+ # - 1w (1 calendar week)
4425
+ # - 1mo (1 calendar month)
4426
+ # - 1q (1 calendar quarter)
4427
+ # - 1y (1 calendar year)
4428
+ #
4429
+ # By "calendar day", we mean the corresponding time on the next day
4430
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4431
+ # "calendar week", "calendar month", "calendar quarter", and
4432
+ # "calendar year".
4433
+ # @param min_periods [Integer]
4434
+ # The number of values in the window that should be non-null before computing
4435
+ # a result.
4436
+ # @param closed ['left', 'right', 'both', 'none']
4437
+ # Define which sides of the temporal interval are closed (inclusive),
4438
+ # defaults to `'right'`.
4439
+ # @param ddof [Integer]
4440
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4441
+ # @param warn_if_unsorted [Boolean]
4442
+ # Warn if data is not known to be sorted by `by` column.
4443
+ #
4444
+ # @return [Expr]
4445
+ #
4446
+ # @note
4447
+ # If you want to compute multiple aggregation statistics over the same dynamic
4448
+ # window, consider using `rolling` - this method can cache the window size
4449
+ # computation.
4450
+ #
4451
+ # @example Create a DataFrame with a datetime column and a row number column
4452
+ # start = DateTime.new(2001, 1, 1)
4453
+ # stop = DateTime.new(2001, 1, 2)
4454
+ # df_temporal = Polars::DataFrame.new(
4455
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4456
+ # ).with_row_index
4457
+ # # =>
4458
+ # # shape: (25, 2)
4459
+ # # ┌───────┬─────────────────────┐
4460
+ # # │ index ┆ date │
4461
+ # # │ --- ┆ --- │
4462
+ # # │ u32 ┆ datetime[ns] │
4463
+ # # ╞═══════╪═════════════════════╡
4464
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4465
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4466
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4467
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4468
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4469
+ # # │ … ┆ … │
4470
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4471
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4472
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4473
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4474
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4475
+ # # └───────┴─────────────────────┘
4476
+ #
4477
+ # @example Compute the rolling std with the temporal windows closed on the right (default)
4478
+ # df_temporal.with_columns(
4479
+ # rolling_row_std: Polars.col("index").rolling_std_by("date", "2h")
4480
+ # )
4481
+ # # =>
4482
+ # # shape: (25, 3)
4483
+ # # ┌───────┬─────────────────────┬─────────────────┐
4484
+ # # │ index ┆ date ┆ rolling_row_std │
4485
+ # # │ --- ┆ --- ┆ --- │
4486
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4487
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4488
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4489
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4490
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.707107 │
4491
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.707107 │
4492
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.707107 │
4493
+ # # │ … ┆ … ┆ … │
4494
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.707107 │
4495
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.707107 │
4496
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.707107 │
4497
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.707107 │
4498
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.707107 │
4499
+ # # └───────┴─────────────────────┴─────────────────┘
4500
+ #
4501
+ # @example Compute the rolling std with the closure of windows on both sides
4502
+ # df_temporal.with_columns(
4503
+ # rolling_row_std: Polars.col("index").rolling_std_by(
4504
+ # "date", "2h", closed: "both"
4505
+ # )
4506
+ # )
4507
+ # # =>
4508
+ # # shape: (25, 3)
4509
+ # # ┌───────┬─────────────────────┬─────────────────┐
4510
+ # # │ index ┆ date ┆ rolling_row_std │
4511
+ # # │ --- ┆ --- ┆ --- │
4512
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4513
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4514
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4515
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │
4516
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4517
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4518
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4519
+ # # │ … ┆ … ┆ … │
4520
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4521
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4522
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4523
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4524
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4525
+ # # └───────┴─────────────────────┴─────────────────┘
4526
+ def rolling_std_by(
4527
+ by,
4528
+ window_size,
4529
+ min_periods: 1,
4530
+ closed: "right",
4531
+ ddof: 1,
4532
+ warn_if_unsorted: nil
4533
+ )
4534
+ window_size = _prepare_rolling_by_window_args(window_size)
4535
+ by = Utils.parse_into_expression(by)
4536
+ _from_rbexpr(
4537
+ _rbexpr.rolling_std_by(
4538
+ by,
4539
+ window_size,
4540
+ min_periods,
4541
+ closed,
4542
+ ddof
4543
+ )
4544
+ )
4545
+ end
4546
+
4547
+ # Compute a rolling variance based on another column.
4548
+ #
4549
+ # @param by [String]
4550
+ # This column must be of dtype Datetime or Date.
4551
+ # @param window_size [String]
4552
+ # The length of the window. Can be a dynamic temporal
4553
+ # size indicated by a timedelta or the following string language:
4554
+ #
4555
+ # - 1ns (1 nanosecond)
4556
+ # - 1us (1 microsecond)
4557
+ # - 1ms (1 millisecond)
4558
+ # - 1s (1 second)
4559
+ # - 1m (1 minute)
4560
+ # - 1h (1 hour)
4561
+ # - 1d (1 calendar day)
4562
+ # - 1w (1 calendar week)
4563
+ # - 1mo (1 calendar month)
4564
+ # - 1q (1 calendar quarter)
4565
+ # - 1y (1 calendar year)
4566
+ #
4567
+ # By "calendar day", we mean the corresponding time on the next day
4568
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4569
+ # "calendar week", "calendar month", "calendar quarter", and
4570
+ # "calendar year".
4571
+ # @param min_periods [Integer]
4572
+ # The number of values in the window that should be non-null before computing
4573
+ # a result.
4574
+ # @param closed ['left', 'right', 'both', 'none']
4575
+ # Define which sides of the temporal interval are closed (inclusive),
4576
+ # defaults to `'right'`.
4577
+ # @param ddof [Integer]
4578
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4579
+ # @param warn_if_unsorted [Boolean]
4580
+ # Warn if data is not known to be sorted by `by` column.
4581
+ #
4582
+ # @return [Expr]
4583
+ #
4584
+ # @note
4585
+ # If you want to compute multiple aggregation statistics over the same dynamic
4586
+ # window, consider using `rolling` - this method can cache the window size
4587
+ # computation.
4588
+ #
4589
+ # @example Create a DataFrame with a datetime column and a row number column
4590
+ # start = DateTime.new(2001, 1, 1)
4591
+ # stop = DateTime.new(2001, 1, 2)
4592
+ # df_temporal = Polars::DataFrame.new(
4593
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4594
+ # ).with_row_index
4595
+ # # =>
4596
+ # # shape: (25, 2)
4597
+ # # ┌───────┬─────────────────────┐
4598
+ # # │ index ┆ date │
4599
+ # # │ --- ┆ --- │
4600
+ # # │ u32 ┆ datetime[ns] │
4601
+ # # ╞═══════╪═════════════════════╡
4602
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4603
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4604
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4605
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4606
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4607
+ # # │ … ┆ … │
4608
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4609
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4610
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4611
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4612
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4613
+ # # └───────┴─────────────────────┘
4614
+ #
4615
+ # @example Compute the rolling var with the temporal windows closed on the right (default)
4616
+ # df_temporal.with_columns(
4617
+ # rolling_row_var: Polars.col("index").rolling_var_by("date", "2h")
4618
+ # )
4619
+ # # =>
4620
+ # # shape: (25, 3)
4621
+ # # ┌───────┬─────────────────────┬─────────────────┐
4622
+ # # │ index ┆ date ┆ rolling_row_var │
4623
+ # # │ --- ┆ --- ┆ --- │
4624
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4625
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4626
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4627
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4628
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.5 │
4629
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.5 │
4630
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.5 │
4631
+ # # │ … ┆ … ┆ … │
4632
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.5 │
4633
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.5 │
4634
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.5 │
4635
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.5 │
4636
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.5 │
4637
+ # # └───────┴─────────────────────┴─────────────────┘
4638
+ #
4639
+ # @example Compute the rolling var with the closure of windows on both sides
4640
+ # df_temporal.with_columns(
4641
+ # rolling_row_var: Polars.col("index").rolling_var_by(
4642
+ # "date", "2h", closed: "both"
4643
+ # )
4644
+ # )
4645
+ # # =>
4646
+ # # shape: (25, 3)
4647
+ # # ┌───────┬─────────────────────┬─────────────────┐
4648
+ # # │ index ┆ date ┆ rolling_row_var │
4649
+ # # │ --- ┆ --- ┆ --- │
4650
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4651
+ # # ╞═══════╪═════════════════════╪═════════════════╡
4652
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ null │
4653
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4654
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4655
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │
4656
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │
4657
+ # # │ … ┆ … ┆ … │
4658
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │
4659
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │
4660
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │
4661
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │
4662
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │
4663
+ # # └───────┴─────────────────────┴─────────────────┘
4664
+ def rolling_var_by(
4665
+ by,
4666
+ window_size,
4667
+ min_periods: 1,
4668
+ closed: "right",
4669
+ ddof: 1,
4670
+ warn_if_unsorted: nil
4671
+ )
4672
+ window_size = _prepare_rolling_by_window_args(window_size)
4673
+ by = Utils.parse_into_expression(by)
4674
+ _from_rbexpr(
4675
+ _rbexpr.rolling_var_by(
4676
+ by,
4677
+ window_size,
4678
+ min_periods,
4679
+ closed,
4680
+ ddof
4681
+ )
4682
+ )
4683
+ end
4684
+
4685
+ # Compute a rolling median based on another column.
4686
+ #
4687
+ # @param by [String]
4688
+ # This column must be of dtype Datetime or Date.
4689
+ # @param window_size [String]
4690
+ # The length of the window. Can be a dynamic temporal
4691
+ # size indicated by a timedelta or the following string language:
4692
+ #
4693
+ # - 1ns (1 nanosecond)
4694
+ # - 1us (1 microsecond)
4695
+ # - 1ms (1 millisecond)
4696
+ # - 1s (1 second)
4697
+ # - 1m (1 minute)
4698
+ # - 1h (1 hour)
4699
+ # - 1d (1 calendar day)
4700
+ # - 1w (1 calendar week)
4701
+ # - 1mo (1 calendar month)
4702
+ # - 1q (1 calendar quarter)
4703
+ # - 1y (1 calendar year)
4704
+ #
4705
+ # By "calendar day", we mean the corresponding time on the next day
4706
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4707
+ # "calendar week", "calendar month", "calendar quarter", and
4708
+ # "calendar year".
4709
+ # @param min_periods [Integer]
4710
+ # The number of values in the window that should be non-null before computing
4711
+ # a result.
4712
+ # @param closed ['left', 'right', 'both', 'none']
4713
+ # Define which sides of the temporal interval are closed (inclusive),
4714
+ # defaults to `'right'`.
4715
+ # @param warn_if_unsorted [Boolean]
4716
+ # Warn if data is not known to be sorted by `by` column.
4717
+ #
4718
+ # @return [Expr]
4719
+ #
4720
+ # @note
4721
+ # If you want to compute multiple aggregation statistics over the same dynamic
4722
+ # window, consider using `rolling` - this method can cache the window size
4723
+ # computation.
4724
+ #
4725
+ # @example Create a DataFrame with a datetime column and a row number column
4726
+ # start = DateTime.new(2001, 1, 1)
4727
+ # stop = DateTime.new(2001, 1, 2)
4728
+ # df_temporal = Polars::DataFrame.new(
4729
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4730
+ # ).with_row_index
4731
+ # # =>
4732
+ # # shape: (25, 2)
4733
+ # # ┌───────┬─────────────────────┐
4734
+ # # │ index ┆ date │
4735
+ # # │ --- ┆ --- │
4736
+ # # │ u32 ┆ datetime[ns] │
4737
+ # # ╞═══════╪═════════════════════╡
4738
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4739
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4740
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4741
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4742
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4743
+ # # │ … ┆ … │
4744
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4745
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4746
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4747
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4748
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4749
+ # # └───────┴─────────────────────┘
4750
+ #
4751
+ # @example Compute the rolling median with the temporal windows closed on the right (default)
4752
+ # df_temporal.with_columns(
4753
+ # rolling_row_median: Polars.col("index").rolling_median_by(
4754
+ # "date", "2h"
4755
+ # )
4756
+ # )
4757
+ # # =>
4758
+ # # shape: (25, 3)
4759
+ # # ┌───────┬─────────────────────┬────────────────────┐
4760
+ # # │ index ┆ date ┆ rolling_row_median │
4761
+ # # │ --- ┆ --- ┆ --- │
4762
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4763
+ # # ╞═══════╪═════════════════════╪════════════════════╡
4764
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4765
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │
4766
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │
4767
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │
4768
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │
4769
+ # # │ … ┆ … ┆ … │
4770
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │
4771
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │
4772
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │
4773
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │
4774
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │
4775
+ # # └───────┴─────────────────────┴────────────────────┘
4776
+ def rolling_median_by(
4777
+ by,
4778
+ window_size,
4779
+ min_periods: 1,
4780
+ closed: "right",
4781
+ warn_if_unsorted: nil
4782
+ )
4783
+ window_size = _prepare_rolling_by_window_args(window_size)
4784
+ by = Utils.parse_into_expression(by)
4785
+ _from_rbexpr(
4786
+ _rbexpr.rolling_median_by(by, window_size, min_periods, closed)
4787
+ )
4788
+ end
4789
+
4790
+ # Compute a rolling quantile based on another column.
4791
+ #
4792
+ # @param by [String]
4793
+ # This column must be of dtype Datetime or Date.
4794
+ # @param quantile [Float]
4795
+ # Quantile between 0.0 and 1.0.
4796
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
4797
+ # Interpolation method.
4798
+ # @param window_size [String]
4799
+ # The length of the window. Can be a dynamic
4800
+ # temporal size indicated by a timedelta or the following string language:
4801
+ #
4802
+ # - 1ns (1 nanosecond)
4803
+ # - 1us (1 microsecond)
4804
+ # - 1ms (1 millisecond)
4805
+ # - 1s (1 second)
4806
+ # - 1m (1 minute)
4807
+ # - 1h (1 hour)
4808
+ # - 1d (1 calendar day)
4809
+ # - 1w (1 calendar week)
4810
+ # - 1mo (1 calendar month)
4811
+ # - 1q (1 calendar quarter)
4812
+ # - 1y (1 calendar year)
4813
+ #
4814
+ # By "calendar day", we mean the corresponding time on the next day
4815
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4816
+ # "calendar week", "calendar month", "calendar quarter", and
4817
+ # "calendar year".
4818
+ # @param min_periods [Integer]
4819
+ # The number of values in the window that should be non-null before computing
4820
+ # a result.
4821
+ # @param closed ['left', 'right', 'both', 'none']
4822
+ # Define which sides of the temporal interval are closed (inclusive),
4823
+ # defaults to `'right'`.
4824
+ # @param warn_if_unsorted [Boolean]
4825
+ # Warn if data is not known to be sorted by `by` column.
4826
+ #
4827
+ # @return [Expr]
4828
+ #
4829
+ # @note
4830
+ # If you want to compute multiple aggregation statistics over the same dynamic
4831
+ # window, consider using `rolling` - this method can cache the window size
4832
+ # computation.
4833
+ #
4834
+ # @example Create a DataFrame with a datetime column and a row number column
4835
+ # start = DateTime.new(2001, 1, 1)
4836
+ # stop = DateTime.new(2001, 1, 2)
4837
+ # df_temporal = Polars::DataFrame.new(
4838
+ # {"date" => Polars.datetime_range(start, stop, "1h", eager: true)}
4839
+ # ).with_row_index
4840
+ # # =>
4841
+ # # shape: (25, 2)
4842
+ # # ┌───────┬─────────────────────┐
4843
+ # # │ index ┆ date │
4844
+ # # │ --- ┆ --- │
4845
+ # # │ u32 ┆ datetime[ns] │
4846
+ # # ╞═══════╪═════════════════════╡
4847
+ # # │ 0 ┆ 2001-01-01 00:00:00 │
4848
+ # # │ 1 ┆ 2001-01-01 01:00:00 │
4849
+ # # │ 2 ┆ 2001-01-01 02:00:00 │
4850
+ # # │ 3 ┆ 2001-01-01 03:00:00 │
4851
+ # # │ 4 ┆ 2001-01-01 04:00:00 │
4852
+ # # │ … ┆ … │
4853
+ # # │ 20 ┆ 2001-01-01 20:00:00 │
4854
+ # # │ 21 ┆ 2001-01-01 21:00:00 │
4855
+ # # │ 22 ┆ 2001-01-01 22:00:00 │
4856
+ # # │ 23 ┆ 2001-01-01 23:00:00 │
4857
+ # # │ 24 ┆ 2001-01-02 00:00:00 │
4858
+ # # └───────┴─────────────────────┘
4859
+ #
4860
+ # @example Compute the rolling quantile with the temporal windows closed on the right (default)
4861
+ # df_temporal.with_columns(
4862
+ # rolling_row_quantile: Polars.col("index").rolling_quantile_by(
4863
+ # "date", "2h", quantile: 0.3
4864
+ # )
4865
+ # )
4866
+ # # =>
4867
+ # # shape: (25, 3)
4868
+ # # ┌───────┬─────────────────────┬──────────────────────┐
4869
+ # # │ index ┆ date ┆ rolling_row_quantile │
4870
+ # # │ --- ┆ --- ┆ --- │
4871
+ # # │ u32 ┆ datetime[ns] ┆ f64 │
4872
+ # # ╞═══════╪═════════════════════╪══════════════════════╡
4873
+ # # │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │
4874
+ # # │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.0 │
4875
+ # # │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │
4876
+ # # │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │
4877
+ # # │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │
4878
+ # # │ … ┆ … ┆ … │
4879
+ # # │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │
4880
+ # # │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │
4881
+ # # │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │
4882
+ # # │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │
4883
+ # # │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │
4884
+ # # └───────┴─────────────────────┴──────────────────────┘
4885
+ def rolling_quantile_by(
4886
+ by,
4887
+ window_size,
4888
+ quantile:,
4889
+ interpolation: "nearest",
4890
+ min_periods: 1,
4891
+ closed: "right",
4892
+ warn_if_unsorted: nil
4893
+ )
4894
+ window_size = _prepare_rolling_by_window_args(window_size)
4895
+ by = Utils.parse_into_expression(by)
4896
+ _from_rbexpr(
4897
+ _rbexpr.rolling_quantile_by(
4898
+ by,
4899
+ quantile,
4900
+ interpolation,
4901
+ window_size,
4902
+ min_periods,
4903
+ closed,
4904
+ )
4905
+ )
4906
+ end
4907
+
3860
4908
  # Apply a rolling min (moving min) over the values in this array.
3861
4909
  #
3862
4910
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3889,12 +4937,6 @@ module Polars
3889
4937
  # a result. If None, it will be set equal to window size.
3890
4938
  # @param center [Boolean]
3891
4939
  # Set the labels at the center of the window
3892
- # @param by [String]
3893
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
3894
- # set the column that will be used to determine the windows. This column must
3895
- # be of dtype `{Date, Datetime}`
3896
- # @param closed ["left", "right", "both", "none"]
3897
- # Define whether the temporal window interval is closed or not.
3898
4940
  #
3899
4941
  # @note
3900
4942
  # This functionality is experimental and may change without it being considered a
@@ -3932,16 +4974,11 @@ module Polars
3932
4974
  window_size,
3933
4975
  weights: nil,
3934
4976
  min_periods: nil,
3935
- center: false,
3936
- by: nil,
3937
- closed: nil
4977
+ center: false
3938
4978
  )
3939
- window_size, min_periods = _prepare_rolling_window_args(
3940
- window_size, min_periods
3941
- )
3942
4979
  _from_rbexpr(
3943
4980
  _rbexpr.rolling_min(
3944
- window_size, weights, min_periods, center, by, closed
4981
+ window_size, weights, min_periods, center
3945
4982
  )
3946
4983
  )
3947
4984
  end
@@ -3978,12 +5015,6 @@ module Polars
3978
5015
  # a result. If None, it will be set equal to window size.
3979
5016
  # @param center [Boolean]
3980
5017
  # Set the labels at the center of the window
3981
- # @param by [String]
3982
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
3983
- # set the column that will be used to determine the windows. This column must
3984
- # be of dtype `{Date, Datetime}`
3985
- # @param closed ["left", "right", "both", "none"]
3986
- # Define whether the temporal window interval is closed or not.
3987
5018
  #
3988
5019
  # @note
3989
5020
  # This functionality is experimental and may change without it being considered a
@@ -4021,16 +5052,11 @@ module Polars
4021
5052
  window_size,
4022
5053
  weights: nil,
4023
5054
  min_periods: nil,
4024
- center: false,
4025
- by: nil,
4026
- closed: nil
5055
+ center: false
4027
5056
  )
4028
- window_size, min_periods = _prepare_rolling_window_args(
4029
- window_size, min_periods
4030
- )
4031
5057
  _from_rbexpr(
4032
5058
  _rbexpr.rolling_max(
4033
- window_size, weights, min_periods, center, by, closed
5059
+ window_size, weights, min_periods, center
4034
5060
  )
4035
5061
  )
4036
5062
  end
@@ -4067,12 +5093,6 @@ module Polars
4067
5093
  # a result. If None, it will be set equal to window size.
4068
5094
  # @param center [Boolean]
4069
5095
  # Set the labels at the center of the window
4070
- # @param by [String]
4071
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4072
- # set the column that will be used to determine the windows. This column must
4073
- # be of dtype `{Date, Datetime}`
4074
- # @param closed ["left", "right", "both", "none"]
4075
- # Define whether the temporal window interval is closed or not.
4076
5096
  #
4077
5097
  # @note
4078
5098
  # This functionality is experimental and may change without it being considered a
@@ -4110,16 +5130,11 @@ module Polars
4110
5130
  window_size,
4111
5131
  weights: nil,
4112
5132
  min_periods: nil,
4113
- center: false,
4114
- by: nil,
4115
- closed: nil
5133
+ center: false
4116
5134
  )
4117
- window_size, min_periods = _prepare_rolling_window_args(
4118
- window_size, min_periods
4119
- )
4120
5135
  _from_rbexpr(
4121
5136
  _rbexpr.rolling_mean(
4122
- window_size, weights, min_periods, center, by, closed
5137
+ window_size, weights, min_periods, center
4123
5138
  )
4124
5139
  )
4125
5140
  end
@@ -4156,12 +5171,6 @@ module Polars
4156
5171
  # a result. If None, it will be set equal to window size.
4157
5172
  # @param center [Boolean]
4158
5173
  # Set the labels at the center of the window
4159
- # @param by [String]
4160
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4161
- # set the column that will be used to determine the windows. This column must
4162
- # be of dtype `{Date, Datetime}`
4163
- # @param closed ["left", "right", "both", "none"]
4164
- # Define whether the temporal window interval is closed or not.
4165
5174
  #
4166
5175
  # @note
4167
5176
  # This functionality is experimental and may change without it being considered a
@@ -4199,16 +5208,11 @@ module Polars
4199
5208
  window_size,
4200
5209
  weights: nil,
4201
5210
  min_periods: nil,
4202
- center: false,
4203
- by: nil,
4204
- closed: nil
5211
+ center: false
4205
5212
  )
4206
- window_size, min_periods = _prepare_rolling_window_args(
4207
- window_size, min_periods
4208
- )
4209
5213
  _from_rbexpr(
4210
5214
  _rbexpr.rolling_sum(
4211
- window_size, weights, min_periods, center, by, closed
5215
+ window_size, weights, min_periods, center
4212
5216
  )
4213
5217
  )
4214
5218
  end
@@ -4245,12 +5249,6 @@ module Polars
4245
5249
  # a result. If None, it will be set equal to window size.
4246
5250
  # @param center [Boolean]
4247
5251
  # Set the labels at the center of the window
4248
- # @param by [String]
4249
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4250
- # set the column that will be used to determine the windows. This column must
4251
- # be of dtype `{Date, Datetime}`
4252
- # @param closed ["left", "right", "both", "none"]
4253
- # Define whether the temporal window interval is closed or not.
4254
5252
  #
4255
5253
  # @note
4256
5254
  # This functionality is experimental and may change without it being considered a
@@ -4289,17 +5287,11 @@ module Polars
4289
5287
  weights: nil,
4290
5288
  min_periods: nil,
4291
5289
  center: false,
4292
- by: nil,
4293
- closed: nil,
4294
- ddof: 1,
4295
- warn_if_unsorted: true
5290
+ ddof: 1
4296
5291
  )
4297
- window_size, min_periods = _prepare_rolling_window_args(
4298
- window_size, min_periods
4299
- )
4300
5292
  _from_rbexpr(
4301
5293
  _rbexpr.rolling_std(
4302
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5294
+ window_size, weights, min_periods, center, ddof
4303
5295
  )
4304
5296
  )
4305
5297
  end
@@ -4336,12 +5328,6 @@ module Polars
4336
5328
  # a result. If nil, it will be set equal to window size.
4337
5329
  # @param center [Boolean]
4338
5330
  # Set the labels at the center of the window
4339
- # @param by [String]
4340
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4341
- # set the column that will be used to determine the windows. This column must
4342
- # be of dtype `{Date, Datetime}`
4343
- # @param closed ["left", "right", "both", "none"]
4344
- # Define whether the temporal window interval is closed or not.
4345
5331
  #
4346
5332
  # @note
4347
5333
  # This functionality is experimental and may change without it being considered a
@@ -4380,17 +5366,11 @@ module Polars
4380
5366
  weights: nil,
4381
5367
  min_periods: nil,
4382
5368
  center: false,
4383
- by: nil,
4384
- closed: nil,
4385
- ddof: 1,
4386
- warn_if_unsorted: true
5369
+ ddof: 1
4387
5370
  )
4388
- window_size, min_periods = _prepare_rolling_window_args(
4389
- window_size, min_periods
4390
- )
4391
5371
  _from_rbexpr(
4392
5372
  _rbexpr.rolling_var(
4393
- window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
5373
+ window_size, weights, min_periods, center, ddof
4394
5374
  )
4395
5375
  )
4396
5376
  end
@@ -4423,12 +5403,6 @@ module Polars
4423
5403
  # a result. If nil, it will be set equal to window size.
4424
5404
  # @param center [Boolean]
4425
5405
  # Set the labels at the center of the window
4426
- # @param by [String]
4427
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4428
- # set the column that will be used to determine the windows. This column must
4429
- # be of dtype `{Date, Datetime}`
4430
- # @param closed ["left", "right", "both", "none"]
4431
- # Define whether the temporal window interval is closed or not.
4432
5406
  #
4433
5407
  # @note
4434
5408
  # This functionality is experimental and may change without it being considered a
@@ -4466,17 +5440,11 @@ module Polars
4466
5440
  window_size,
4467
5441
  weights: nil,
4468
5442
  min_periods: nil,
4469
- center: false,
4470
- by: nil,
4471
- closed: nil,
4472
- warn_if_unsorted: true
5443
+ center: false
4473
5444
  )
4474
- window_size, min_periods = _prepare_rolling_window_args(
4475
- window_size, min_periods
4476
- )
4477
5445
  _from_rbexpr(
4478
5446
  _rbexpr.rolling_median(
4479
- window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5447
+ window_size, weights, min_periods, center
4480
5448
  )
4481
5449
  )
4482
5450
  end
@@ -4513,12 +5481,6 @@ module Polars
4513
5481
  # a result. If nil, it will be set equal to window size.
4514
5482
  # @param center [Boolean]
4515
5483
  # Set the labels at the center of the window
4516
- # @param by [String]
4517
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4518
- # set the column that will be used to determine the windows. This column must
4519
- # be of dtype `{Date, Datetime}`
4520
- # @param closed ["left", "right", "both", "none"]
4521
- # Define whether the temporal window interval is closed or not.
4522
5484
  #
4523
5485
  # @note
4524
5486
  # This functionality is experimental and may change without it being considered a
@@ -4558,17 +5520,11 @@ module Polars
4558
5520
  window_size: 2,
4559
5521
  weights: nil,
4560
5522
  min_periods: nil,
4561
- center: false,
4562
- by: nil,
4563
- closed: nil,
4564
- warn_if_unsorted: true
5523
+ center: false
4565
5524
  )
4566
- window_size, min_periods = _prepare_rolling_window_args(
4567
- window_size, min_periods
4568
- )
4569
5525
  _from_rbexpr(
4570
5526
  _rbexpr.rolling_quantile(
4571
- quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
5527
+ quantile, interpolation, window_size, weights, min_periods, center
4572
5528
  )
4573
5529
  )
4574
5530
  end
@@ -4837,7 +5793,7 @@ module Polars
4837
5793
  # # │ 12 ┆ 0.0 │
4838
5794
  # # └──────┴────────────┘
4839
5795
  def pct_change(n: 1)
4840
- n = Utils.parse_as_expression(n)
5796
+ n = Utils.parse_into_expression(n)
4841
5797
  _from_rbexpr(_rbexpr.pct_change(n))
4842
5798
  end
4843
5799
 
@@ -4929,12 +5885,12 @@ module Polars
4929
5885
  # # │ null ┆ null │
4930
5886
  # # │ 50 ┆ 10 │
4931
5887
  # # └──────┴─────────────┘
4932
- def clip(lower_bound, upper_bound)
5888
+ def clip(lower_bound = nil, upper_bound = nil)
4933
5889
  if !lower_bound.nil?
4934
- lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
5890
+ lower_bound = Utils.parse_into_expression(lower_bound)
4935
5891
  end
4936
5892
  if !upper_bound.nil?
4937
- upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
5893
+ upper_bound = Utils.parse_into_expression(upper_bound)
4938
5894
  end
4939
5895
  _from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
4940
5896
  end
@@ -5321,18 +6277,38 @@ module Polars
5321
6277
  #
5322
6278
  # @example
5323
6279
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
5324
- # df.select(Polars.col("foo").reshape([3, 3]))
6280
+ # square = df.select(Polars.col("foo").reshape([3, 3]))
5325
6281
  # # =>
5326
6282
  # # shape: (3, 1)
5327
- # # ┌───────────┐
5328
- # # │ foo
5329
- # # │ ---
5330
- # # │ list[i64] │
5331
- # # ╞═══════════╡
5332
- # # │ [1, 2, 3]
5333
- # # │ [4, 5, 6]
5334
- # # │ [7, 8, 9]
5335
- # # └───────────┘
6283
+ # # ┌───────────────┐
6284
+ # # │ foo
6285
+ # # │ ---
6286
+ # # │ array[i64, 3] │
6287
+ # # ╞═══════════════╡
6288
+ # # │ [1, 2, 3]
6289
+ # # │ [4, 5, 6]
6290
+ # # │ [7, 8, 9]
6291
+ # # └───────────────┘
6292
+ #
6293
+ # @example
6294
+ # square.select(Polars.col("foo").reshape([9]))
6295
+ # # =>
6296
+ # # shape: (9, 1)
6297
+ # # ┌─────┐
6298
+ # # │ foo │
6299
+ # # │ --- │
6300
+ # # │ i64 │
6301
+ # # ╞═════╡
6302
+ # # │ 1 │
6303
+ # # │ 2 │
6304
+ # # │ 3 │
6305
+ # # │ 4 │
6306
+ # # │ 5 │
6307
+ # # │ 6 │
6308
+ # # │ 7 │
6309
+ # # │ 8 │
6310
+ # # │ 9 │
6311
+ # # └─────┘
5336
6312
  def reshape(dims)
5337
6313
  _from_rbexpr(_rbexpr.reshape(dims))
5338
6314
  end
@@ -5408,14 +6384,14 @@ module Polars
5408
6384
  end
5409
6385
 
5410
6386
  if !n.nil? && frac.nil?
5411
- n = Utils.parse_as_expression(n)
6387
+ n = Utils.parse_into_expression(n)
5412
6388
  return _from_rbexpr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
5413
6389
  end
5414
6390
 
5415
6391
  if frac.nil?
5416
6392
  frac = 1.0
5417
6393
  end
5418
- frac = Utils.parse_as_expression(frac)
6394
+ frac = Utils.parse_into_expression(frac)
5419
6395
  _from_rbexpr(
5420
6396
  _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
5421
6397
  )
@@ -5548,11 +6524,17 @@ module Polars
5548
6524
 
5549
6525
  # Count all unique values and create a struct mapping value to count.
5550
6526
  #
5551
- # @param multithreaded [Boolean]
5552
- # Better to turn this off in the aggregation context, as it can lead to
5553
- # contention.
5554
6527
  # @param sort [Boolean]
5555
- # Ensure the output is sorted from most values to least.
6528
+ # Sort the output by count in descending order.
6529
+ # If set to `false` (default), the order of the output is random.
6530
+ # @param parallel [Boolean]
6531
+ # Execute the computation in parallel.
6532
+ # @param name [String]
6533
+ # Give the resulting count column a specific name;
6534
+ # if `normalize` is true defaults to "proportion",
6535
+ # otherwise defaults to "count".
6536
+ # @param normalize [Boolean]
6537
+ # If true gives relative frequencies of the unique values
5556
6538
  #
5557
6539
  # @return [Expr]
5558
6540
  #
@@ -5578,8 +6560,22 @@ module Polars
5578
6560
  # # │ {"b",2} │
5579
6561
  # # │ {"a",1} │
5580
6562
  # # └───────────┘
5581
- def value_counts(multithreaded: false, sort: false)
5582
- _from_rbexpr(_rbexpr.value_counts(multithreaded, sort))
6563
+ def value_counts(
6564
+ sort: false,
6565
+ parallel: false,
6566
+ name: nil,
6567
+ normalize: false
6568
+ )
6569
+ if name.nil?
6570
+ if normalize
6571
+ name = "proportion"
6572
+ else
6573
+ name = "count"
6574
+ end
6575
+ end
6576
+ _from_rbexpr(
6577
+ _rbexpr.value_counts(sort, parallel, name, normalize)
6578
+ )
5583
6579
  end
5584
6580
 
5585
6581
  # Return a count of the unique values in the order of appearance.
@@ -5954,6 +6950,10 @@ module Polars
5954
6950
  # # │ 3 ┆ 1.0 ┆ 10.0 │
5955
6951
  # # └─────┴─────┴──────────┘
5956
6952
  def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
6953
+ if !default.eql?(NO_DEFAULT)
6954
+ return replace_strict(old, new, default: default, return_dtype: return_dtype)
6955
+ end
6956
+
5957
6957
  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
5958
6958
  new = Series.new(old.values)
5959
6959
  old = Series.new(old.keys)
@@ -5966,17 +6966,164 @@ module Polars
5966
6966
  end
5967
6967
  end
5968
6968
 
5969
- old = Utils.parse_as_expression(old, str_as_lit: true)
5970
- new = Utils.parse_as_expression(new, str_as_lit: true)
6969
+ old = Utils.parse_into_expression(old, str_as_lit: true)
6970
+ new = Utils.parse_into_expression(new, str_as_lit: true)
5971
6971
 
5972
- default =
5973
- if default.eql?(NO_DEFAULT)
5974
- nil
5975
- else
5976
- Utils.parse_as_expression(default, str_as_lit: true)
5977
- end
6972
+ result = _from_rbexpr(_rbexpr.replace(old, new))
6973
+
6974
+ if !return_dtype.nil?
6975
+ result = result.cast(return_dtype)
6976
+ end
5978
6977
 
5979
- _from_rbexpr(_rbexpr.replace(old, new, default, return_dtype))
6978
+ result
6979
+ end
6980
+
6981
+ # Replace all values by different values.
6982
+ #
6983
+ # @param old [Object]
6984
+ # Value or sequence of values to replace.
6985
+ # Accepts expression input. Sequences are parsed as Series,
6986
+ # other non-expression inputs are parsed as literals.
6987
+ # Also accepts a mapping of values to their replacement as syntactic sugar for
6988
+ # `replace_strict(Series.new(mapping.keys), Series.new(mapping.values))`.
6989
+ # @param new [Object]
6990
+ # Value or sequence of values to replace by.
6991
+ # Accepts expression input. Sequences are parsed as Series,
6992
+ # other non-expression inputs are parsed as literals.
6993
+ # Length must match the length of `old` or have length 1.
6994
+ # @param default [Object]
6995
+ # Set values that were not replaced to this value. If no default is specified
6996
+ # (the default), an error is raised if any non-null values were not replaced.
6997
+ # Accepts expression input. Non-expression inputs are parsed as literals.
6998
+ # @param return_dtype [Object]
6999
+ # The data type of the resulting expression. If set to `nil` (default),
7000
+ # the data type is determined automatically based on the other inputs.
7001
+ #
7002
+ # @return [Expr]
7003
+ #
7004
+ # @note
7005
+ # The global string cache must be enabled when replacing categorical values.
7006
+ #
7007
+ # @example Replace values by passing sequences to the `old` and `new` parameters.
7008
+ # df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
7009
+ # df.with_columns(
7010
+ # replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
7011
+ # )
7012
+ # # =>
7013
+ # # shape: (4, 2)
7014
+ # # ┌─────┬──────────┐
7015
+ # # │ a ┆ replaced │
7016
+ # # │ --- ┆ --- │
7017
+ # # │ i64 ┆ i64 │
7018
+ # # ╞═════╪══════════╡
7019
+ # # │ 1 ┆ 100 │
7020
+ # # │ 2 ┆ 200 │
7021
+ # # │ 2 ┆ 200 │
7022
+ # # │ 3 ┆ 300 │
7023
+ # # └─────┴──────────┘
7024
+ #
7025
+ # @example By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.
7026
+ # mapping = {2 => 200, 3 => 300}
7027
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
7028
+ # # =>
7029
+ # # shape: (4, 2)
7030
+ # # ┌─────┬──────────┐
7031
+ # # │ a ┆ replaced │
7032
+ # # │ --- ┆ --- │
7033
+ # # │ i64 ┆ i64 │
7034
+ # # ╞═════╪══════════╡
7035
+ # # │ 1 ┆ -1 │
7036
+ # # │ 2 ┆ 200 │
7037
+ # # │ 2 ┆ 200 │
7038
+ # # │ 3 ┆ 300 │
7039
+ # # └─────┴──────────┘
7040
+ #
7041
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type.
7042
+ # df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
7043
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
7044
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
7045
+ # # =>
7046
+ # # shape: (3, 2)
7047
+ # # ┌─────┬──────────┐
7048
+ # # │ a ┆ replaced │
7049
+ # # │ --- ┆ --- │
7050
+ # # │ str ┆ i64 │
7051
+ # # ╞═════╪══════════╡
7052
+ # # │ x ┆ 1 │
7053
+ # # │ y ┆ 2 │
7054
+ # # │ z ┆ 3 │
7055
+ # # └─────┴──────────┘
7056
+ #
7057
+ # @example
7058
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
7059
+ # # =>
7060
+ # # shape: (3, 2)
7061
+ # # ┌─────┬──────────┐
7062
+ # # │ a ┆ replaced │
7063
+ # # │ --- ┆ --- │
7064
+ # # │ str ┆ str │
7065
+ # # ╞═════╪══════════╡
7066
+ # # │ x ┆ 1 │
7067
+ # # │ y ┆ 2 │
7068
+ # # │ z ┆ 3 │
7069
+ # # └─────┴──────────┘
7070
+ #
7071
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
7072
+ # df.with_columns(
7073
+ # replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
7074
+ # )
7075
+ # # =>
7076
+ # # shape: (3, 2)
7077
+ # # ┌─────┬──────────┐
7078
+ # # │ a ┆ replaced │
7079
+ # # │ --- ┆ --- │
7080
+ # # │ str ┆ u8 │
7081
+ # # ╞═════╪══════════╡
7082
+ # # │ x ┆ 1 │
7083
+ # # │ y ┆ 2 │
7084
+ # # │ z ┆ 3 │
7085
+ # # └─────┴──────────┘
7086
+ #
7087
+ # @example Expression input is supported for all parameters.
7088
+ # df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
7089
+ # df.with_columns(
7090
+ # replaced: Polars.col("a").replace_strict(
7091
+ # Polars.col("a").max,
7092
+ # Polars.col("b").sum,
7093
+ # default: Polars.col("b")
7094
+ # )
7095
+ # )
7096
+ # # =>
7097
+ # # shape: (4, 3)
7098
+ # # ┌─────┬─────┬──────────┐
7099
+ # # │ a ┆ b ┆ replaced │
7100
+ # # │ --- ┆ --- ┆ --- │
7101
+ # # │ i64 ┆ f64 ┆ f64 │
7102
+ # # ╞═════╪═════╪══════════╡
7103
+ # # │ 1 ┆ 1.5 ┆ 1.5 │
7104
+ # # │ 2 ┆ 2.5 ┆ 2.5 │
7105
+ # # │ 2 ┆ 5.0 ┆ 5.0 │
7106
+ # # │ 3 ┆ 1.0 ┆ 10.0 │
7107
+ # # └─────┴─────┴──────────┘
7108
+ def replace_strict(
7109
+ old,
7110
+ new = NO_DEFAULT,
7111
+ default: NO_DEFAULT,
7112
+ return_dtype: nil
7113
+ )
7114
+ if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
7115
+ new = Series.new(old.values)
7116
+ old = Series.new(old.keys)
7117
+ end
7118
+
7119
+ old = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
7120
+ new = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)
7121
+
7122
+ default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_into_expression(default, str_as_lit: true)
7123
+
7124
+ _from_rbexpr(
7125
+ _rbexpr.replace_strict(old, new, default, return_dtype)
7126
+ )
5980
7127
  end
5981
7128
 
5982
7129
  # Create an object namespace of all list related methods.
@@ -6053,7 +7200,7 @@ module Polars
6053
7200
  end
6054
7201
 
6055
7202
  def _to_expr(other)
6056
- other.is_a?(Expr) ? other : Utils.lit(other)
7203
+ other.is_a?(Expr) ? other : F.lit(other)
6057
7204
  end
6058
7205
 
6059
7206
  def _prepare_alpha(com, span, half_life, alpha)
@@ -6101,5 +7248,9 @@ module Polars
6101
7248
  end
6102
7249
  [window_size, min_periods]
6103
7250
  end
7251
+
7252
+ def _prepare_rolling_by_window_args(window_size)
7253
+ window_size
7254
+ end
6104
7255
  end
6105
7256
  end