polars-df 0.11.0-x86_64-linux-musl → 0.12.0-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/LICENSE-THIRD-PARTY.txt +1065 -878
  5. data/lib/polars/3.1/polars.so +0 -0
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +3 -3
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +5 -4
  35. data/lib/polars/lazy_frame.rb +120 -106
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +105 -189
  41. data/lib/polars/string_expr.rb +42 -67
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/expr.rb CHANGED
@@ -82,8 +82,8 @@ module Polars
82
82
  #
83
83
  # @return [Expr]
84
84
  def **(power)
85
- exponent = Utils.expr_to_lit_or_expr(power)
86
- _from_rbexpr(_rbexpr.pow(exponent._rbexpr))
85
+ exponent = Utils.parse_into_expression(power)
86
+ _from_rbexpr(_rbexpr.pow(exponent))
87
87
  end
88
88
 
89
89
  # Greater than or equal.
@@ -811,8 +811,8 @@ module Polars
811
811
  # # │ 10 ┆ 4 │
812
812
  # # └─────┴──────┘
813
813
  def append(other, upcast: true)
814
- other = Utils.expr_to_lit_or_expr(other)
815
- _from_rbexpr(_rbexpr.append(other._rbexpr, upcast))
814
+ other = Utils.parse_into_expression(other)
815
+ _from_rbexpr(_rbexpr.append(other, upcast))
816
816
  end
817
817
 
818
818
  # Create a single chunk of memory for this Series.
@@ -1165,8 +1165,8 @@ module Polars
1165
1165
  # # │ 44 │
1166
1166
  # # └─────┘
1167
1167
  def dot(other)
1168
- other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
1169
- _from_rbexpr(_rbexpr.dot(other._rbexpr))
1168
+ other = Utils.parse_into_expression(other, str_as_lit: false)
1169
+ _from_rbexpr(_rbexpr.dot(other))
1170
1170
  end
1171
1171
 
1172
1172
  # Compute the most occurring value(s).
@@ -1252,12 +1252,12 @@ module Polars
1252
1252
  # df = Polars::DataFrame.new(
1253
1253
  # {
1254
1254
  # "group" => [
1255
- # "one",
1256
- # "one",
1257
- # "one",
1258
- # "two",
1259
- # "two",
1260
- # "two"
1255
+ # "one",
1256
+ # "one",
1257
+ # "one",
1258
+ # "two",
1259
+ # "two",
1260
+ # "two"
1261
1261
  # ],
1262
1262
  # "value" => [1, 98, 2, 3, 99, 4]
1263
1263
  # }
@@ -1345,9 +1345,9 @@ module Polars
1345
1345
  # # │ 3 ┆ 4 │
1346
1346
  # # │ 2 ┆ 98 │
1347
1347
  # # └───────┴──────────┘
1348
- def top_k(k: 5, nulls_last: false, multithreaded: true)
1349
- k = Utils.parse_as_expression(k)
1350
- _from_rbexpr(_rbexpr.top_k(k, nulls_last, multithreaded))
1348
+ def top_k(k: 5)
1349
+ k = Utils.parse_into_expression(k)
1350
+ _from_rbexpr(_rbexpr.top_k(k))
1351
1351
  end
1352
1352
 
1353
1353
  # Return the `k` smallest elements.
@@ -1384,9 +1384,9 @@ module Polars
1384
1384
  # # │ 3 ┆ 4 │
1385
1385
  # # │ 2 ┆ 98 │
1386
1386
  # # └───────┴──────────┘
1387
- def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1388
- k = Utils.parse_as_expression(k)
1389
- _from_rbexpr(_rbexpr.bottom_k(k, nulls_last, multithreaded))
1387
+ def bottom_k(k: 5)
1388
+ k = Utils.parse_into_expression(k)
1389
+ _from_rbexpr(_rbexpr.bottom_k(k))
1390
1390
  end
1391
1391
 
1392
1392
  # Get the index values that would sort this column.
@@ -1498,8 +1498,8 @@ module Polars
1498
1498
  # # │ 0 ┆ 2 ┆ 4 │
1499
1499
  # # └──────┴───────┴─────┘
1500
1500
  def search_sorted(element, side: "any")
1501
- element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
1502
- _from_rbexpr(_rbexpr.search_sorted(element._rbexpr, side))
1501
+ element = Utils.parse_into_expression(element, str_as_lit: false)
1502
+ _from_rbexpr(_rbexpr.search_sorted(element, side))
1503
1503
  end
1504
1504
 
1505
1505
  # Sort this column by the ordering of another column, or multiple other columns.
@@ -1545,13 +1545,14 @@ module Polars
1545
1545
  # # │ two │
1546
1546
  # # └───────┘
1547
1547
  def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
1548
- by = Utils.parse_as_list_of_expressions(by, *more_by)
1549
- if !reverse.is_a?(::Array)
1550
- reverse = [reverse]
1551
- elsif by.length != reverse.length
1552
- raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
1553
- end
1554
- _from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
1548
+ by = Utils.parse_into_list_of_expressions(by, *more_by)
1549
+ reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
1550
+ nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
1551
+ _from_rbexpr(
1552
+ _rbexpr.sort_by(
1553
+ by, reverse, nulls_last, multithreaded, maintain_order
1554
+ )
1555
+ )
1555
1556
  end
1556
1557
 
1557
1558
  # Take values by index.
@@ -1588,14 +1589,51 @@ module Polars
1588
1589
  # # └───────┴───────────┘
1589
1590
  def gather(indices)
1590
1591
  if indices.is_a?(::Array)
1591
- indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1592
+ indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))._rbexpr
1592
1593
  else
1593
- indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1594
+ indices_lit = Utils.parse_into_expression(indices, str_as_lit: false)
1594
1595
  end
1595
- _from_rbexpr(_rbexpr.gather(indices_lit._rbexpr))
1596
+ _from_rbexpr(_rbexpr.gather(indices_lit))
1596
1597
  end
1597
1598
  alias_method :take, :gather
1598
1599
 
1600
+ # Return a single value by index.
1601
+ #
1602
+ # @param index [Object]
1603
+ # An expression that leads to a UInt32 index.
1604
+ #
1605
+ # @return [Expr]
1606
+ #
1607
+ # @example
1608
+ # df = Polars::DataFrame.new(
1609
+ # {
1610
+ # "group" => [
1611
+ # "one",
1612
+ # "one",
1613
+ # "one",
1614
+ # "two",
1615
+ # "two",
1616
+ # "two"
1617
+ # ],
1618
+ # "value" => [1, 98, 2, 3, 99, 4]
1619
+ # }
1620
+ # )
1621
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
1622
+ # # =>
1623
+ # # shape: (2, 2)
1624
+ # # ┌───────┬───────┐
1625
+ # # │ group ┆ value │
1626
+ # # │ --- ┆ --- │
1627
+ # # │ str ┆ i64 │
1628
+ # # ╞═══════╪═══════╡
1629
+ # # │ one ┆ 98 │
1630
+ # # │ two ┆ 99 │
1631
+ # # └───────┴───────┘
1632
+ def get(index)
1633
+ index_lit = Utils.parse_into_expression(index)
1634
+ _from_rbexpr(_rbexpr.get(index_lit))
1635
+ end
1636
+
1599
1637
  # Shift the values by a given period.
1600
1638
  #
1601
1639
  # @param n [Integer]
@@ -1622,9 +1660,9 @@ module Polars
1622
1660
  # # └──────┘
1623
1661
  def shift(n = 1, fill_value: nil)
1624
1662
  if !fill_value.nil?
1625
- fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1663
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1626
1664
  end
1627
- n = Utils.parse_as_expression(n)
1665
+ n = Utils.parse_into_expression(n)
1628
1666
  _from_rbexpr(_rbexpr.shift(n, fill_value))
1629
1667
  end
1630
1668
 
@@ -1727,8 +1765,8 @@ module Polars
1727
1765
  end
1728
1766
 
1729
1767
  if !value.nil?
1730
- value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
1731
- _from_rbexpr(_rbexpr.fill_null(value._rbexpr))
1768
+ value = Utils.parse_into_expression(value, str_as_lit: true)
1769
+ _from_rbexpr(_rbexpr.fill_null(value))
1732
1770
  else
1733
1771
  _from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
1734
1772
  end
@@ -1758,8 +1796,8 @@ module Polars
1758
1796
  # # │ zero ┆ 6.0 │
1759
1797
  # # └──────┴──────┘
1760
1798
  def fill_nan(fill_value)
1761
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
1762
- _from_rbexpr(_rbexpr.fill_nan(fill_value._rbexpr))
1799
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1800
+ _from_rbexpr(_rbexpr.fill_nan(fill_value))
1763
1801
  end
1764
1802
 
1765
1803
  # Fill missing values with the latest seen values.
@@ -2275,7 +2313,7 @@ module Polars
2275
2313
  # # │ 4 │
2276
2314
  # # └────────┘
2277
2315
  def over(expr)
2278
- rbexprs = Utils.selection_to_rbexpr_list(expr)
2316
+ rbexprs = Utils.parse_into_list_of_expressions(expr)
2279
2317
  _from_rbexpr(_rbexpr.over(rbexprs))
2280
2318
  end
2281
2319
 
@@ -2470,8 +2508,8 @@ module Polars
2470
2508
  # # │ 1.5 │
2471
2509
  # # └─────┘
2472
2510
  def quantile(quantile, interpolation: "nearest")
2473
- quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2474
- _from_rbexpr(_rbexpr.quantile(quantile._rbexpr, interpolation))
2511
+ quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
2512
+ _from_rbexpr(_rbexpr.quantile(quantile, interpolation))
2475
2513
  end
2476
2514
 
2477
2515
  # Bin continuous values into discrete categories.
@@ -2515,17 +2553,17 @@ module Polars
2515
2553
  # ).unnest("cut")
2516
2554
  # # =>
2517
2555
  # # shape: (5, 3)
2518
- # # ┌─────┬──────┬────────────┐
2519
- # # │ foo ┆ brk ┆ foo_bin │
2520
- # # │ --- ┆ --- ┆ --- │
2521
- # # │ i64 ┆ f64 ┆ cat │
2522
- # # ╞═════╪══════╪════════════╡
2523
- # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2524
- # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2525
- # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2526
- # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2527
- # # │ 2 ┆ inf ┆ (1, inf] │
2528
- # # └─────┴──────┴────────────┘
2556
+ # # ┌─────┬────────────┬────────────┐
2557
+ # # │ foo ┆ breakpoint ┆ category │
2558
+ # # │ --- ┆ --- ┆ --- │
2559
+ # # │ i64 ┆ f64 ┆ cat │
2560
+ # # ╞═════╪════════════╪════════════╡
2561
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2562
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2563
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2564
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2565
+ # # │ 2 ┆ inf ┆ (1, inf] │
2566
+ # # └─────┴────────────┴────────────┘
2529
2567
  def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
2530
2568
  _from_rbexpr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
2531
2569
  end
@@ -2596,17 +2634,17 @@ module Polars
2596
2634
  # ).unnest("qcut")
2597
2635
  # # =>
2598
2636
  # # shape: (5, 3)
2599
- # # ┌─────┬──────┬────────────┐
2600
- # # │ foo ┆ brk ┆ foo_bin │
2601
- # # │ --- ┆ --- ┆ --- │
2602
- # # │ i64 ┆ f64 ┆ cat │
2603
- # # ╞═════╪══════╪════════════╡
2604
- # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2605
- # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2606
- # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2607
- # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2608
- # # │ 2 ┆ inf ┆ (1, inf] │
2609
- # # └─────┴──────┴────────────┘
2637
+ # # ┌─────┬────────────┬────────────┐
2638
+ # # │ foo ┆ breakpoint ┆ category │
2639
+ # # │ --- ┆ --- ┆ --- │
2640
+ # # │ i64 ┆ f64 ┆ cat │
2641
+ # # ╞═════╪════════════╪════════════╡
2642
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2643
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2644
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2645
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2646
+ # # │ 2 ┆ inf ┆ (1, inf] │
2647
+ # # └─────┴────────────┴────────────┘
2610
2648
  def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
2611
2649
  if quantiles.is_a?(Integer)
2612
2650
  rbexpr = _rbexpr.qcut_uniform(
@@ -2630,18 +2668,18 @@ module Polars
2630
2668
  # df.select(Polars.col("s").rle).unnest("s")
2631
2669
  # # =>
2632
2670
  # # shape: (6, 2)
2633
- # # ┌─────────┬────────┐
2634
- # # │ lengths ┆ values │
2635
- # # │ --- ┆ --- │
2636
- # # │ i32 ┆ i64 │
2637
- # # ╞═════════╪════════╡
2638
- # # │ 2 ┆ 1 │
2639
- # # │ 1 ┆ 2 │
2640
- # # │ 1 ┆ 1 │
2641
- # # │ 1 ┆ null │
2642
- # # │ 1 ┆ 1 │
2643
- # # │ 2 ┆ 3 │
2644
- # # └─────────┴────────┘
2671
+ # # ┌─────┬───────┐
2672
+ # # │ len ┆ value │
2673
+ # # │ --- ┆ --- │
2674
+ # # │ u32 ┆ i64 │
2675
+ # # ╞═════╪═══════╡
2676
+ # # │ 2 ┆ 1 │
2677
+ # # │ 1 ┆ 2 │
2678
+ # # │ 1 ┆ 1 │
2679
+ # # │ 1 ┆ null │
2680
+ # # │ 1 ┆ 1 │
2681
+ # # │ 2 ┆ 3 │
2682
+ # # └─────┴───────┘
2645
2683
  def rle
2646
2684
  _from_rbexpr(_rbexpr.rle)
2647
2685
  end
@@ -3104,7 +3142,7 @@ module Polars
3104
3142
  # # │ null ┆ null ┆ null ┆ true │
3105
3143
  # # └──────┴──────┴────────┴────────────────┘
3106
3144
  def eq_missing(other)
3107
- other = Utils.parse_as_expression(other, str_as_lit: true)
3145
+ other = Utils.parse_into_expression(other, str_as_lit: true)
3108
3146
  _from_rbexpr(_rbexpr.eq_missing(other))
3109
3147
  end
3110
3148
 
@@ -3308,7 +3346,7 @@ module Polars
3308
3346
  # # │ null ┆ null ┆ null ┆ false │
3309
3347
  # # └──────┴──────┴────────┴────────────────┘
3310
3348
  def ne_missing(other)
3311
- other = Utils.parse_as_expression(other, str_as_lit: true)
3349
+ other = Utils.parse_into_expression(other, str_as_lit: true)
3312
3350
  _from_rbexpr(_rbexpr.neq_missing(other))
3313
3351
  end
3314
3352
 
@@ -3611,14 +3649,14 @@ module Polars
3611
3649
  def is_in(other)
3612
3650
  if other.is_a?(::Array)
3613
3651
  if other.length == 0
3614
- other = Polars.lit(nil)
3652
+ other = Polars.lit(nil)._rbexpr
3615
3653
  else
3616
- other = Polars.lit(Series.new(other))
3654
+ other = Polars.lit(Series.new(other))._rbexpr
3617
3655
  end
3618
3656
  else
3619
- other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
3657
+ other = Utils.parse_into_expression(other, str_as_lit: false)
3620
3658
  end
3621
- _from_rbexpr(_rbexpr.is_in(other._rbexpr))
3659
+ _from_rbexpr(_rbexpr.is_in(other))
3622
3660
  end
3623
3661
  alias_method :in?, :is_in
3624
3662
 
@@ -3653,15 +3691,15 @@ module Polars
3653
3691
  # # │ ["z", "z", "z"] │
3654
3692
  # # └─────────────────┘
3655
3693
  def repeat_by(by)
3656
- by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
3657
- _from_rbexpr(_rbexpr.repeat_by(by._rbexpr))
3694
+ by = Utils.parse_into_expression(by, str_as_lit: false)
3695
+ _from_rbexpr(_rbexpr.repeat_by(by))
3658
3696
  end
3659
3697
 
3660
3698
  # Check if this expression is between start and end.
3661
3699
  #
3662
- # @param start [Object]
3700
+ # @param lower_bound [Object]
3663
3701
  # Lower bound as primitive type or datetime.
3664
- # @param _end [Object]
3702
+ # @param upper_bound [Object]
3665
3703
  # Upper bound as primitive type or datetime.
3666
3704
  # @param closed ["both", "left", "right", "none"]
3667
3705
  # Define which sides of the interval are closed (inclusive).
@@ -3723,22 +3761,13 @@ module Polars
3723
3761
  # # │ d ┆ false │
3724
3762
  # # │ e ┆ false │
3725
3763
  # # └─────┴────────────┘
3726
- def is_between(start, _end, closed: "both")
3727
- start = Utils.expr_to_lit_or_expr(start, str_to_lit: false)
3728
- _end = Utils.expr_to_lit_or_expr(_end, str_to_lit: false)
3729
-
3730
- case closed
3731
- when "none"
3732
- (self > start) & (self < _end)
3733
- when "both"
3734
- (self >= start) & (self <= _end)
3735
- when "right"
3736
- (self > start) & (self <= _end)
3737
- when "left"
3738
- (self >= start) & (self < _end)
3739
- else
3740
- raise ArgumentError, "closed must be one of 'left', 'right', 'both', or 'none'"
3741
- end
3764
+ def is_between(lower_bound, upper_bound, closed: "both")
3765
+ lower_bound = Utils.parse_into_expression(lower_bound)
3766
+ upper_bound = Utils.parse_into_expression(upper_bound)
3767
+
3768
+ _from_rbexpr(
3769
+ _rbexpr.is_between(lower_bound, upper_bound, closed)
3770
+ )
3742
3771
  end
3743
3772
 
3744
3773
  # Hash the elements in the selection.
@@ -3977,7 +4006,7 @@ module Polars
3977
4006
  warn_if_unsorted: nil
3978
4007
  )
3979
4008
  window_size = _prepare_rolling_by_window_args(window_size)
3980
- by = Utils.parse_as_expression(by)
4009
+ by = Utils.parse_into_expression(by)
3981
4010
  _from_rbexpr(
3982
4011
  _rbexpr.rolling_min_by(by, window_size, min_periods, closed)
3983
4012
  )
@@ -4106,7 +4135,7 @@ module Polars
4106
4135
  warn_if_unsorted: nil
4107
4136
  )
4108
4137
  window_size = _prepare_rolling_by_window_args(window_size)
4109
- by = Utils.parse_as_expression(by)
4138
+ by = Utils.parse_into_expression(by)
4110
4139
  _from_rbexpr(
4111
4140
  _rbexpr.rolling_max_by(by, window_size, min_periods, closed)
4112
4141
  )
@@ -4237,7 +4266,7 @@ module Polars
4237
4266
  warn_if_unsorted: nil
4238
4267
  )
4239
4268
  window_size = _prepare_rolling_by_window_args(window_size)
4240
- by = Utils.parse_as_expression(by)
4269
+ by = Utils.parse_into_expression(by)
4241
4270
  _from_rbexpr(
4242
4271
  _rbexpr.rolling_mean_by(
4243
4272
  by,
@@ -4371,7 +4400,7 @@ module Polars
4371
4400
  warn_if_unsorted: nil
4372
4401
  )
4373
4402
  window_size = _prepare_rolling_by_window_args(window_size)
4374
- by = Utils.parse_as_expression(by)
4403
+ by = Utils.parse_into_expression(by)
4375
4404
  _from_rbexpr(
4376
4405
  _rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
4377
4406
  )
@@ -4503,7 +4532,7 @@ module Polars
4503
4532
  warn_if_unsorted: nil
4504
4533
  )
4505
4534
  window_size = _prepare_rolling_by_window_args(window_size)
4506
- by = Utils.parse_as_expression(by)
4535
+ by = Utils.parse_into_expression(by)
4507
4536
  _from_rbexpr(
4508
4537
  _rbexpr.rolling_std_by(
4509
4538
  by,
@@ -4641,7 +4670,7 @@ module Polars
4641
4670
  warn_if_unsorted: nil
4642
4671
  )
4643
4672
  window_size = _prepare_rolling_by_window_args(window_size)
4644
- by = Utils.parse_as_expression(by)
4673
+ by = Utils.parse_into_expression(by)
4645
4674
  _from_rbexpr(
4646
4675
  _rbexpr.rolling_var_by(
4647
4676
  by,
@@ -4752,7 +4781,7 @@ module Polars
4752
4781
  warn_if_unsorted: nil
4753
4782
  )
4754
4783
  window_size = _prepare_rolling_by_window_args(window_size)
4755
- by = Utils.parse_as_expression(by)
4784
+ by = Utils.parse_into_expression(by)
4756
4785
  _from_rbexpr(
4757
4786
  _rbexpr.rolling_median_by(by, window_size, min_periods, closed)
4758
4787
  )
@@ -4863,7 +4892,7 @@ module Polars
4863
4892
  warn_if_unsorted: nil
4864
4893
  )
4865
4894
  window_size = _prepare_rolling_by_window_args(window_size)
4866
- by = Utils.parse_as_expression(by)
4895
+ by = Utils.parse_into_expression(by)
4867
4896
  _from_rbexpr(
4868
4897
  _rbexpr.rolling_quantile_by(
4869
4898
  by,
@@ -4908,12 +4937,6 @@ module Polars
4908
4937
  # a result. If None, it will be set equal to window size.
4909
4938
  # @param center [Boolean]
4910
4939
  # Set the labels at the center of the window
4911
- # @param by [String]
4912
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
4913
- # set the column that will be used to determine the windows. This column must
4914
- # be of dtype `{Date, Datetime}`
4915
- # @param closed ["left", "right", "both", "none"]
4916
- # Define whether the temporal window interval is closed or not.
4917
4940
  #
4918
4941
  # @note
4919
4942
  # This functionality is experimental and may change without it being considered a
@@ -4951,24 +4974,8 @@ module Polars
4951
4974
  window_size,
4952
4975
  weights: nil,
4953
4976
  min_periods: nil,
4954
- center: false,
4955
- by: nil,
4956
- closed: nil
4977
+ center: false
4957
4978
  )
4958
- window_size, min_periods = _prepare_rolling_window_args(
4959
- window_size, min_periods
4960
- )
4961
- if !by.nil?
4962
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
4963
- return rolling_min_by(
4964
- by,
4965
- window_size,
4966
- min_periods: min_periods,
4967
- closed: closed || "right",
4968
- warn_if_unsorted: warn_if_unsorted
4969
- )
4970
- end
4971
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
4972
4979
  _from_rbexpr(
4973
4980
  _rbexpr.rolling_min(
4974
4981
  window_size, weights, min_periods, center
@@ -5008,12 +5015,6 @@ module Polars
5008
5015
  # a result. If None, it will be set equal to window size.
5009
5016
  # @param center [Boolean]
5010
5017
  # Set the labels at the center of the window
5011
- # @param by [String]
5012
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5013
- # set the column that will be used to determine the windows. This column must
5014
- # be of dtype `{Date, Datetime}`
5015
- # @param closed ["left", "right", "both", "none"]
5016
- # Define whether the temporal window interval is closed or not.
5017
5018
  #
5018
5019
  # @note
5019
5020
  # This functionality is experimental and may change without it being considered a
@@ -5051,24 +5052,8 @@ module Polars
5051
5052
  window_size,
5052
5053
  weights: nil,
5053
5054
  min_periods: nil,
5054
- center: false,
5055
- by: nil,
5056
- closed: nil
5055
+ center: false
5057
5056
  )
5058
- window_size, min_periods = _prepare_rolling_window_args(
5059
- window_size, min_periods
5060
- )
5061
- if !by.nil?
5062
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5063
- return rolling_max_by(
5064
- by,
5065
- window_size,
5066
- min_periods: min_periods,
5067
- closed: closed || "right",
5068
- warn_if_unsorted: warn_if_unsorted
5069
- )
5070
- end
5071
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5072
5057
  _from_rbexpr(
5073
5058
  _rbexpr.rolling_max(
5074
5059
  window_size, weights, min_periods, center
@@ -5108,12 +5093,6 @@ module Polars
5108
5093
  # a result. If None, it will be set equal to window size.
5109
5094
  # @param center [Boolean]
5110
5095
  # Set the labels at the center of the window
5111
- # @param by [String]
5112
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5113
- # set the column that will be used to determine the windows. This column must
5114
- # be of dtype `{Date, Datetime}`
5115
- # @param closed ["left", "right", "both", "none"]
5116
- # Define whether the temporal window interval is closed or not.
5117
5096
  #
5118
5097
  # @note
5119
5098
  # This functionality is experimental and may change without it being considered a
@@ -5151,24 +5130,8 @@ module Polars
5151
5130
  window_size,
5152
5131
  weights: nil,
5153
5132
  min_periods: nil,
5154
- center: false,
5155
- by: nil,
5156
- closed: nil
5133
+ center: false
5157
5134
  )
5158
- window_size, min_periods = _prepare_rolling_window_args(
5159
- window_size, min_periods
5160
- )
5161
- if !by.nil?
5162
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5163
- return rolling_mean_by(
5164
- by,
5165
- window_size,
5166
- min_periods: min_periods,
5167
- closed: closed || "right",
5168
- warn_if_unsorted: warn_if_unsorted
5169
- )
5170
- end
5171
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5172
5135
  _from_rbexpr(
5173
5136
  _rbexpr.rolling_mean(
5174
5137
  window_size, weights, min_periods, center
@@ -5208,12 +5171,6 @@ module Polars
5208
5171
  # a result. If None, it will be set equal to window size.
5209
5172
  # @param center [Boolean]
5210
5173
  # Set the labels at the center of the window
5211
- # @param by [String]
5212
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5213
- # set the column that will be used to determine the windows. This column must
5214
- # be of dtype `{Date, Datetime}`
5215
- # @param closed ["left", "right", "both", "none"]
5216
- # Define whether the temporal window interval is closed or not.
5217
5174
  #
5218
5175
  # @note
5219
5176
  # This functionality is experimental and may change without it being considered a
@@ -5251,24 +5208,8 @@ module Polars
5251
5208
  window_size,
5252
5209
  weights: nil,
5253
5210
  min_periods: nil,
5254
- center: false,
5255
- by: nil,
5256
- closed: nil
5211
+ center: false
5257
5212
  )
5258
- window_size, min_periods = _prepare_rolling_window_args(
5259
- window_size, min_periods
5260
- )
5261
- if !by.nil?
5262
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5263
- return rolling_sum_by(
5264
- by,
5265
- window_size,
5266
- min_periods: min_periods,
5267
- closed: closed || "right",
5268
- warn_if_unsorted: warn_if_unsorted
5269
- )
5270
- end
5271
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5272
5213
  _from_rbexpr(
5273
5214
  _rbexpr.rolling_sum(
5274
5215
  window_size, weights, min_periods, center
@@ -5308,12 +5249,6 @@ module Polars
5308
5249
  # a result. If None, it will be set equal to window size.
5309
5250
  # @param center [Boolean]
5310
5251
  # Set the labels at the center of the window
5311
- # @param by [String]
5312
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5313
- # set the column that will be used to determine the windows. This column must
5314
- # be of dtype `{Date, Datetime}`
5315
- # @param closed ["left", "right", "both", "none"]
5316
- # Define whether the temporal window interval is closed or not.
5317
5252
  #
5318
5253
  # @note
5319
5254
  # This functionality is experimental and may change without it being considered a
@@ -5352,26 +5287,8 @@ module Polars
5352
5287
  weights: nil,
5353
5288
  min_periods: nil,
5354
5289
  center: false,
5355
- by: nil,
5356
- closed: nil,
5357
- ddof: 1,
5358
- warn_if_unsorted: true
5290
+ ddof: 1
5359
5291
  )
5360
- window_size, min_periods = _prepare_rolling_window_args(
5361
- window_size, min_periods
5362
- )
5363
- if !by.nil?
5364
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5365
- return rolling_std_by(
5366
- by,
5367
- window_size,
5368
- min_periods: min_periods,
5369
- closed: closed || "right",
5370
- ddof: ddof,
5371
- warn_if_unsorted: warn_if_unsorted
5372
- )
5373
- end
5374
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5375
5292
  _from_rbexpr(
5376
5293
  _rbexpr.rolling_std(
5377
5294
  window_size, weights, min_periods, center, ddof
@@ -5411,12 +5328,6 @@ module Polars
5411
5328
  # a result. If None, it will be set equal to window size.
5412
5329
  # @param center [Boolean]
5413
5330
  # Set the labels at the center of the window
5414
- # @param by [String]
5415
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5416
- # set the column that will be used to determine the windows. This column must
5417
- # be of dtype `{Date, Datetime}`
5418
- # @param closed ["left", "right", "both", "none"]
5419
- # Define whether the temporal window interval is closed or not.
5420
5331
  #
5421
5332
  # @note
5422
5333
  # This functionality is experimental and may change without it being considered a
@@ -5455,26 +5366,8 @@ module Polars
5455
5366
  weights: nil,
5456
5367
  min_periods: nil,
5457
5368
  center: false,
5458
- by: nil,
5459
- closed: nil,
5460
- ddof: 1,
5461
- warn_if_unsorted: true
5369
+ ddof: 1
5462
5370
  )
5463
- window_size, min_periods = _prepare_rolling_window_args(
5464
- window_size, min_periods
5465
- )
5466
- if !by.nil?
5467
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5468
- return rolling_var_by(
5469
- by,
5470
- window_size,
5471
- min_periods: min_periods,
5472
- closed: closed || "right",
5473
- ddof: ddof,
5474
- warn_if_unsorted: warn_if_unsorted
5475
- )
5476
- end
5477
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5478
5371
  _from_rbexpr(
5479
5372
  _rbexpr.rolling_var(
5480
5373
  window_size, weights, min_periods, center, ddof
@@ -5510,12 +5403,6 @@ module Polars
5510
5403
  # a result. If None, it will be set equal to window size.
5511
5404
  # @param center [Boolean]
5512
5405
  # Set the labels at the center of the window
5513
- # @param by [String]
5514
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5515
- # set the column that will be used to determine the windows. This column must
5516
- # be of dtype `{Date, Datetime}`
5517
- # @param closed ["left", "right", "both", "none"]
5518
- # Define whether the temporal window interval is closed or not.
5519
5406
  #
5520
5407
  # @note
5521
5408
  # This functionality is experimental and may change without it being considered a
@@ -5553,25 +5440,8 @@ module Polars
5553
5440
  window_size,
5554
5441
  weights: nil,
5555
5442
  min_periods: nil,
5556
- center: false,
5557
- by: nil,
5558
- closed: nil,
5559
- warn_if_unsorted: true
5443
+ center: false
5560
5444
  )
5561
- window_size, min_periods = _prepare_rolling_window_args(
5562
- window_size, min_periods
5563
- )
5564
- if !by.nil?
5565
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5566
- return rolling_median_by(
5567
- by,
5568
- window_size,
5569
- min_periods: min_periods,
5570
- closed: closed || "right",
5571
- warn_if_unsorted: warn_if_unsorted
5572
- )
5573
- end
5574
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5575
5445
  _from_rbexpr(
5576
5446
  _rbexpr.rolling_median(
5577
5447
  window_size, weights, min_periods, center
@@ -5611,12 +5481,6 @@ module Polars
5611
5481
  # a result. If None, it will be set equal to window size.
5612
5482
  # @param center [Boolean]
5613
5483
  # Set the labels at the center of the window
5614
- # @param by [String]
5615
- # If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
5616
- # set the column that will be used to determine the windows. This column must
5617
- # be of dtype `{Date, Datetime}`
5618
- # @param closed ["left", "right", "both", "none"]
5619
- # Define whether the temporal window interval is closed or not.
5620
5484
  #
5621
5485
  # @note
5622
5486
  # This functionality is experimental and may change without it being considered a
@@ -5656,26 +5520,8 @@ module Polars
5656
5520
  window_size: 2,
5657
5521
  weights: nil,
5658
5522
  min_periods: nil,
5659
- center: false,
5660
- by: nil,
5661
- closed: nil,
5662
- warn_if_unsorted: true
5523
+ center: false
5663
5524
  )
5664
- window_size, min_periods = _prepare_rolling_window_args(
5665
- window_size, min_periods
5666
- )
5667
- if !by.nil?
5668
- Utils.validate_rolling_by_aggs_arguments(weights, center: center)
5669
- return rolling_quantile_by(
5670
- by,
5671
- window_size,
5672
- min_periods: min_periods,
5673
- closed: closed || "right",
5674
- warn_if_unsorted: warn_if_unsorted,
5675
- quantile: quantile
5676
- )
5677
- end
5678
- window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
5679
5525
  _from_rbexpr(
5680
5526
  _rbexpr.rolling_quantile(
5681
5527
  quantile, interpolation, window_size, weights, min_periods, center
@@ -5947,7 +5793,7 @@ module Polars
5947
5793
  # # │ 12 ┆ 0.0 │
5948
5794
  # # └──────┴────────────┘
5949
5795
  def pct_change(n: 1)
5950
- n = Utils.parse_as_expression(n)
5796
+ n = Utils.parse_into_expression(n)
5951
5797
  _from_rbexpr(_rbexpr.pct_change(n))
5952
5798
  end
5953
5799
 
@@ -6039,12 +5885,12 @@ module Polars
6039
5885
  # # │ null ┆ null │
6040
5886
  # # │ 50 ┆ 10 │
6041
5887
  # # └──────┴─────────────┘
6042
- def clip(lower_bound, upper_bound)
5888
+ def clip(lower_bound = nil, upper_bound = nil)
6043
5889
  if !lower_bound.nil?
6044
- lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
5890
+ lower_bound = Utils.parse_into_expression(lower_bound)
6045
5891
  end
6046
5892
  if !upper_bound.nil?
6047
- upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
5893
+ upper_bound = Utils.parse_into_expression(upper_bound)
6048
5894
  end
6049
5895
  _from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
6050
5896
  end
@@ -6431,18 +6277,38 @@ module Polars
6431
6277
  #
6432
6278
  # @example
6433
6279
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
6434
- # df.select(Polars.col("foo").reshape([3, 3]))
6280
+ # square = df.select(Polars.col("foo").reshape([3, 3]))
6435
6281
  # # =>
6436
6282
  # # shape: (3, 1)
6437
- # # ┌───────────┐
6438
- # # │ foo
6439
- # # │ ---
6440
- # # │ list[i64] │
6441
- # # ╞═══════════╡
6442
- # # │ [1, 2, 3]
6443
- # # │ [4, 5, 6]
6444
- # # │ [7, 8, 9]
6445
- # # └───────────┘
6283
+ # # ┌───────────────┐
6284
+ # # │ foo
6285
+ # # │ ---
6286
+ # # │ array[i64, 3] │
6287
+ # # ╞═══════════════╡
6288
+ # # │ [1, 2, 3]
6289
+ # # │ [4, 5, 6]
6290
+ # # │ [7, 8, 9]
6291
+ # # └───────────────┘
6292
+ #
6293
+ # @example
6294
+ # square.select(Polars.col("foo").reshape([9]))
6295
+ # # =>
6296
+ # # shape: (9, 1)
6297
+ # # ┌─────┐
6298
+ # # │ foo │
6299
+ # # │ --- │
6300
+ # # │ i64 │
6301
+ # # ╞═════╡
6302
+ # # │ 1 │
6303
+ # # │ 2 │
6304
+ # # │ 3 │
6305
+ # # │ 4 │
6306
+ # # │ 5 │
6307
+ # # │ 6 │
6308
+ # # │ 7 │
6309
+ # # │ 8 │
6310
+ # # │ 9 │
6311
+ # # └─────┘
6446
6312
  def reshape(dims)
6447
6313
  _from_rbexpr(_rbexpr.reshape(dims))
6448
6314
  end
@@ -6518,14 +6384,14 @@ module Polars
6518
6384
  end
6519
6385
 
6520
6386
  if !n.nil? && frac.nil?
6521
- n = Utils.parse_as_expression(n)
6387
+ n = Utils.parse_into_expression(n)
6522
6388
  return _from_rbexpr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
6523
6389
  end
6524
6390
 
6525
6391
  if frac.nil?
6526
6392
  frac = 1.0
6527
6393
  end
6528
- frac = Utils.parse_as_expression(frac)
6394
+ frac = Utils.parse_into_expression(frac)
6529
6395
  _from_rbexpr(
6530
6396
  _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
6531
6397
  )
@@ -6658,11 +6524,17 @@ module Polars
6658
6524
 
6659
6525
  # Count all unique values and create a struct mapping value to count.
6660
6526
  #
6661
- # @param multithreaded [Boolean]
6662
- # Better to turn this off in the aggregation context, as it can lead to
6663
- # contention.
6664
6527
  # @param sort [Boolean]
6665
- # Ensure the output is sorted from most values to least.
6528
+ # Sort the output by count in descending order.
6529
+ # If set to `false` (default), the order of the output is random.
6530
+ # @param parallel [Boolean]
6531
+ # Execute the computation in parallel.
6532
+ # @param name [String]
6533
+ # Give the resulting count column a specific name;
6534
+ # if `normalize` is true defaults to "proportion",
6535
+ # otherwise defaults to "count".
6536
+ # @param normalize [Boolean]
6537
+ # If true, gives relative frequencies of the unique values.
6666
6538
  #
6667
6539
  # @return [Expr]
6668
6540
  #
@@ -6688,8 +6560,22 @@ module Polars
6688
6560
  # # │ {"b",2} │
6689
6561
  # # │ {"a",1} │
6690
6562
  # # └───────────┘
6691
- def value_counts(multithreaded: false, sort: false)
6692
- _from_rbexpr(_rbexpr.value_counts(multithreaded, sort))
6563
+ def value_counts(
6564
+ sort: false,
6565
+ parallel: false,
6566
+ name: nil,
6567
+ normalize: false
6568
+ )
6569
+ if name.nil?
6570
+ if normalize
6571
+ name = "proportion"
6572
+ else
6573
+ name = "count"
6574
+ end
6575
+ end
6576
+ _from_rbexpr(
6577
+ _rbexpr.value_counts(sort, parallel, name, normalize)
6578
+ )
6693
6579
  end
6694
6580
 
6695
6581
  # Return a count of the unique values in the order of appearance.
@@ -7064,6 +6950,10 @@ module Polars
7064
6950
  # # │ 3 ┆ 1.0 ┆ 10.0 │
7065
6951
  # # └─────┴─────┴──────────┘
7066
6952
  def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
6953
+ if !default.eql?(NO_DEFAULT)
6954
+ return replace_strict(old, new, default: default, return_dtype: return_dtype)
6955
+ end
6956
+
7067
6957
  if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
7068
6958
  new = Series.new(old.values)
7069
6959
  old = Series.new(old.keys)
@@ -7076,17 +6966,164 @@ module Polars
7076
6966
  end
7077
6967
  end
7078
6968
 
7079
- old = Utils.parse_as_expression(old, str_as_lit: true)
7080
- new = Utils.parse_as_expression(new, str_as_lit: true)
6969
+ old = Utils.parse_into_expression(old, str_as_lit: true)
6970
+ new = Utils.parse_into_expression(new, str_as_lit: true)
7081
6971
 
7082
- default =
7083
- if default.eql?(NO_DEFAULT)
7084
- nil
7085
- else
7086
- Utils.parse_as_expression(default, str_as_lit: true)
7087
- end
6972
+ result = _from_rbexpr(_rbexpr.replace(old, new))
6973
+
6974
+ if !return_dtype.nil?
6975
+ result = result.cast(return_dtype)
6976
+ end
6977
+
6978
+ result
6979
+ end
6980
+
6981
+ # Replace all values by different values.
6982
+ #
6983
+ # @param old [Object]
6984
+ # Value or sequence of values to replace.
6985
+ # Accepts expression input. Sequences are parsed as Series,
6986
+ # other non-expression inputs are parsed as literals.
6987
+ # Also accepts a mapping of values to their replacement as syntactic sugar for
6988
+ # `replace_strict(Series.new(mapping.keys), Series.new(mapping.values))`.
6989
+ # @param new [Object]
6990
+ # Value or sequence of values to replace by.
6991
+ # Accepts expression input. Sequences are parsed as Series,
6992
+ # other non-expression inputs are parsed as literals.
6993
+ # Length must match the length of `old` or have length 1.
6994
+ # @param default [Object]
6995
+ # Set values that were not replaced to this value. If no default is
6996
+ # specified (the default), an error is raised if any values were not replaced.
6997
+ # Accepts expression input. Non-expression inputs are parsed as literals.
6998
+ # @param return_dtype [Object]
6999
+ # The data type of the resulting expression. If set to `nil` (default),
7000
+ # the data type is determined automatically based on the other inputs.
7001
+ #
7002
+ # @return [Expr]
7003
+ #
7004
+ # @note
7005
+ # The global string cache must be enabled when replacing categorical values.
7006
+ #
7007
+ # @example Replace values by passing sequences to the `old` and `new` parameters.
7008
+ # df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
7009
+ # df.with_columns(
7010
+ # replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
7011
+ # )
7012
+ # # =>
7013
+ # # shape: (4, 2)
7014
+ # # ┌─────┬──────────┐
7015
+ # # │ a ┆ replaced │
7016
+ # # │ --- ┆ --- │
7017
+ # # │ i64 ┆ i64 │
7018
+ # # ╞═════╪══════════╡
7019
+ # # │ 1 ┆ 100 │
7020
+ # # │ 2 ┆ 200 │
7021
+ # # │ 2 ┆ 200 │
7022
+ # # │ 3 ┆ 300 │
7023
+ # # └─────┴──────────┘
7024
+ #
7025
+ # @example By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.
7026
+ # mapping = {2 => 200, 3 => 300}
7027
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
7028
+ # # =>
7029
+ # # shape: (4, 2)
7030
+ # # ┌─────┬──────────┐
7031
+ # # │ a ┆ replaced │
7032
+ # # │ --- ┆ --- │
7033
+ # # │ i64 ┆ i64 │
7034
+ # # ╞═════╪══════════╡
7035
+ # # │ 1 ┆ -1 │
7036
+ # # │ 2 ┆ 200 │
7037
+ # # │ 2 ┆ 200 │
7038
+ # # │ 3 ┆ 300 │
7039
+ # # └─────┴──────────┘
7040
+ #
7041
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type.
7042
+ # df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
7043
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
7044
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
7045
+ # # =>
7046
+ # # shape: (3, 2)
7047
+ # # ┌─────┬──────────┐
7048
+ # # │ a ┆ replaced │
7049
+ # # │ --- ┆ --- │
7050
+ # # │ str ┆ i64 │
7051
+ # # ╞═════╪══════════╡
7052
+ # # │ x ┆ 1 │
7053
+ # # │ y ┆ 2 │
7054
+ # # │ z ┆ 3 │
7055
+ # # └─────┴──────────┘
7056
+ #
7057
+ # @example
7058
+ # df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
7059
+ # # =>
7060
+ # # shape: (3, 2)
7061
+ # # ┌─────┬──────────┐
7062
+ # # │ a ┆ replaced │
7063
+ # # │ --- ┆ --- │
7064
+ # # │ str ┆ str │
7065
+ # # ╞═════╪══════════╡
7066
+ # # │ x ┆ 1 │
7067
+ # # │ y ┆ 2 │
7068
+ # # │ z ┆ 3 │
7069
+ # # └─────┴──────────┘
7070
+ #
7071
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
7072
+ # df.with_columns(
7073
+ # replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
7074
+ # )
7075
+ # # =>
7076
+ # # shape: (3, 2)
7077
+ # # ┌─────┬──────────┐
7078
+ # # │ a ┆ replaced │
7079
+ # # │ --- ┆ --- │
7080
+ # # │ str ┆ u8 │
7081
+ # # ╞═════╪══════════╡
7082
+ # # │ x ┆ 1 │
7083
+ # # │ y ┆ 2 │
7084
+ # # │ z ┆ 3 │
7085
+ # # └─────┴──────────┘
7086
+ #
7087
+ # @example Expression input is supported for all parameters.
7088
+ # df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
7089
+ # df.with_columns(
7090
+ # replaced: Polars.col("a").replace_strict(
7091
+ # Polars.col("a").max,
7092
+ # Polars.col("b").sum,
7093
+ # default: Polars.col("b")
7094
+ # )
7095
+ # )
7096
+ # # =>
7097
+ # # shape: (4, 3)
7098
+ # # ┌─────┬─────┬──────────┐
7099
+ # # │ a ┆ b ┆ replaced │
7100
+ # # │ --- ┆ --- ┆ --- │
7101
+ # # │ i64 ┆ f64 ┆ f64 │
7102
+ # # ╞═════╪═════╪══════════╡
7103
+ # # │ 1 ┆ 1.5 ┆ 1.5 │
7104
+ # # │ 2 ┆ 2.5 ┆ 2.5 │
7105
+ # # │ 2 ┆ 5.0 ┆ 5.0 │
7106
+ # # │ 3 ┆ 1.0 ┆ 10.0 │
7107
+ # # └─────┴─────┴──────────┘
7108
+ def replace_strict(
7109
+ old,
7110
+ new = NO_DEFAULT,
7111
+ default: NO_DEFAULT,
7112
+ return_dtype: nil
7113
+ )
7114
+ if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
7115
+ new = Series.new(old.values)
7116
+ old = Series.new(old.keys)
7117
+ end
7088
7118
 
7089
- _from_rbexpr(_rbexpr.replace(old, new, default, return_dtype))
7119
+ old = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
7120
+ new = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)
7121
+
7122
+ default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_into_expression(default, str_as_lit: true)
7123
+
7124
+ _from_rbexpr(
7125
+ _rbexpr.replace_strict(old, new, default, return_dtype)
7126
+ )
7090
7127
  end
7091
7128
 
7092
7129
  # Create an object namespace of all list related methods.
@@ -7163,7 +7200,7 @@ module Polars
7163
7200
  end
7164
7201
 
7165
7202
  def _to_expr(other)
7166
- other.is_a?(Expr) ? other : Utils.lit(other)
7203
+ other.is_a?(Expr) ? other : F.lit(other)
7167
7204
  end
7168
7205
 
7169
7206
  def _prepare_alpha(com, span, half_life, alpha)