polars-df 0.11.0-x86_64-darwin → 0.12.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/LICENSE-THIRD-PARTY.txt +1067 -880
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/expr.rb
CHANGED
@@ -82,8 +82,8 @@ module Polars
|
|
82
82
|
#
|
83
83
|
# @return [Expr]
|
84
84
|
def **(power)
|
85
|
-
exponent = Utils.
|
86
|
-
_from_rbexpr(_rbexpr.pow(exponent
|
85
|
+
exponent = Utils.parse_into_expression(power)
|
86
|
+
_from_rbexpr(_rbexpr.pow(exponent))
|
87
87
|
end
|
88
88
|
|
89
89
|
# Greater than or equal.
|
@@ -811,8 +811,8 @@ module Polars
|
|
811
811
|
# # │ 10 ┆ 4 │
|
812
812
|
# # └─────┴──────┘
|
813
813
|
def append(other, upcast: true)
|
814
|
-
other = Utils.
|
815
|
-
_from_rbexpr(_rbexpr.append(other
|
814
|
+
other = Utils.parse_into_expression(other)
|
815
|
+
_from_rbexpr(_rbexpr.append(other, upcast))
|
816
816
|
end
|
817
817
|
|
818
818
|
# Create a single chunk of memory for this Series.
|
@@ -1165,8 +1165,8 @@ module Polars
|
|
1165
1165
|
# # │ 44 │
|
1166
1166
|
# # └─────┘
|
1167
1167
|
def dot(other)
|
1168
|
-
other = Utils.
|
1169
|
-
_from_rbexpr(_rbexpr.dot(other
|
1168
|
+
other = Utils.parse_into_expression(other, str_as_lit: false)
|
1169
|
+
_from_rbexpr(_rbexpr.dot(other))
|
1170
1170
|
end
|
1171
1171
|
|
1172
1172
|
# Compute the most occurring value(s).
|
@@ -1252,12 +1252,12 @@ module Polars
|
|
1252
1252
|
# df = Polars::DataFrame.new(
|
1253
1253
|
# {
|
1254
1254
|
# "group" => [
|
1255
|
-
#
|
1256
|
-
#
|
1257
|
-
#
|
1258
|
-
#
|
1259
|
-
#
|
1260
|
-
#
|
1255
|
+
# "one",
|
1256
|
+
# "one",
|
1257
|
+
# "one",
|
1258
|
+
# "two",
|
1259
|
+
# "two",
|
1260
|
+
# "two"
|
1261
1261
|
# ],
|
1262
1262
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1263
1263
|
# }
|
@@ -1345,9 +1345,9 @@ module Polars
|
|
1345
1345
|
# # │ 3 ┆ 4 │
|
1346
1346
|
# # │ 2 ┆ 98 │
|
1347
1347
|
# # └───────┴──────────┘
|
1348
|
-
def top_k(k: 5
|
1349
|
-
k = Utils.
|
1350
|
-
_from_rbexpr(_rbexpr.top_k(k
|
1348
|
+
def top_k(k: 5)
|
1349
|
+
k = Utils.parse_into_expression(k)
|
1350
|
+
_from_rbexpr(_rbexpr.top_k(k))
|
1351
1351
|
end
|
1352
1352
|
|
1353
1353
|
# Return the `k` smallest elements.
|
@@ -1384,9 +1384,9 @@ module Polars
|
|
1384
1384
|
# # │ 3 ┆ 4 │
|
1385
1385
|
# # │ 2 ┆ 98 │
|
1386
1386
|
# # └───────┴──────────┘
|
1387
|
-
def bottom_k(k: 5
|
1388
|
-
k = Utils.
|
1389
|
-
_from_rbexpr(_rbexpr.bottom_k(k
|
1387
|
+
def bottom_k(k: 5)
|
1388
|
+
k = Utils.parse_into_expression(k)
|
1389
|
+
_from_rbexpr(_rbexpr.bottom_k(k))
|
1390
1390
|
end
|
1391
1391
|
|
1392
1392
|
# Get the index values that would sort this column.
|
@@ -1498,8 +1498,8 @@ module Polars
|
|
1498
1498
|
# # │ 0 ┆ 2 ┆ 4 │
|
1499
1499
|
# # └──────┴───────┴─────┘
|
1500
1500
|
def search_sorted(element, side: "any")
|
1501
|
-
element = Utils.
|
1502
|
-
_from_rbexpr(_rbexpr.search_sorted(element
|
1501
|
+
element = Utils.parse_into_expression(element, str_as_lit: false)
|
1502
|
+
_from_rbexpr(_rbexpr.search_sorted(element, side))
|
1503
1503
|
end
|
1504
1504
|
|
1505
1505
|
# Sort this column by the ordering of another column, or multiple other columns.
|
@@ -1545,13 +1545,14 @@ module Polars
|
|
1545
1545
|
# # │ two │
|
1546
1546
|
# # └───────┘
|
1547
1547
|
def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
|
1548
|
-
by = Utils.
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1548
|
+
by = Utils.parse_into_list_of_expressions(by, *more_by)
|
1549
|
+
reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
|
1550
|
+
nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
|
1551
|
+
_from_rbexpr(
|
1552
|
+
_rbexpr.sort_by(
|
1553
|
+
by, reverse, nulls_last, multithreaded, maintain_order
|
1554
|
+
)
|
1555
|
+
)
|
1555
1556
|
end
|
1556
1557
|
|
1557
1558
|
# Take values by index.
|
@@ -1588,14 +1589,51 @@ module Polars
|
|
1588
1589
|
# # └───────┴───────────┘
|
1589
1590
|
def gather(indices)
|
1590
1591
|
if indices.is_a?(::Array)
|
1591
|
-
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1592
|
+
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))._rbexpr
|
1592
1593
|
else
|
1593
|
-
indices_lit = Utils.
|
1594
|
+
indices_lit = Utils.parse_into_expression(indices, str_as_lit: false)
|
1594
1595
|
end
|
1595
|
-
_from_rbexpr(_rbexpr.gather(indices_lit
|
1596
|
+
_from_rbexpr(_rbexpr.gather(indices_lit))
|
1596
1597
|
end
|
1597
1598
|
alias_method :take, :gather
|
1598
1599
|
|
1600
|
+
# Return a single value by index.
|
1601
|
+
#
|
1602
|
+
# @param index [Object]
|
1603
|
+
# An expression that leads to a UInt32 index.
|
1604
|
+
#
|
1605
|
+
# @return [Expr]
|
1606
|
+
#
|
1607
|
+
# @example
|
1608
|
+
# df = Polars::DataFrame.new(
|
1609
|
+
# {
|
1610
|
+
# "group" => [
|
1611
|
+
# "one",
|
1612
|
+
# "one",
|
1613
|
+
# "one",
|
1614
|
+
# "two",
|
1615
|
+
# "two",
|
1616
|
+
# "two"
|
1617
|
+
# ],
|
1618
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1619
|
+
# }
|
1620
|
+
# )
|
1621
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
|
1622
|
+
# # =>
|
1623
|
+
# # shape: (2, 2)
|
1624
|
+
# # ┌───────┬───────┐
|
1625
|
+
# # │ group ┆ value │
|
1626
|
+
# # │ --- ┆ --- │
|
1627
|
+
# # │ str ┆ i64 │
|
1628
|
+
# # ╞═══════╪═══════╡
|
1629
|
+
# # │ one ┆ 98 │
|
1630
|
+
# # │ two ┆ 99 │
|
1631
|
+
# # └───────┴───────┘
|
1632
|
+
def get(index)
|
1633
|
+
index_lit = Utils.parse_into_expression(index)
|
1634
|
+
_from_rbexpr(_rbexpr.get(index_lit))
|
1635
|
+
end
|
1636
|
+
|
1599
1637
|
# Shift the values by a given period.
|
1600
1638
|
#
|
1601
1639
|
# @param n [Integer]
|
@@ -1622,9 +1660,9 @@ module Polars
|
|
1622
1660
|
# # └──────┘
|
1623
1661
|
def shift(n = 1, fill_value: nil)
|
1624
1662
|
if !fill_value.nil?
|
1625
|
-
fill_value = Utils.
|
1663
|
+
fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
|
1626
1664
|
end
|
1627
|
-
n = Utils.
|
1665
|
+
n = Utils.parse_into_expression(n)
|
1628
1666
|
_from_rbexpr(_rbexpr.shift(n, fill_value))
|
1629
1667
|
end
|
1630
1668
|
|
@@ -1727,8 +1765,8 @@ module Polars
|
|
1727
1765
|
end
|
1728
1766
|
|
1729
1767
|
if !value.nil?
|
1730
|
-
value = Utils.
|
1731
|
-
_from_rbexpr(_rbexpr.fill_null(value
|
1768
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1769
|
+
_from_rbexpr(_rbexpr.fill_null(value))
|
1732
1770
|
else
|
1733
1771
|
_from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
|
1734
1772
|
end
|
@@ -1758,8 +1796,8 @@ module Polars
|
|
1758
1796
|
# # │ zero ┆ 6.0 │
|
1759
1797
|
# # └──────┴──────┘
|
1760
1798
|
def fill_nan(fill_value)
|
1761
|
-
fill_value = Utils.
|
1762
|
-
_from_rbexpr(_rbexpr.fill_nan(fill_value
|
1799
|
+
fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
|
1800
|
+
_from_rbexpr(_rbexpr.fill_nan(fill_value))
|
1763
1801
|
end
|
1764
1802
|
|
1765
1803
|
# Fill missing values with the latest seen values.
|
@@ -2275,7 +2313,7 @@ module Polars
|
|
2275
2313
|
# # │ 4 │
|
2276
2314
|
# # └────────┘
|
2277
2315
|
def over(expr)
|
2278
|
-
rbexprs = Utils.
|
2316
|
+
rbexprs = Utils.parse_into_list_of_expressions(expr)
|
2279
2317
|
_from_rbexpr(_rbexpr.over(rbexprs))
|
2280
2318
|
end
|
2281
2319
|
|
@@ -2470,8 +2508,8 @@ module Polars
|
|
2470
2508
|
# # │ 1.5 │
|
2471
2509
|
# # └─────┘
|
2472
2510
|
def quantile(quantile, interpolation: "nearest")
|
2473
|
-
quantile = Utils.
|
2474
|
-
_from_rbexpr(_rbexpr.quantile(quantile
|
2511
|
+
quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
|
2512
|
+
_from_rbexpr(_rbexpr.quantile(quantile, interpolation))
|
2475
2513
|
end
|
2476
2514
|
|
2477
2515
|
# Bin continuous values into discrete categories.
|
@@ -2515,17 +2553,17 @@ module Polars
|
|
2515
2553
|
# ).unnest("cut")
|
2516
2554
|
# # =>
|
2517
2555
|
# # shape: (5, 3)
|
2518
|
-
# #
|
2519
|
-
# # │ foo ┆
|
2520
|
-
# # │ --- ┆ ---
|
2521
|
-
# # │ i64 ┆ f64
|
2522
|
-
# #
|
2523
|
-
# # │ -2 ┆ -1.0
|
2524
|
-
# # │ -1 ┆ -1.0
|
2525
|
-
# # │ 0 ┆ 1.0
|
2526
|
-
# # │ 1 ┆ 1.0
|
2527
|
-
# # │ 2 ┆ inf
|
2528
|
-
# #
|
2556
|
+
# # ┌─────┬────────────┬────────────┐
|
2557
|
+
# # │ foo ┆ breakpoint ┆ category │
|
2558
|
+
# # │ --- ┆ --- ┆ --- │
|
2559
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2560
|
+
# # ╞═════╪════════════╪════════════╡
|
2561
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2562
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2563
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2564
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2565
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2566
|
+
# # └─────┴────────────┴────────────┘
|
2529
2567
|
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
|
2530
2568
|
_from_rbexpr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
|
2531
2569
|
end
|
@@ -2596,17 +2634,17 @@ module Polars
|
|
2596
2634
|
# ).unnest("qcut")
|
2597
2635
|
# # =>
|
2598
2636
|
# # shape: (5, 3)
|
2599
|
-
# #
|
2600
|
-
# # │ foo ┆
|
2601
|
-
# # │ --- ┆ ---
|
2602
|
-
# # │ i64 ┆ f64
|
2603
|
-
# #
|
2604
|
-
# # │ -2 ┆ -1.0
|
2605
|
-
# # │ -1 ┆ -1.0
|
2606
|
-
# # │ 0 ┆ 1.0
|
2607
|
-
# # │ 1 ┆ 1.0
|
2608
|
-
# # │ 2 ┆ inf
|
2609
|
-
# #
|
2637
|
+
# # ┌─────┬────────────┬────────────┐
|
2638
|
+
# # │ foo ┆ breakpoint ┆ category │
|
2639
|
+
# # │ --- ┆ --- ┆ --- │
|
2640
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2641
|
+
# # ╞═════╪════════════╪════════════╡
|
2642
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2643
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2644
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2645
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2646
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2647
|
+
# # └─────┴────────────┴────────────┘
|
2610
2648
|
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
|
2611
2649
|
if quantiles.is_a?(Integer)
|
2612
2650
|
rbexpr = _rbexpr.qcut_uniform(
|
@@ -2630,18 +2668,18 @@ module Polars
|
|
2630
2668
|
# df.select(Polars.col("s").rle).unnest("s")
|
2631
2669
|
# # =>
|
2632
2670
|
# # shape: (6, 2)
|
2633
|
-
# #
|
2634
|
-
# # │
|
2635
|
-
# # │ ---
|
2636
|
-
# # │
|
2637
|
-
# #
|
2638
|
-
# # │ 2
|
2639
|
-
# # │ 1
|
2640
|
-
# # │ 1
|
2641
|
-
# # │ 1
|
2642
|
-
# # │ 1
|
2643
|
-
# # │ 2
|
2644
|
-
# #
|
2671
|
+
# # ┌─────┬───────┐
|
2672
|
+
# # │ len ┆ value │
|
2673
|
+
# # │ --- ┆ --- │
|
2674
|
+
# # │ u32 ┆ i64 │
|
2675
|
+
# # ╞═════╪═══════╡
|
2676
|
+
# # │ 2 ┆ 1 │
|
2677
|
+
# # │ 1 ┆ 2 │
|
2678
|
+
# # │ 1 ┆ 1 │
|
2679
|
+
# # │ 1 ┆ null │
|
2680
|
+
# # │ 1 ┆ 1 │
|
2681
|
+
# # │ 2 ┆ 3 │
|
2682
|
+
# # └─────┴───────┘
|
2645
2683
|
def rle
|
2646
2684
|
_from_rbexpr(_rbexpr.rle)
|
2647
2685
|
end
|
@@ -3104,7 +3142,7 @@ module Polars
|
|
3104
3142
|
# # │ null ┆ null ┆ null ┆ true │
|
3105
3143
|
# # └──────┴──────┴────────┴────────────────┘
|
3106
3144
|
def eq_missing(other)
|
3107
|
-
other = Utils.
|
3145
|
+
other = Utils.parse_into_expression(other, str_as_lit: true)
|
3108
3146
|
_from_rbexpr(_rbexpr.eq_missing(other))
|
3109
3147
|
end
|
3110
3148
|
|
@@ -3308,7 +3346,7 @@ module Polars
|
|
3308
3346
|
# # │ null ┆ null ┆ null ┆ false │
|
3309
3347
|
# # └──────┴──────┴────────┴────────────────┘
|
3310
3348
|
def ne_missing(other)
|
3311
|
-
other = Utils.
|
3349
|
+
other = Utils.parse_into_expression(other, str_as_lit: true)
|
3312
3350
|
_from_rbexpr(_rbexpr.neq_missing(other))
|
3313
3351
|
end
|
3314
3352
|
|
@@ -3611,14 +3649,14 @@ module Polars
|
|
3611
3649
|
def is_in(other)
|
3612
3650
|
if other.is_a?(::Array)
|
3613
3651
|
if other.length == 0
|
3614
|
-
other = Polars.lit(nil)
|
3652
|
+
other = Polars.lit(nil)._rbexpr
|
3615
3653
|
else
|
3616
|
-
other = Polars.lit(Series.new(other))
|
3654
|
+
other = Polars.lit(Series.new(other))._rbexpr
|
3617
3655
|
end
|
3618
3656
|
else
|
3619
|
-
other = Utils.
|
3657
|
+
other = Utils.parse_into_expression(other, str_as_lit: false)
|
3620
3658
|
end
|
3621
|
-
_from_rbexpr(_rbexpr.is_in(other
|
3659
|
+
_from_rbexpr(_rbexpr.is_in(other))
|
3622
3660
|
end
|
3623
3661
|
alias_method :in?, :is_in
|
3624
3662
|
|
@@ -3653,15 +3691,15 @@ module Polars
|
|
3653
3691
|
# # │ ["z", "z", "z"] │
|
3654
3692
|
# # └─────────────────┘
|
3655
3693
|
def repeat_by(by)
|
3656
|
-
by = Utils.
|
3657
|
-
_from_rbexpr(_rbexpr.repeat_by(by
|
3694
|
+
by = Utils.parse_into_expression(by, str_as_lit: false)
|
3695
|
+
_from_rbexpr(_rbexpr.repeat_by(by))
|
3658
3696
|
end
|
3659
3697
|
|
3660
3698
|
# Check if this expression is between start and end.
|
3661
3699
|
#
|
3662
|
-
# @param
|
3700
|
+
# @param lower_bound [Object]
|
3663
3701
|
# Lower bound as primitive type or datetime.
|
3664
|
-
# @param
|
3702
|
+
# @param upper_bound [Object]
|
3665
3703
|
# Upper bound as primitive type or datetime.
|
3666
3704
|
# @param closed ["both", "left", "right", "none"]
|
3667
3705
|
# Define which sides of the interval are closed (inclusive).
|
@@ -3723,22 +3761,13 @@ module Polars
|
|
3723
3761
|
# # │ d ┆ false │
|
3724
3762
|
# # │ e ┆ false │
|
3725
3763
|
# # └─────┴────────────┘
|
3726
|
-
def is_between(
|
3727
|
-
|
3728
|
-
|
3729
|
-
|
3730
|
-
|
3731
|
-
|
3732
|
-
|
3733
|
-
when "both"
|
3734
|
-
(self >= start) & (self <= _end)
|
3735
|
-
when "right"
|
3736
|
-
(self > start) & (self <= _end)
|
3737
|
-
when "left"
|
3738
|
-
(self >= start) & (self < _end)
|
3739
|
-
else
|
3740
|
-
raise ArgumentError, "closed must be one of 'left', 'right', 'both', or 'none'"
|
3741
|
-
end
|
3764
|
+
def is_between(lower_bound, upper_bound, closed: "both")
|
3765
|
+
lower_bound = Utils.parse_into_expression(lower_bound)
|
3766
|
+
upper_bound = Utils.parse_into_expression(upper_bound)
|
3767
|
+
|
3768
|
+
_from_rbexpr(
|
3769
|
+
_rbexpr.is_between(lower_bound, upper_bound, closed)
|
3770
|
+
)
|
3742
3771
|
end
|
3743
3772
|
|
3744
3773
|
# Hash the elements in the selection.
|
@@ -3977,7 +4006,7 @@ module Polars
|
|
3977
4006
|
warn_if_unsorted: nil
|
3978
4007
|
)
|
3979
4008
|
window_size = _prepare_rolling_by_window_args(window_size)
|
3980
|
-
by = Utils.
|
4009
|
+
by = Utils.parse_into_expression(by)
|
3981
4010
|
_from_rbexpr(
|
3982
4011
|
_rbexpr.rolling_min_by(by, window_size, min_periods, closed)
|
3983
4012
|
)
|
@@ -4106,7 +4135,7 @@ module Polars
|
|
4106
4135
|
warn_if_unsorted: nil
|
4107
4136
|
)
|
4108
4137
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4109
|
-
by = Utils.
|
4138
|
+
by = Utils.parse_into_expression(by)
|
4110
4139
|
_from_rbexpr(
|
4111
4140
|
_rbexpr.rolling_max_by(by, window_size, min_periods, closed)
|
4112
4141
|
)
|
@@ -4237,7 +4266,7 @@ module Polars
|
|
4237
4266
|
warn_if_unsorted: nil
|
4238
4267
|
)
|
4239
4268
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4240
|
-
by = Utils.
|
4269
|
+
by = Utils.parse_into_expression(by)
|
4241
4270
|
_from_rbexpr(
|
4242
4271
|
_rbexpr.rolling_mean_by(
|
4243
4272
|
by,
|
@@ -4371,7 +4400,7 @@ module Polars
|
|
4371
4400
|
warn_if_unsorted: nil
|
4372
4401
|
)
|
4373
4402
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4374
|
-
by = Utils.
|
4403
|
+
by = Utils.parse_into_expression(by)
|
4375
4404
|
_from_rbexpr(
|
4376
4405
|
_rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
|
4377
4406
|
)
|
@@ -4503,7 +4532,7 @@ module Polars
|
|
4503
4532
|
warn_if_unsorted: nil
|
4504
4533
|
)
|
4505
4534
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4506
|
-
by = Utils.
|
4535
|
+
by = Utils.parse_into_expression(by)
|
4507
4536
|
_from_rbexpr(
|
4508
4537
|
_rbexpr.rolling_std_by(
|
4509
4538
|
by,
|
@@ -4641,7 +4670,7 @@ module Polars
|
|
4641
4670
|
warn_if_unsorted: nil
|
4642
4671
|
)
|
4643
4672
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4644
|
-
by = Utils.
|
4673
|
+
by = Utils.parse_into_expression(by)
|
4645
4674
|
_from_rbexpr(
|
4646
4675
|
_rbexpr.rolling_var_by(
|
4647
4676
|
by,
|
@@ -4752,7 +4781,7 @@ module Polars
|
|
4752
4781
|
warn_if_unsorted: nil
|
4753
4782
|
)
|
4754
4783
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4755
|
-
by = Utils.
|
4784
|
+
by = Utils.parse_into_expression(by)
|
4756
4785
|
_from_rbexpr(
|
4757
4786
|
_rbexpr.rolling_median_by(by, window_size, min_periods, closed)
|
4758
4787
|
)
|
@@ -4863,7 +4892,7 @@ module Polars
|
|
4863
4892
|
warn_if_unsorted: nil
|
4864
4893
|
)
|
4865
4894
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4866
|
-
by = Utils.
|
4895
|
+
by = Utils.parse_into_expression(by)
|
4867
4896
|
_from_rbexpr(
|
4868
4897
|
_rbexpr.rolling_quantile_by(
|
4869
4898
|
by,
|
@@ -4908,12 +4937,6 @@ module Polars
|
|
4908
4937
|
# a result. If None, it will be set equal to window size.
|
4909
4938
|
# @param center [Boolean]
|
4910
4939
|
# Set the labels at the center of the window
|
4911
|
-
# @param by [String]
|
4912
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
4913
|
-
# set the column that will be used to determine the windows. This column must
|
4914
|
-
# be of dtype `{Date, Datetime}`
|
4915
|
-
# @param closed ["left", "right", "both", "none"]
|
4916
|
-
# Define whether the temporal window interval is closed or not.
|
4917
4940
|
#
|
4918
4941
|
# @note
|
4919
4942
|
# This functionality is experimental and may change without it being considered a
|
@@ -4951,24 +4974,8 @@ module Polars
|
|
4951
4974
|
window_size,
|
4952
4975
|
weights: nil,
|
4953
4976
|
min_periods: nil,
|
4954
|
-
center: false
|
4955
|
-
by: nil,
|
4956
|
-
closed: nil
|
4977
|
+
center: false
|
4957
4978
|
)
|
4958
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
4959
|
-
window_size, min_periods
|
4960
|
-
)
|
4961
|
-
if !by.nil?
|
4962
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
4963
|
-
return rolling_min_by(
|
4964
|
-
by,
|
4965
|
-
window_size,
|
4966
|
-
min_periods: min_periods,
|
4967
|
-
closed: closed || "right",
|
4968
|
-
warn_if_unsorted: warn_if_unsorted
|
4969
|
-
)
|
4970
|
-
end
|
4971
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4972
4979
|
_from_rbexpr(
|
4973
4980
|
_rbexpr.rolling_min(
|
4974
4981
|
window_size, weights, min_periods, center
|
@@ -5008,12 +5015,6 @@ module Polars
|
|
5008
5015
|
# a result. If None, it will be set equal to window size.
|
5009
5016
|
# @param center [Boolean]
|
5010
5017
|
# Set the labels at the center of the window
|
5011
|
-
# @param by [String]
|
5012
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5013
|
-
# set the column that will be used to determine the windows. This column must
|
5014
|
-
# be of dtype `{Date, Datetime}`
|
5015
|
-
# @param closed ["left", "right", "both", "none"]
|
5016
|
-
# Define whether the temporal window interval is closed or not.
|
5017
5018
|
#
|
5018
5019
|
# @note
|
5019
5020
|
# This functionality is experimental and may change without it being considered a
|
@@ -5051,24 +5052,8 @@ module Polars
|
|
5051
5052
|
window_size,
|
5052
5053
|
weights: nil,
|
5053
5054
|
min_periods: nil,
|
5054
|
-
center: false
|
5055
|
-
by: nil,
|
5056
|
-
closed: nil
|
5055
|
+
center: false
|
5057
5056
|
)
|
5058
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5059
|
-
window_size, min_periods
|
5060
|
-
)
|
5061
|
-
if !by.nil?
|
5062
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5063
|
-
return rolling_max_by(
|
5064
|
-
by,
|
5065
|
-
window_size,
|
5066
|
-
min_periods: min_periods,
|
5067
|
-
closed: closed || "right",
|
5068
|
-
warn_if_unsorted: warn_if_unsorted
|
5069
|
-
)
|
5070
|
-
end
|
5071
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5072
5057
|
_from_rbexpr(
|
5073
5058
|
_rbexpr.rolling_max(
|
5074
5059
|
window_size, weights, min_periods, center
|
@@ -5108,12 +5093,6 @@ module Polars
|
|
5108
5093
|
# a result. If None, it will be set equal to window size.
|
5109
5094
|
# @param center [Boolean]
|
5110
5095
|
# Set the labels at the center of the window
|
5111
|
-
# @param by [String]
|
5112
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5113
|
-
# set the column that will be used to determine the windows. This column must
|
5114
|
-
# be of dtype `{Date, Datetime}`
|
5115
|
-
# @param closed ["left", "right", "both", "none"]
|
5116
|
-
# Define whether the temporal window interval is closed or not.
|
5117
5096
|
#
|
5118
5097
|
# @note
|
5119
5098
|
# This functionality is experimental and may change without it being considered a
|
@@ -5151,24 +5130,8 @@ module Polars
|
|
5151
5130
|
window_size,
|
5152
5131
|
weights: nil,
|
5153
5132
|
min_periods: nil,
|
5154
|
-
center: false
|
5155
|
-
by: nil,
|
5156
|
-
closed: nil
|
5133
|
+
center: false
|
5157
5134
|
)
|
5158
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5159
|
-
window_size, min_periods
|
5160
|
-
)
|
5161
|
-
if !by.nil?
|
5162
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5163
|
-
return rolling_mean_by(
|
5164
|
-
by,
|
5165
|
-
window_size,
|
5166
|
-
min_periods: min_periods,
|
5167
|
-
closed: closed || "right",
|
5168
|
-
warn_if_unsorted: warn_if_unsorted
|
5169
|
-
)
|
5170
|
-
end
|
5171
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5172
5135
|
_from_rbexpr(
|
5173
5136
|
_rbexpr.rolling_mean(
|
5174
5137
|
window_size, weights, min_periods, center
|
@@ -5208,12 +5171,6 @@ module Polars
|
|
5208
5171
|
# a result. If None, it will be set equal to window size.
|
5209
5172
|
# @param center [Boolean]
|
5210
5173
|
# Set the labels at the center of the window
|
5211
|
-
# @param by [String]
|
5212
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5213
|
-
# set the column that will be used to determine the windows. This column must
|
5214
|
-
# be of dtype `{Date, Datetime}`
|
5215
|
-
# @param closed ["left", "right", "both", "none"]
|
5216
|
-
# Define whether the temporal window interval is closed or not.
|
5217
5174
|
#
|
5218
5175
|
# @note
|
5219
5176
|
# This functionality is experimental and may change without it being considered a
|
@@ -5251,24 +5208,8 @@ module Polars
|
|
5251
5208
|
window_size,
|
5252
5209
|
weights: nil,
|
5253
5210
|
min_periods: nil,
|
5254
|
-
center: false
|
5255
|
-
by: nil,
|
5256
|
-
closed: nil
|
5211
|
+
center: false
|
5257
5212
|
)
|
5258
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5259
|
-
window_size, min_periods
|
5260
|
-
)
|
5261
|
-
if !by.nil?
|
5262
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5263
|
-
return rolling_sum_by(
|
5264
|
-
by,
|
5265
|
-
window_size,
|
5266
|
-
min_periods: min_periods,
|
5267
|
-
closed: closed || "right",
|
5268
|
-
warn_if_unsorted: warn_if_unsorted
|
5269
|
-
)
|
5270
|
-
end
|
5271
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5272
5213
|
_from_rbexpr(
|
5273
5214
|
_rbexpr.rolling_sum(
|
5274
5215
|
window_size, weights, min_periods, center
|
@@ -5308,12 +5249,6 @@ module Polars
|
|
5308
5249
|
# a result. If None, it will be set equal to window size.
|
5309
5250
|
# @param center [Boolean]
|
5310
5251
|
# Set the labels at the center of the window
|
5311
|
-
# @param by [String]
|
5312
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5313
|
-
# set the column that will be used to determine the windows. This column must
|
5314
|
-
# be of dtype `{Date, Datetime}`
|
5315
|
-
# @param closed ["left", "right", "both", "none"]
|
5316
|
-
# Define whether the temporal window interval is closed or not.
|
5317
5252
|
#
|
5318
5253
|
# @note
|
5319
5254
|
# This functionality is experimental and may change without it being considered a
|
@@ -5352,26 +5287,8 @@ module Polars
|
|
5352
5287
|
weights: nil,
|
5353
5288
|
min_periods: nil,
|
5354
5289
|
center: false,
|
5355
|
-
|
5356
|
-
closed: nil,
|
5357
|
-
ddof: 1,
|
5358
|
-
warn_if_unsorted: true
|
5290
|
+
ddof: 1
|
5359
5291
|
)
|
5360
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5361
|
-
window_size, min_periods
|
5362
|
-
)
|
5363
|
-
if !by.nil?
|
5364
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5365
|
-
return rolling_std_by(
|
5366
|
-
by,
|
5367
|
-
window_size,
|
5368
|
-
min_periods: min_periods,
|
5369
|
-
closed: closed || "right",
|
5370
|
-
ddof: ddof,
|
5371
|
-
warn_if_unsorted: warn_if_unsorted
|
5372
|
-
)
|
5373
|
-
end
|
5374
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5375
5292
|
_from_rbexpr(
|
5376
5293
|
_rbexpr.rolling_std(
|
5377
5294
|
window_size, weights, min_periods, center, ddof
|
@@ -5411,12 +5328,6 @@ module Polars
|
|
5411
5328
|
# a result. If None, it will be set equal to window size.
|
5412
5329
|
# @param center [Boolean]
|
5413
5330
|
# Set the labels at the center of the window
|
5414
|
-
# @param by [String]
|
5415
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5416
|
-
# set the column that will be used to determine the windows. This column must
|
5417
|
-
# be of dtype `{Date, Datetime}`
|
5418
|
-
# @param closed ["left", "right", "both", "none"]
|
5419
|
-
# Define whether the temporal window interval is closed or not.
|
5420
5331
|
#
|
5421
5332
|
# @note
|
5422
5333
|
# This functionality is experimental and may change without it being considered a
|
@@ -5455,26 +5366,8 @@ module Polars
|
|
5455
5366
|
weights: nil,
|
5456
5367
|
min_periods: nil,
|
5457
5368
|
center: false,
|
5458
|
-
|
5459
|
-
closed: nil,
|
5460
|
-
ddof: 1,
|
5461
|
-
warn_if_unsorted: true
|
5369
|
+
ddof: 1
|
5462
5370
|
)
|
5463
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5464
|
-
window_size, min_periods
|
5465
|
-
)
|
5466
|
-
if !by.nil?
|
5467
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5468
|
-
return rolling_var_by(
|
5469
|
-
by,
|
5470
|
-
window_size,
|
5471
|
-
min_periods: min_periods,
|
5472
|
-
closed: closed || "right",
|
5473
|
-
ddof: ddof,
|
5474
|
-
warn_if_unsorted: warn_if_unsorted
|
5475
|
-
)
|
5476
|
-
end
|
5477
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5478
5371
|
_from_rbexpr(
|
5479
5372
|
_rbexpr.rolling_var(
|
5480
5373
|
window_size, weights, min_periods, center, ddof
|
@@ -5510,12 +5403,6 @@ module Polars
|
|
5510
5403
|
# a result. If None, it will be set equal to window size.
|
5511
5404
|
# @param center [Boolean]
|
5512
5405
|
# Set the labels at the center of the window
|
5513
|
-
# @param by [String]
|
5514
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5515
|
-
# set the column that will be used to determine the windows. This column must
|
5516
|
-
# be of dtype `{Date, Datetime}`
|
5517
|
-
# @param closed ["left", "right", "both", "none"]
|
5518
|
-
# Define whether the temporal window interval is closed or not.
|
5519
5406
|
#
|
5520
5407
|
# @note
|
5521
5408
|
# This functionality is experimental and may change without it being considered a
|
@@ -5553,25 +5440,8 @@ module Polars
|
|
5553
5440
|
window_size,
|
5554
5441
|
weights: nil,
|
5555
5442
|
min_periods: nil,
|
5556
|
-
center: false
|
5557
|
-
by: nil,
|
5558
|
-
closed: nil,
|
5559
|
-
warn_if_unsorted: true
|
5443
|
+
center: false
|
5560
5444
|
)
|
5561
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5562
|
-
window_size, min_periods
|
5563
|
-
)
|
5564
|
-
if !by.nil?
|
5565
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5566
|
-
return rolling_median_by(
|
5567
|
-
by,
|
5568
|
-
window_size,
|
5569
|
-
min_periods: min_periods,
|
5570
|
-
closed: closed || "right",
|
5571
|
-
warn_if_unsorted: warn_if_unsorted
|
5572
|
-
)
|
5573
|
-
end
|
5574
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5575
5445
|
_from_rbexpr(
|
5576
5446
|
_rbexpr.rolling_median(
|
5577
5447
|
window_size, weights, min_periods, center
|
@@ -5611,12 +5481,6 @@ module Polars
|
|
5611
5481
|
# a result. If None, it will be set equal to window size.
|
5612
5482
|
# @param center [Boolean]
|
5613
5483
|
# Set the labels at the center of the window
|
5614
|
-
# @param by [String]
|
5615
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5616
|
-
# set the column that will be used to determine the windows. This column must
|
5617
|
-
# be of dtype `{Date, Datetime}`
|
5618
|
-
# @param closed ["left", "right", "both", "none"]
|
5619
|
-
# Define whether the temporal window interval is closed or not.
|
5620
5484
|
#
|
5621
5485
|
# @note
|
5622
5486
|
# This functionality is experimental and may change without it being considered a
|
@@ -5656,26 +5520,8 @@ module Polars
|
|
5656
5520
|
window_size: 2,
|
5657
5521
|
weights: nil,
|
5658
5522
|
min_periods: nil,
|
5659
|
-
center: false
|
5660
|
-
by: nil,
|
5661
|
-
closed: nil,
|
5662
|
-
warn_if_unsorted: true
|
5523
|
+
center: false
|
5663
5524
|
)
|
5664
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5665
|
-
window_size, min_periods
|
5666
|
-
)
|
5667
|
-
if !by.nil?
|
5668
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5669
|
-
return rolling_quantile_by(
|
5670
|
-
by,
|
5671
|
-
window_size,
|
5672
|
-
min_periods: min_periods,
|
5673
|
-
closed: closed || "right",
|
5674
|
-
warn_if_unsorted: warn_if_unsorted,
|
5675
|
-
quantile: quantile
|
5676
|
-
)
|
5677
|
-
end
|
5678
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5679
5525
|
_from_rbexpr(
|
5680
5526
|
_rbexpr.rolling_quantile(
|
5681
5527
|
quantile, interpolation, window_size, weights, min_periods, center
|
@@ -5947,7 +5793,7 @@ module Polars
|
|
5947
5793
|
# # │ 12 ┆ 0.0 │
|
5948
5794
|
# # └──────┴────────────┘
|
5949
5795
|
def pct_change(n: 1)
|
5950
|
-
n = Utils.
|
5796
|
+
n = Utils.parse_into_expression(n)
|
5951
5797
|
_from_rbexpr(_rbexpr.pct_change(n))
|
5952
5798
|
end
|
5953
5799
|
|
@@ -6039,12 +5885,12 @@ module Polars
|
|
6039
5885
|
# # │ null ┆ null │
|
6040
5886
|
# # │ 50 ┆ 10 │
|
6041
5887
|
# # └──────┴─────────────┘
|
6042
|
-
def clip(lower_bound, upper_bound)
|
5888
|
+
def clip(lower_bound = nil, upper_bound = nil)
|
6043
5889
|
if !lower_bound.nil?
|
6044
|
-
lower_bound = Utils.
|
5890
|
+
lower_bound = Utils.parse_into_expression(lower_bound)
|
6045
5891
|
end
|
6046
5892
|
if !upper_bound.nil?
|
6047
|
-
upper_bound = Utils.
|
5893
|
+
upper_bound = Utils.parse_into_expression(upper_bound)
|
6048
5894
|
end
|
6049
5895
|
_from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
|
6050
5896
|
end
|
@@ -6431,18 +6277,38 @@ module Polars
|
|
6431
6277
|
#
|
6432
6278
|
# @example
|
6433
6279
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
6434
|
-
# df.select(Polars.col("foo").reshape([3, 3]))
|
6280
|
+
# square = df.select(Polars.col("foo").reshape([3, 3]))
|
6435
6281
|
# # =>
|
6436
6282
|
# # shape: (3, 1)
|
6437
|
-
# #
|
6438
|
-
# # │ foo
|
6439
|
-
# # │ ---
|
6440
|
-
# # │
|
6441
|
-
# #
|
6442
|
-
# # │ [1, 2, 3]
|
6443
|
-
# # │ [4, 5, 6]
|
6444
|
-
# # │ [7, 8, 9]
|
6445
|
-
# #
|
6283
|
+
# # ┌───────────────┐
|
6284
|
+
# # │ foo │
|
6285
|
+
# # │ --- │
|
6286
|
+
# # │ array[i64, 3] │
|
6287
|
+
# # ╞═══════════════╡
|
6288
|
+
# # │ [1, 2, 3] │
|
6289
|
+
# # │ [4, 5, 6] │
|
6290
|
+
# # │ [7, 8, 9] │
|
6291
|
+
# # └───────────────┘
|
6292
|
+
#
|
6293
|
+
# @example
|
6294
|
+
# square.select(Polars.col("foo").reshape([9]))
|
6295
|
+
# # =>
|
6296
|
+
# # shape: (9, 1)
|
6297
|
+
# # ┌─────┐
|
6298
|
+
# # │ foo │
|
6299
|
+
# # │ --- │
|
6300
|
+
# # │ i64 │
|
6301
|
+
# # ╞═════╡
|
6302
|
+
# # │ 1 │
|
6303
|
+
# # │ 2 │
|
6304
|
+
# # │ 3 │
|
6305
|
+
# # │ 4 │
|
6306
|
+
# # │ 5 │
|
6307
|
+
# # │ 6 │
|
6308
|
+
# # │ 7 │
|
6309
|
+
# # │ 8 │
|
6310
|
+
# # │ 9 │
|
6311
|
+
# # └─────┘
|
6446
6312
|
def reshape(dims)
|
6447
6313
|
_from_rbexpr(_rbexpr.reshape(dims))
|
6448
6314
|
end
|
@@ -6518,14 +6384,14 @@ module Polars
|
|
6518
6384
|
end
|
6519
6385
|
|
6520
6386
|
if !n.nil? && frac.nil?
|
6521
|
-
n = Utils.
|
6387
|
+
n = Utils.parse_into_expression(n)
|
6522
6388
|
return _from_rbexpr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
6523
6389
|
end
|
6524
6390
|
|
6525
6391
|
if frac.nil?
|
6526
6392
|
frac = 1.0
|
6527
6393
|
end
|
6528
|
-
frac = Utils.
|
6394
|
+
frac = Utils.parse_into_expression(frac)
|
6529
6395
|
_from_rbexpr(
|
6530
6396
|
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
6531
6397
|
)
|
@@ -6658,11 +6524,17 @@ module Polars
|
|
6658
6524
|
|
6659
6525
|
# Count all unique values and create a struct mapping value to count.
|
6660
6526
|
#
|
6661
|
-
# @param multithreaded [Boolean]
|
6662
|
-
# Better to turn this off in the aggregation context, as it can lead to
|
6663
|
-
# contention.
|
6664
6527
|
# @param sort [Boolean]
|
6665
|
-
#
|
6528
|
+
# Sort the output by count in descending order.
|
6529
|
+
# If set to `false` (default), the order of the output is random.
|
6530
|
+
# @param parallel [Boolean]
|
6531
|
+
# Execute the computation in parallel.
|
6532
|
+
# @param name [String]
|
6533
|
+
# Give the resulting count column a specific name;
|
6534
|
+
# if `normalize` is true defaults to "proportion",
|
6535
|
+
# otherwise defaults to "count".
|
6536
|
+
# @param normalize [Boolean]
|
6537
|
+
# If true gives relative frequencies of the unique values
|
6666
6538
|
#
|
6667
6539
|
# @return [Expr]
|
6668
6540
|
#
|
@@ -6688,8 +6560,22 @@ module Polars
|
|
6688
6560
|
# # │ {"b",2} │
|
6689
6561
|
# # │ {"a",1} │
|
6690
6562
|
# # └───────────┘
|
6691
|
-
def value_counts(
|
6692
|
-
|
6563
|
+
def value_counts(
|
6564
|
+
sort: false,
|
6565
|
+
parallel: false,
|
6566
|
+
name: nil,
|
6567
|
+
normalize: false
|
6568
|
+
)
|
6569
|
+
if name.nil?
|
6570
|
+
if normalize
|
6571
|
+
name = "proportion"
|
6572
|
+
else
|
6573
|
+
name = "count"
|
6574
|
+
end
|
6575
|
+
end
|
6576
|
+
_from_rbexpr(
|
6577
|
+
_rbexpr.value_counts(sort, parallel, name, normalize)
|
6578
|
+
)
|
6693
6579
|
end
|
6694
6580
|
|
6695
6581
|
# Return a count of the unique values in the order of appearance.
|
@@ -7064,6 +6950,10 @@ module Polars
|
|
7064
6950
|
# # │ 3 ┆ 1.0 ┆ 10.0 │
|
7065
6951
|
# # └─────┴─────┴──────────┘
|
7066
6952
|
def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
|
6953
|
+
if !default.eql?(NO_DEFAULT)
|
6954
|
+
return replace_strict(old, new, default: default, return_dtype: return_dtype)
|
6955
|
+
end
|
6956
|
+
|
7067
6957
|
if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
|
7068
6958
|
new = Series.new(old.values)
|
7069
6959
|
old = Series.new(old.keys)
|
@@ -7076,17 +6966,164 @@ module Polars
|
|
7076
6966
|
end
|
7077
6967
|
end
|
7078
6968
|
|
7079
|
-
old = Utils.
|
7080
|
-
new = Utils.
|
6969
|
+
old = Utils.parse_into_expression(old, str_as_lit: true)
|
6970
|
+
new = Utils.parse_into_expression(new, str_as_lit: true)
|
7081
6971
|
|
7082
|
-
|
7083
|
-
|
7084
|
-
|
7085
|
-
|
7086
|
-
|
7087
|
-
|
6972
|
+
result = _from_rbexpr(_rbexpr.replace(old, new))
|
6973
|
+
|
6974
|
+
if !return_dtype.nil?
|
6975
|
+
result = result.cast(return_dtype)
|
6976
|
+
end
|
6977
|
+
|
6978
|
+
result
|
6979
|
+
end
|
6980
|
+
|
6981
|
+
# Replace all values by different values.
|
6982
|
+
#
|
6983
|
+
# @param old [Object]
|
6984
|
+
# Value or sequence of values to replace.
|
6985
|
+
# Accepts expression input. Sequences are parsed as Series,
|
6986
|
+
# other non-expression inputs are parsed as literals.
|
6987
|
+
# Also accepts a mapping of values to their replacement as syntactic sugar for
|
6988
|
+
# `replace_strict(Series.new(mapping.keys), Series.new(mapping.values))`.
|
6989
|
+
# @param new [Object]
|
6990
|
+
# Value or sequence of values to replace by.
|
6991
|
+
# Accepts expression input. Sequences are parsed as Series,
|
6992
|
+
# other non-expression inputs are parsed as literals.
|
6993
|
+
# Length must match the length of `old` or have length 1.
|
6994
|
+
# @param default [Object]
|
6995
|
+
# Set values that were not replaced to this value. If no default is specified
|
6996
|
+
# (default), an error is raised if any values were not replaced.
|
6997
|
+
# Accepts expression input. Non-expression inputs are parsed as literals.
|
6998
|
+
# @param return_dtype [Object]
|
6999
|
+
# The data type of the resulting expression. If set to `nil` (default),
|
7000
|
+
# the data type is determined automatically based on the other inputs.
|
7001
|
+
#
|
7002
|
+
# @return [Expr]
|
7003
|
+
#
|
7004
|
+
# @note
|
7005
|
+
# The global string cache must be enabled when replacing categorical values.
|
7006
|
+
#
|
7007
|
+
# @example Replace values by passing sequences to the `old` and `new` parameters.
|
7008
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
|
7009
|
+
# df.with_columns(
|
7010
|
+
# replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
|
7011
|
+
# )
|
7012
|
+
# # =>
|
7013
|
+
# # shape: (4, 2)
|
7014
|
+
# # ┌─────┬──────────┐
|
7015
|
+
# # │ a ┆ replaced │
|
7016
|
+
# # │ --- ┆ --- │
|
7017
|
+
# # │ i64 ┆ i64 │
|
7018
|
+
# # ╞═════╪══════════╡
|
7019
|
+
# # │ 1 ┆ 100 │
|
7020
|
+
# # │ 2 ┆ 200 │
|
7021
|
+
# # │ 2 ┆ 200 │
|
7022
|
+
# # │ 3 ┆ 300 │
|
7023
|
+
# # └─────┴──────────┘
|
7024
|
+
#
|
7025
|
+
# @example By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.
|
7026
|
+
# mapping = {2 => 200, 3 => 300}
|
7027
|
+
# df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
|
7028
|
+
# # =>
|
7029
|
+
# # shape: (4, 2)
|
7030
|
+
# # ┌─────┬──────────┐
|
7031
|
+
# # │ a ┆ replaced │
|
7032
|
+
# # │ --- ┆ --- │
|
7033
|
+
# # │ i64 ┆ i64 │
|
7034
|
+
# # ╞═════╪══════════╡
|
7035
|
+
# # │ 1 ┆ -1 │
|
7036
|
+
# # │ 2 ┆ 200 │
|
7037
|
+
# # │ 2 ┆ 200 │
|
7038
|
+
# # │ 3 ┆ 300 │
|
7039
|
+
# # └─────┴──────────┘
|
7040
|
+
#
|
7041
|
+
# @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type.
|
7042
|
+
# df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
|
7043
|
+
# mapping = {"x" => 1, "y" => 2, "z" => 3}
|
7044
|
+
# df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
|
7045
|
+
# # =>
|
7046
|
+
# # shape: (3, 2)
|
7047
|
+
# # ┌─────┬──────────┐
|
7048
|
+
# # │ a ┆ replaced │
|
7049
|
+
# # │ --- ┆ --- │
|
7050
|
+
# # │ str ┆ i64 │
|
7051
|
+
# # ╞═════╪══════════╡
|
7052
|
+
# # │ x ┆ 1 │
|
7053
|
+
# # │ y ┆ 2 │
|
7054
|
+
# # │ z ┆ 3 │
|
7055
|
+
# # └─────┴──────────┘
|
7056
|
+
#
|
7057
|
+
# @example
|
7058
|
+
# df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
|
7059
|
+
# # =>
|
7060
|
+
# # shape: (3, 2)
|
7061
|
+
# # ┌─────┬──────────┐
|
7062
|
+
# # │ a ┆ replaced │
|
7063
|
+
# # │ --- ┆ --- │
|
7064
|
+
# # │ str ┆ str │
|
7065
|
+
# # ╞═════╪══════════╡
|
7066
|
+
# # │ x ┆ 1 │
|
7067
|
+
# # │ y ┆ 2 │
|
7068
|
+
# # │ z ┆ 3 │
|
7069
|
+
# # └─────┴──────────┘
|
7070
|
+
#
|
7071
|
+
# @example Set the `return_dtype` parameter to control the resulting data type directly.
|
7072
|
+
# df.with_columns(
|
7073
|
+
# replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
|
7074
|
+
# )
|
7075
|
+
# # =>
|
7076
|
+
# # shape: (3, 2)
|
7077
|
+
# # ┌─────┬──────────┐
|
7078
|
+
# # │ a ┆ replaced │
|
7079
|
+
# # │ --- ┆ --- │
|
7080
|
+
# # │ str ┆ u8 │
|
7081
|
+
# # ╞═════╪══════════╡
|
7082
|
+
# # │ x ┆ 1 │
|
7083
|
+
# # │ y ┆ 2 │
|
7084
|
+
# # │ z ┆ 3 │
|
7085
|
+
# # └─────┴──────────┘
|
7086
|
+
#
|
7087
|
+
# @example Expression input is supported for all parameters.
|
7088
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
|
7089
|
+
# df.with_columns(
|
7090
|
+
# replaced: Polars.col("a").replace_strict(
|
7091
|
+
# Polars.col("a").max,
|
7092
|
+
# Polars.col("b").sum,
|
7093
|
+
# default: Polars.col("b")
|
7094
|
+
# )
|
7095
|
+
# )
|
7096
|
+
# # =>
|
7097
|
+
# # shape: (4, 3)
|
7098
|
+
# # ┌─────┬─────┬──────────┐
|
7099
|
+
# # │ a ┆ b ┆ replaced │
|
7100
|
+
# # │ --- ┆ --- ┆ --- │
|
7101
|
+
# # │ i64 ┆ f64 ┆ f64 │
|
7102
|
+
# # ╞═════╪═════╪══════════╡
|
7103
|
+
# # │ 1 ┆ 1.5 ┆ 1.5 │
|
7104
|
+
# # │ 2 ┆ 2.5 ┆ 2.5 │
|
7105
|
+
# # │ 2 ┆ 5.0 ┆ 5.0 │
|
7106
|
+
# # │ 3 ┆ 1.0 ┆ 10.0 │
|
7107
|
+
# # └─────┴─────┴──────────┘
|
7108
|
+
def replace_strict(
|
7109
|
+
old,
|
7110
|
+
new = NO_DEFAULT,
|
7111
|
+
default: NO_DEFAULT,
|
7112
|
+
return_dtype: nil
|
7113
|
+
)
|
7114
|
+
if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
|
7115
|
+
new = Series.new(old.values)
|
7116
|
+
old = Series.new(old.keys)
|
7117
|
+
end
|
7088
7118
|
|
7089
|
-
|
7119
|
+
old = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
|
7120
|
+
new = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)
|
7121
|
+
|
7122
|
+
default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_into_expression(default, str_as_lit: true)
|
7123
|
+
|
7124
|
+
_from_rbexpr(
|
7125
|
+
_rbexpr.replace_strict(old, new, default, return_dtype)
|
7126
|
+
)
|
7090
7127
|
end
|
7091
7128
|
|
7092
7129
|
# Create an object namespace of all list related methods.
|
@@ -7163,7 +7200,7 @@ module Polars
|
|
7163
7200
|
end
|
7164
7201
|
|
7165
7202
|
def _to_expr(other)
|
7166
|
-
other.is_a?(Expr) ? other :
|
7203
|
+
other.is_a?(Expr) ? other : F.lit(other)
|
7167
7204
|
end
|
7168
7205
|
|
7169
7206
|
def _prepare_alpha(com, span, half_life, alpha)
|