polars-df 0.11.0-aarch64-linux → 0.12.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/LICENSE-THIRD-PARTY.txt +1065 -878
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/expr.rb
CHANGED
@@ -82,8 +82,8 @@ module Polars
|
|
82
82
|
#
|
83
83
|
# @return [Expr]
|
84
84
|
def **(power)
|
85
|
-
exponent = Utils.
|
86
|
-
_from_rbexpr(_rbexpr.pow(exponent
|
85
|
+
exponent = Utils.parse_into_expression(power)
|
86
|
+
_from_rbexpr(_rbexpr.pow(exponent))
|
87
87
|
end
|
88
88
|
|
89
89
|
# Greater than or equal.
|
@@ -811,8 +811,8 @@ module Polars
|
|
811
811
|
# # │ 10 ┆ 4 │
|
812
812
|
# # └─────┴──────┘
|
813
813
|
def append(other, upcast: true)
|
814
|
-
other = Utils.
|
815
|
-
_from_rbexpr(_rbexpr.append(other
|
814
|
+
other = Utils.parse_into_expression(other)
|
815
|
+
_from_rbexpr(_rbexpr.append(other, upcast))
|
816
816
|
end
|
817
817
|
|
818
818
|
# Create a single chunk of memory for this Series.
|
@@ -1165,8 +1165,8 @@ module Polars
|
|
1165
1165
|
# # │ 44 │
|
1166
1166
|
# # └─────┘
|
1167
1167
|
def dot(other)
|
1168
|
-
other = Utils.
|
1169
|
-
_from_rbexpr(_rbexpr.dot(other
|
1168
|
+
other = Utils.parse_into_expression(other, str_as_lit: false)
|
1169
|
+
_from_rbexpr(_rbexpr.dot(other))
|
1170
1170
|
end
|
1171
1171
|
|
1172
1172
|
# Compute the most occurring value(s).
|
@@ -1252,12 +1252,12 @@ module Polars
|
|
1252
1252
|
# df = Polars::DataFrame.new(
|
1253
1253
|
# {
|
1254
1254
|
# "group" => [
|
1255
|
-
#
|
1256
|
-
#
|
1257
|
-
#
|
1258
|
-
#
|
1259
|
-
#
|
1260
|
-
#
|
1255
|
+
# "one",
|
1256
|
+
# "one",
|
1257
|
+
# "one",
|
1258
|
+
# "two",
|
1259
|
+
# "two",
|
1260
|
+
# "two"
|
1261
1261
|
# ],
|
1262
1262
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1263
1263
|
# }
|
@@ -1345,9 +1345,9 @@ module Polars
|
|
1345
1345
|
# # │ 3 ┆ 4 │
|
1346
1346
|
# # │ 2 ┆ 98 │
|
1347
1347
|
# # └───────┴──────────┘
|
1348
|
-
def top_k(k: 5
|
1349
|
-
k = Utils.
|
1350
|
-
_from_rbexpr(_rbexpr.top_k(k
|
1348
|
+
def top_k(k: 5)
|
1349
|
+
k = Utils.parse_into_expression(k)
|
1350
|
+
_from_rbexpr(_rbexpr.top_k(k))
|
1351
1351
|
end
|
1352
1352
|
|
1353
1353
|
# Return the `k` smallest elements.
|
@@ -1384,9 +1384,9 @@ module Polars
|
|
1384
1384
|
# # │ 3 ┆ 4 │
|
1385
1385
|
# # │ 2 ┆ 98 │
|
1386
1386
|
# # └───────┴──────────┘
|
1387
|
-
def bottom_k(k: 5
|
1388
|
-
k = Utils.
|
1389
|
-
_from_rbexpr(_rbexpr.bottom_k(k
|
1387
|
+
def bottom_k(k: 5)
|
1388
|
+
k = Utils.parse_into_expression(k)
|
1389
|
+
_from_rbexpr(_rbexpr.bottom_k(k))
|
1390
1390
|
end
|
1391
1391
|
|
1392
1392
|
# Get the index values that would sort this column.
|
@@ -1498,8 +1498,8 @@ module Polars
|
|
1498
1498
|
# # │ 0 ┆ 2 ┆ 4 │
|
1499
1499
|
# # └──────┴───────┴─────┘
|
1500
1500
|
def search_sorted(element, side: "any")
|
1501
|
-
element = Utils.
|
1502
|
-
_from_rbexpr(_rbexpr.search_sorted(element
|
1501
|
+
element = Utils.parse_into_expression(element, str_as_lit: false)
|
1502
|
+
_from_rbexpr(_rbexpr.search_sorted(element, side))
|
1503
1503
|
end
|
1504
1504
|
|
1505
1505
|
# Sort this column by the ordering of another column, or multiple other columns.
|
@@ -1545,13 +1545,14 @@ module Polars
|
|
1545
1545
|
# # │ two │
|
1546
1546
|
# # └───────┘
|
1547
1547
|
def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
|
1548
|
-
by = Utils.
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1548
|
+
by = Utils.parse_into_list_of_expressions(by, *more_by)
|
1549
|
+
reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
|
1550
|
+
nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
|
1551
|
+
_from_rbexpr(
|
1552
|
+
_rbexpr.sort_by(
|
1553
|
+
by, reverse, nulls_last, multithreaded, maintain_order
|
1554
|
+
)
|
1555
|
+
)
|
1555
1556
|
end
|
1556
1557
|
|
1557
1558
|
# Take values by index.
|
@@ -1588,14 +1589,51 @@ module Polars
|
|
1588
1589
|
# # └───────┴───────────┘
|
1589
1590
|
def gather(indices)
|
1590
1591
|
if indices.is_a?(::Array)
|
1591
|
-
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1592
|
+
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))._rbexpr
|
1592
1593
|
else
|
1593
|
-
indices_lit = Utils.
|
1594
|
+
indices_lit = Utils.parse_into_expression(indices, str_as_lit: false)
|
1594
1595
|
end
|
1595
|
-
_from_rbexpr(_rbexpr.gather(indices_lit
|
1596
|
+
_from_rbexpr(_rbexpr.gather(indices_lit))
|
1596
1597
|
end
|
1597
1598
|
alias_method :take, :gather
|
1598
1599
|
|
1600
|
+
# Return a single value by index.
|
1601
|
+
#
|
1602
|
+
# @param index [Object]
|
1603
|
+
# An expression that leads to a UInt32 index.
|
1604
|
+
#
|
1605
|
+
# @return [Expr]
|
1606
|
+
#
|
1607
|
+
# @example
|
1608
|
+
# df = Polars::DataFrame.new(
|
1609
|
+
# {
|
1610
|
+
# "group" => [
|
1611
|
+
# "one",
|
1612
|
+
# "one",
|
1613
|
+
# "one",
|
1614
|
+
# "two",
|
1615
|
+
# "two",
|
1616
|
+
# "two"
|
1617
|
+
# ],
|
1618
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1619
|
+
# }
|
1620
|
+
# )
|
1621
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").get(1))
|
1622
|
+
# # =>
|
1623
|
+
# # shape: (2, 2)
|
1624
|
+
# # ┌───────┬───────┐
|
1625
|
+
# # │ group ┆ value │
|
1626
|
+
# # │ --- ┆ --- │
|
1627
|
+
# # │ str ┆ i64 │
|
1628
|
+
# # ╞═══════╪═══════╡
|
1629
|
+
# # │ one ┆ 98 │
|
1630
|
+
# # │ two ┆ 99 │
|
1631
|
+
# # └───────┴───────┘
|
1632
|
+
def get(index)
|
1633
|
+
index_lit = Utils.parse_into_expression(index)
|
1634
|
+
_from_rbexpr(_rbexpr.get(index_lit))
|
1635
|
+
end
|
1636
|
+
|
1599
1637
|
# Shift the values by a given period.
|
1600
1638
|
#
|
1601
1639
|
# @param n [Integer]
|
@@ -1622,9 +1660,9 @@ module Polars
|
|
1622
1660
|
# # └──────┘
|
1623
1661
|
def shift(n = 1, fill_value: nil)
|
1624
1662
|
if !fill_value.nil?
|
1625
|
-
fill_value = Utils.
|
1663
|
+
fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
|
1626
1664
|
end
|
1627
|
-
n = Utils.
|
1665
|
+
n = Utils.parse_into_expression(n)
|
1628
1666
|
_from_rbexpr(_rbexpr.shift(n, fill_value))
|
1629
1667
|
end
|
1630
1668
|
|
@@ -1727,8 +1765,8 @@ module Polars
|
|
1727
1765
|
end
|
1728
1766
|
|
1729
1767
|
if !value.nil?
|
1730
|
-
value = Utils.
|
1731
|
-
_from_rbexpr(_rbexpr.fill_null(value
|
1768
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1769
|
+
_from_rbexpr(_rbexpr.fill_null(value))
|
1732
1770
|
else
|
1733
1771
|
_from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
|
1734
1772
|
end
|
@@ -1758,8 +1796,8 @@ module Polars
|
|
1758
1796
|
# # │ zero ┆ 6.0 │
|
1759
1797
|
# # └──────┴──────┘
|
1760
1798
|
def fill_nan(fill_value)
|
1761
|
-
fill_value = Utils.
|
1762
|
-
_from_rbexpr(_rbexpr.fill_nan(fill_value
|
1799
|
+
fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
|
1800
|
+
_from_rbexpr(_rbexpr.fill_nan(fill_value))
|
1763
1801
|
end
|
1764
1802
|
|
1765
1803
|
# Fill missing values with the latest seen values.
|
@@ -2275,7 +2313,7 @@ module Polars
|
|
2275
2313
|
# # │ 4 │
|
2276
2314
|
# # └────────┘
|
2277
2315
|
def over(expr)
|
2278
|
-
rbexprs = Utils.
|
2316
|
+
rbexprs = Utils.parse_into_list_of_expressions(expr)
|
2279
2317
|
_from_rbexpr(_rbexpr.over(rbexprs))
|
2280
2318
|
end
|
2281
2319
|
|
@@ -2470,8 +2508,8 @@ module Polars
|
|
2470
2508
|
# # │ 1.5 │
|
2471
2509
|
# # └─────┘
|
2472
2510
|
def quantile(quantile, interpolation: "nearest")
|
2473
|
-
quantile = Utils.
|
2474
|
-
_from_rbexpr(_rbexpr.quantile(quantile
|
2511
|
+
quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
|
2512
|
+
_from_rbexpr(_rbexpr.quantile(quantile, interpolation))
|
2475
2513
|
end
|
2476
2514
|
|
2477
2515
|
# Bin continuous values into discrete categories.
|
@@ -2515,17 +2553,17 @@ module Polars
|
|
2515
2553
|
# ).unnest("cut")
|
2516
2554
|
# # =>
|
2517
2555
|
# # shape: (5, 3)
|
2518
|
-
# #
|
2519
|
-
# # │ foo ┆
|
2520
|
-
# # │ --- ┆ ---
|
2521
|
-
# # │ i64 ┆ f64
|
2522
|
-
# #
|
2523
|
-
# # │ -2 ┆ -1.0
|
2524
|
-
# # │ -1 ┆ -1.0
|
2525
|
-
# # │ 0 ┆ 1.0
|
2526
|
-
# # │ 1 ┆ 1.0
|
2527
|
-
# # │ 2 ┆ inf
|
2528
|
-
# #
|
2556
|
+
# # ┌─────┬────────────┬────────────┐
|
2557
|
+
# # │ foo ┆ breakpoint ┆ category │
|
2558
|
+
# # │ --- ┆ --- ┆ --- │
|
2559
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2560
|
+
# # ╞═════╪════════════╪════════════╡
|
2561
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2562
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2563
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2564
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2565
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2566
|
+
# # └─────┴────────────┴────────────┘
|
2529
2567
|
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
|
2530
2568
|
_from_rbexpr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
|
2531
2569
|
end
|
@@ -2596,17 +2634,17 @@ module Polars
|
|
2596
2634
|
# ).unnest("qcut")
|
2597
2635
|
# # =>
|
2598
2636
|
# # shape: (5, 3)
|
2599
|
-
# #
|
2600
|
-
# # │ foo ┆
|
2601
|
-
# # │ --- ┆ ---
|
2602
|
-
# # │ i64 ┆ f64
|
2603
|
-
# #
|
2604
|
-
# # │ -2 ┆ -1.0
|
2605
|
-
# # │ -1 ┆ -1.0
|
2606
|
-
# # │ 0 ┆ 1.0
|
2607
|
-
# # │ 1 ┆ 1.0
|
2608
|
-
# # │ 2 ┆ inf
|
2609
|
-
# #
|
2637
|
+
# # ┌─────┬────────────┬────────────┐
|
2638
|
+
# # │ foo ┆ breakpoint ┆ category │
|
2639
|
+
# # │ --- ┆ --- ┆ --- │
|
2640
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2641
|
+
# # ╞═════╪════════════╪════════════╡
|
2642
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2643
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2644
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2645
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2646
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2647
|
+
# # └─────┴────────────┴────────────┘
|
2610
2648
|
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
|
2611
2649
|
if quantiles.is_a?(Integer)
|
2612
2650
|
rbexpr = _rbexpr.qcut_uniform(
|
@@ -2630,18 +2668,18 @@ module Polars
|
|
2630
2668
|
# df.select(Polars.col("s").rle).unnest("s")
|
2631
2669
|
# # =>
|
2632
2670
|
# # shape: (6, 2)
|
2633
|
-
# #
|
2634
|
-
# # │
|
2635
|
-
# # │ ---
|
2636
|
-
# # │
|
2637
|
-
# #
|
2638
|
-
# # │ 2
|
2639
|
-
# # │ 1
|
2640
|
-
# # │ 1
|
2641
|
-
# # │ 1
|
2642
|
-
# # │ 1
|
2643
|
-
# # │ 2
|
2644
|
-
# #
|
2671
|
+
# # ┌─────┬───────┐
|
2672
|
+
# # │ len ┆ value │
|
2673
|
+
# # │ --- ┆ --- │
|
2674
|
+
# # │ u32 ┆ i64 │
|
2675
|
+
# # ╞═════╪═══════╡
|
2676
|
+
# # │ 2 ┆ 1 │
|
2677
|
+
# # │ 1 ┆ 2 │
|
2678
|
+
# # │ 1 ┆ 1 │
|
2679
|
+
# # │ 1 ┆ null │
|
2680
|
+
# # │ 1 ┆ 1 │
|
2681
|
+
# # │ 2 ┆ 3 │
|
2682
|
+
# # └─────┴───────┘
|
2645
2683
|
def rle
|
2646
2684
|
_from_rbexpr(_rbexpr.rle)
|
2647
2685
|
end
|
@@ -3104,7 +3142,7 @@ module Polars
|
|
3104
3142
|
# # │ null ┆ null ┆ null ┆ true │
|
3105
3143
|
# # └──────┴──────┴────────┴────────────────┘
|
3106
3144
|
def eq_missing(other)
|
3107
|
-
other = Utils.
|
3145
|
+
other = Utils.parse_into_expression(other, str_as_lit: true)
|
3108
3146
|
_from_rbexpr(_rbexpr.eq_missing(other))
|
3109
3147
|
end
|
3110
3148
|
|
@@ -3308,7 +3346,7 @@ module Polars
|
|
3308
3346
|
# # │ null ┆ null ┆ null ┆ false │
|
3309
3347
|
# # └──────┴──────┴────────┴────────────────┘
|
3310
3348
|
def ne_missing(other)
|
3311
|
-
other = Utils.
|
3349
|
+
other = Utils.parse_into_expression(other, str_as_lit: true)
|
3312
3350
|
_from_rbexpr(_rbexpr.neq_missing(other))
|
3313
3351
|
end
|
3314
3352
|
|
@@ -3611,14 +3649,14 @@ module Polars
|
|
3611
3649
|
def is_in(other)
|
3612
3650
|
if other.is_a?(::Array)
|
3613
3651
|
if other.length == 0
|
3614
|
-
other = Polars.lit(nil)
|
3652
|
+
other = Polars.lit(nil)._rbexpr
|
3615
3653
|
else
|
3616
|
-
other = Polars.lit(Series.new(other))
|
3654
|
+
other = Polars.lit(Series.new(other))._rbexpr
|
3617
3655
|
end
|
3618
3656
|
else
|
3619
|
-
other = Utils.
|
3657
|
+
other = Utils.parse_into_expression(other, str_as_lit: false)
|
3620
3658
|
end
|
3621
|
-
_from_rbexpr(_rbexpr.is_in(other
|
3659
|
+
_from_rbexpr(_rbexpr.is_in(other))
|
3622
3660
|
end
|
3623
3661
|
alias_method :in?, :is_in
|
3624
3662
|
|
@@ -3653,15 +3691,15 @@ module Polars
|
|
3653
3691
|
# # │ ["z", "z", "z"] │
|
3654
3692
|
# # └─────────────────┘
|
3655
3693
|
def repeat_by(by)
|
3656
|
-
by = Utils.
|
3657
|
-
_from_rbexpr(_rbexpr.repeat_by(by
|
3694
|
+
by = Utils.parse_into_expression(by, str_as_lit: false)
|
3695
|
+
_from_rbexpr(_rbexpr.repeat_by(by))
|
3658
3696
|
end
|
3659
3697
|
|
3660
3698
|
# Check if this expression is between start and end.
|
3661
3699
|
#
|
3662
|
-
# @param
|
3700
|
+
# @param lower_bound [Object]
|
3663
3701
|
# Lower bound as primitive type or datetime.
|
3664
|
-
# @param
|
3702
|
+
# @param upper_bound [Object]
|
3665
3703
|
# Upper bound as primitive type or datetime.
|
3666
3704
|
# @param closed ["both", "left", "right", "none"]
|
3667
3705
|
# Define which sides of the interval are closed (inclusive).
|
@@ -3723,22 +3761,13 @@ module Polars
|
|
3723
3761
|
# # │ d ┆ false │
|
3724
3762
|
# # │ e ┆ false │
|
3725
3763
|
# # └─────┴────────────┘
|
3726
|
-
def is_between(
|
3727
|
-
|
3728
|
-
|
3729
|
-
|
3730
|
-
|
3731
|
-
|
3732
|
-
|
3733
|
-
when "both"
|
3734
|
-
(self >= start) & (self <= _end)
|
3735
|
-
when "right"
|
3736
|
-
(self > start) & (self <= _end)
|
3737
|
-
when "left"
|
3738
|
-
(self >= start) & (self < _end)
|
3739
|
-
else
|
3740
|
-
raise ArgumentError, "closed must be one of 'left', 'right', 'both', or 'none'"
|
3741
|
-
end
|
3764
|
+
def is_between(lower_bound, upper_bound, closed: "both")
|
3765
|
+
lower_bound = Utils.parse_into_expression(lower_bound)
|
3766
|
+
upper_bound = Utils.parse_into_expression(upper_bound)
|
3767
|
+
|
3768
|
+
_from_rbexpr(
|
3769
|
+
_rbexpr.is_between(lower_bound, upper_bound, closed)
|
3770
|
+
)
|
3742
3771
|
end
|
3743
3772
|
|
3744
3773
|
# Hash the elements in the selection.
|
@@ -3977,7 +4006,7 @@ module Polars
|
|
3977
4006
|
warn_if_unsorted: nil
|
3978
4007
|
)
|
3979
4008
|
window_size = _prepare_rolling_by_window_args(window_size)
|
3980
|
-
by = Utils.
|
4009
|
+
by = Utils.parse_into_expression(by)
|
3981
4010
|
_from_rbexpr(
|
3982
4011
|
_rbexpr.rolling_min_by(by, window_size, min_periods, closed)
|
3983
4012
|
)
|
@@ -4106,7 +4135,7 @@ module Polars
|
|
4106
4135
|
warn_if_unsorted: nil
|
4107
4136
|
)
|
4108
4137
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4109
|
-
by = Utils.
|
4138
|
+
by = Utils.parse_into_expression(by)
|
4110
4139
|
_from_rbexpr(
|
4111
4140
|
_rbexpr.rolling_max_by(by, window_size, min_periods, closed)
|
4112
4141
|
)
|
@@ -4237,7 +4266,7 @@ module Polars
|
|
4237
4266
|
warn_if_unsorted: nil
|
4238
4267
|
)
|
4239
4268
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4240
|
-
by = Utils.
|
4269
|
+
by = Utils.parse_into_expression(by)
|
4241
4270
|
_from_rbexpr(
|
4242
4271
|
_rbexpr.rolling_mean_by(
|
4243
4272
|
by,
|
@@ -4371,7 +4400,7 @@ module Polars
|
|
4371
4400
|
warn_if_unsorted: nil
|
4372
4401
|
)
|
4373
4402
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4374
|
-
by = Utils.
|
4403
|
+
by = Utils.parse_into_expression(by)
|
4375
4404
|
_from_rbexpr(
|
4376
4405
|
_rbexpr.rolling_sum_by(by, window_size, min_periods, closed)
|
4377
4406
|
)
|
@@ -4503,7 +4532,7 @@ module Polars
|
|
4503
4532
|
warn_if_unsorted: nil
|
4504
4533
|
)
|
4505
4534
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4506
|
-
by = Utils.
|
4535
|
+
by = Utils.parse_into_expression(by)
|
4507
4536
|
_from_rbexpr(
|
4508
4537
|
_rbexpr.rolling_std_by(
|
4509
4538
|
by,
|
@@ -4641,7 +4670,7 @@ module Polars
|
|
4641
4670
|
warn_if_unsorted: nil
|
4642
4671
|
)
|
4643
4672
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4644
|
-
by = Utils.
|
4673
|
+
by = Utils.parse_into_expression(by)
|
4645
4674
|
_from_rbexpr(
|
4646
4675
|
_rbexpr.rolling_var_by(
|
4647
4676
|
by,
|
@@ -4752,7 +4781,7 @@ module Polars
|
|
4752
4781
|
warn_if_unsorted: nil
|
4753
4782
|
)
|
4754
4783
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4755
|
-
by = Utils.
|
4784
|
+
by = Utils.parse_into_expression(by)
|
4756
4785
|
_from_rbexpr(
|
4757
4786
|
_rbexpr.rolling_median_by(by, window_size, min_periods, closed)
|
4758
4787
|
)
|
@@ -4863,7 +4892,7 @@ module Polars
|
|
4863
4892
|
warn_if_unsorted: nil
|
4864
4893
|
)
|
4865
4894
|
window_size = _prepare_rolling_by_window_args(window_size)
|
4866
|
-
by = Utils.
|
4895
|
+
by = Utils.parse_into_expression(by)
|
4867
4896
|
_from_rbexpr(
|
4868
4897
|
_rbexpr.rolling_quantile_by(
|
4869
4898
|
by,
|
@@ -4908,12 +4937,6 @@ module Polars
|
|
4908
4937
|
# a result. If None, it will be set equal to window size.
|
4909
4938
|
# @param center [Boolean]
|
4910
4939
|
# Set the labels at the center of the window
|
4911
|
-
# @param by [String]
|
4912
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
4913
|
-
# set the column that will be used to determine the windows. This column must
|
4914
|
-
# be of dtype `{Date, Datetime}`
|
4915
|
-
# @param closed ["left", "right", "both", "none"]
|
4916
|
-
# Define whether the temporal window interval is closed or not.
|
4917
4940
|
#
|
4918
4941
|
# @note
|
4919
4942
|
# This functionality is experimental and may change without it being considered a
|
@@ -4951,24 +4974,8 @@ module Polars
|
|
4951
4974
|
window_size,
|
4952
4975
|
weights: nil,
|
4953
4976
|
min_periods: nil,
|
4954
|
-
center: false
|
4955
|
-
by: nil,
|
4956
|
-
closed: nil
|
4977
|
+
center: false
|
4957
4978
|
)
|
4958
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
4959
|
-
window_size, min_periods
|
4960
|
-
)
|
4961
|
-
if !by.nil?
|
4962
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
4963
|
-
return rolling_min_by(
|
4964
|
-
by,
|
4965
|
-
window_size,
|
4966
|
-
min_periods: min_periods,
|
4967
|
-
closed: closed || "right",
|
4968
|
-
warn_if_unsorted: warn_if_unsorted
|
4969
|
-
)
|
4970
|
-
end
|
4971
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
4972
4979
|
_from_rbexpr(
|
4973
4980
|
_rbexpr.rolling_min(
|
4974
4981
|
window_size, weights, min_periods, center
|
@@ -5008,12 +5015,6 @@ module Polars
|
|
5008
5015
|
# a result. If None, it will be set equal to window size.
|
5009
5016
|
# @param center [Boolean]
|
5010
5017
|
# Set the labels at the center of the window
|
5011
|
-
# @param by [String]
|
5012
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5013
|
-
# set the column that will be used to determine the windows. This column must
|
5014
|
-
# be of dtype `{Date, Datetime}`
|
5015
|
-
# @param closed ["left", "right", "both", "none"]
|
5016
|
-
# Define whether the temporal window interval is closed or not.
|
5017
5018
|
#
|
5018
5019
|
# @note
|
5019
5020
|
# This functionality is experimental and may change without it being considered a
|
@@ -5051,24 +5052,8 @@ module Polars
|
|
5051
5052
|
window_size,
|
5052
5053
|
weights: nil,
|
5053
5054
|
min_periods: nil,
|
5054
|
-
center: false
|
5055
|
-
by: nil,
|
5056
|
-
closed: nil
|
5055
|
+
center: false
|
5057
5056
|
)
|
5058
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5059
|
-
window_size, min_periods
|
5060
|
-
)
|
5061
|
-
if !by.nil?
|
5062
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5063
|
-
return rolling_max_by(
|
5064
|
-
by,
|
5065
|
-
window_size,
|
5066
|
-
min_periods: min_periods,
|
5067
|
-
closed: closed || "right",
|
5068
|
-
warn_if_unsorted: warn_if_unsorted
|
5069
|
-
)
|
5070
|
-
end
|
5071
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5072
5057
|
_from_rbexpr(
|
5073
5058
|
_rbexpr.rolling_max(
|
5074
5059
|
window_size, weights, min_periods, center
|
@@ -5108,12 +5093,6 @@ module Polars
|
|
5108
5093
|
# a result. If None, it will be set equal to window size.
|
5109
5094
|
# @param center [Boolean]
|
5110
5095
|
# Set the labels at the center of the window
|
5111
|
-
# @param by [String]
|
5112
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5113
|
-
# set the column that will be used to determine the windows. This column must
|
5114
|
-
# be of dtype `{Date, Datetime}`
|
5115
|
-
# @param closed ["left", "right", "both", "none"]
|
5116
|
-
# Define whether the temporal window interval is closed or not.
|
5117
5096
|
#
|
5118
5097
|
# @note
|
5119
5098
|
# This functionality is experimental and may change without it being considered a
|
@@ -5151,24 +5130,8 @@ module Polars
|
|
5151
5130
|
window_size,
|
5152
5131
|
weights: nil,
|
5153
5132
|
min_periods: nil,
|
5154
|
-
center: false
|
5155
|
-
by: nil,
|
5156
|
-
closed: nil
|
5133
|
+
center: false
|
5157
5134
|
)
|
5158
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5159
|
-
window_size, min_periods
|
5160
|
-
)
|
5161
|
-
if !by.nil?
|
5162
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5163
|
-
return rolling_mean_by(
|
5164
|
-
by,
|
5165
|
-
window_size,
|
5166
|
-
min_periods: min_periods,
|
5167
|
-
closed: closed || "right",
|
5168
|
-
warn_if_unsorted: warn_if_unsorted
|
5169
|
-
)
|
5170
|
-
end
|
5171
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5172
5135
|
_from_rbexpr(
|
5173
5136
|
_rbexpr.rolling_mean(
|
5174
5137
|
window_size, weights, min_periods, center
|
@@ -5208,12 +5171,6 @@ module Polars
|
|
5208
5171
|
# a result. If None, it will be set equal to window size.
|
5209
5172
|
# @param center [Boolean]
|
5210
5173
|
# Set the labels at the center of the window
|
5211
|
-
# @param by [String]
|
5212
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5213
|
-
# set the column that will be used to determine the windows. This column must
|
5214
|
-
# be of dtype `{Date, Datetime}`
|
5215
|
-
# @param closed ["left", "right", "both", "none"]
|
5216
|
-
# Define whether the temporal window interval is closed or not.
|
5217
5174
|
#
|
5218
5175
|
# @note
|
5219
5176
|
# This functionality is experimental and may change without it being considered a
|
@@ -5251,24 +5208,8 @@ module Polars
|
|
5251
5208
|
window_size,
|
5252
5209
|
weights: nil,
|
5253
5210
|
min_periods: nil,
|
5254
|
-
center: false
|
5255
|
-
by: nil,
|
5256
|
-
closed: nil
|
5211
|
+
center: false
|
5257
5212
|
)
|
5258
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5259
|
-
window_size, min_periods
|
5260
|
-
)
|
5261
|
-
if !by.nil?
|
5262
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5263
|
-
return rolling_sum_by(
|
5264
|
-
by,
|
5265
|
-
window_size,
|
5266
|
-
min_periods: min_periods,
|
5267
|
-
closed: closed || "right",
|
5268
|
-
warn_if_unsorted: warn_if_unsorted
|
5269
|
-
)
|
5270
|
-
end
|
5271
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5272
5213
|
_from_rbexpr(
|
5273
5214
|
_rbexpr.rolling_sum(
|
5274
5215
|
window_size, weights, min_periods, center
|
@@ -5308,12 +5249,6 @@ module Polars
|
|
5308
5249
|
# a result. If None, it will be set equal to window size.
|
5309
5250
|
# @param center [Boolean]
|
5310
5251
|
# Set the labels at the center of the window
|
5311
|
-
# @param by [String]
|
5312
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5313
|
-
# set the column that will be used to determine the windows. This column must
|
5314
|
-
# be of dtype `{Date, Datetime}`
|
5315
|
-
# @param closed ["left", "right", "both", "none"]
|
5316
|
-
# Define whether the temporal window interval is closed or not.
|
5317
5252
|
#
|
5318
5253
|
# @note
|
5319
5254
|
# This functionality is experimental and may change without it being considered a
|
@@ -5352,26 +5287,8 @@ module Polars
|
|
5352
5287
|
weights: nil,
|
5353
5288
|
min_periods: nil,
|
5354
5289
|
center: false,
|
5355
|
-
|
5356
|
-
closed: nil,
|
5357
|
-
ddof: 1,
|
5358
|
-
warn_if_unsorted: true
|
5290
|
+
ddof: 1
|
5359
5291
|
)
|
5360
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5361
|
-
window_size, min_periods
|
5362
|
-
)
|
5363
|
-
if !by.nil?
|
5364
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5365
|
-
return rolling_std_by(
|
5366
|
-
by,
|
5367
|
-
window_size,
|
5368
|
-
min_periods: min_periods,
|
5369
|
-
closed: closed || "right",
|
5370
|
-
ddof: ddof,
|
5371
|
-
warn_if_unsorted: warn_if_unsorted
|
5372
|
-
)
|
5373
|
-
end
|
5374
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5375
5292
|
_from_rbexpr(
|
5376
5293
|
_rbexpr.rolling_std(
|
5377
5294
|
window_size, weights, min_periods, center, ddof
|
@@ -5411,12 +5328,6 @@ module Polars
|
|
5411
5328
|
# a result. If None, it will be set equal to window size.
|
5412
5329
|
# @param center [Boolean]
|
5413
5330
|
# Set the labels at the center of the window
|
5414
|
-
# @param by [String]
|
5415
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5416
|
-
# set the column that will be used to determine the windows. This column must
|
5417
|
-
# be of dtype `{Date, Datetime}`
|
5418
|
-
# @param closed ["left", "right", "both", "none"]
|
5419
|
-
# Define whether the temporal window interval is closed or not.
|
5420
5331
|
#
|
5421
5332
|
# @note
|
5422
5333
|
# This functionality is experimental and may change without it being considered a
|
@@ -5455,26 +5366,8 @@ module Polars
|
|
5455
5366
|
weights: nil,
|
5456
5367
|
min_periods: nil,
|
5457
5368
|
center: false,
|
5458
|
-
|
5459
|
-
closed: nil,
|
5460
|
-
ddof: 1,
|
5461
|
-
warn_if_unsorted: true
|
5369
|
+
ddof: 1
|
5462
5370
|
)
|
5463
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5464
|
-
window_size, min_periods
|
5465
|
-
)
|
5466
|
-
if !by.nil?
|
5467
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5468
|
-
return rolling_var_by(
|
5469
|
-
by,
|
5470
|
-
window_size,
|
5471
|
-
min_periods: min_periods,
|
5472
|
-
closed: closed || "right",
|
5473
|
-
ddof: ddof,
|
5474
|
-
warn_if_unsorted: warn_if_unsorted
|
5475
|
-
)
|
5476
|
-
end
|
5477
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5478
5371
|
_from_rbexpr(
|
5479
5372
|
_rbexpr.rolling_var(
|
5480
5373
|
window_size, weights, min_periods, center, ddof
|
@@ -5510,12 +5403,6 @@ module Polars
|
|
5510
5403
|
# a result. If None, it will be set equal to window size.
|
5511
5404
|
# @param center [Boolean]
|
5512
5405
|
# Set the labels at the center of the window
|
5513
|
-
# @param by [String]
|
5514
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5515
|
-
# set the column that will be used to determine the windows. This column must
|
5516
|
-
# be of dtype `{Date, Datetime}`
|
5517
|
-
# @param closed ["left", "right", "both", "none"]
|
5518
|
-
# Define whether the temporal window interval is closed or not.
|
5519
5406
|
#
|
5520
5407
|
# @note
|
5521
5408
|
# This functionality is experimental and may change without it being considered a
|
@@ -5553,25 +5440,8 @@ module Polars
|
|
5553
5440
|
window_size,
|
5554
5441
|
weights: nil,
|
5555
5442
|
min_periods: nil,
|
5556
|
-
center: false
|
5557
|
-
by: nil,
|
5558
|
-
closed: nil,
|
5559
|
-
warn_if_unsorted: true
|
5443
|
+
center: false
|
5560
5444
|
)
|
5561
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5562
|
-
window_size, min_periods
|
5563
|
-
)
|
5564
|
-
if !by.nil?
|
5565
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5566
|
-
return rolling_median_by(
|
5567
|
-
by,
|
5568
|
-
window_size,
|
5569
|
-
min_periods: min_periods,
|
5570
|
-
closed: closed || "right",
|
5571
|
-
warn_if_unsorted: warn_if_unsorted
|
5572
|
-
)
|
5573
|
-
end
|
5574
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5575
5445
|
_from_rbexpr(
|
5576
5446
|
_rbexpr.rolling_median(
|
5577
5447
|
window_size, weights, min_periods, center
|
@@ -5611,12 +5481,6 @@ module Polars
|
|
5611
5481
|
# a result. If None, it will be set equal to window size.
|
5612
5482
|
# @param center [Boolean]
|
5613
5483
|
# Set the labels at the center of the window
|
5614
|
-
# @param by [String]
|
5615
|
-
# If the `window_size` is temporal for instance `"5h"` or `"3s`, you must
|
5616
|
-
# set the column that will be used to determine the windows. This column must
|
5617
|
-
# be of dtype `{Date, Datetime}`
|
5618
|
-
# @param closed ["left", "right", "both", "none"]
|
5619
|
-
# Define whether the temporal window interval is closed or not.
|
5620
5484
|
#
|
5621
5485
|
# @note
|
5622
5486
|
# This functionality is experimental and may change without it being considered a
|
@@ -5656,26 +5520,8 @@ module Polars
|
|
5656
5520
|
window_size: 2,
|
5657
5521
|
weights: nil,
|
5658
5522
|
min_periods: nil,
|
5659
|
-
center: false
|
5660
|
-
by: nil,
|
5661
|
-
closed: nil,
|
5662
|
-
warn_if_unsorted: true
|
5523
|
+
center: false
|
5663
5524
|
)
|
5664
|
-
window_size, min_periods = _prepare_rolling_window_args(
|
5665
|
-
window_size, min_periods
|
5666
|
-
)
|
5667
|
-
if !by.nil?
|
5668
|
-
Utils.validate_rolling_by_aggs_arguments(weights, center: center)
|
5669
|
-
return rolling_quantile_by(
|
5670
|
-
by,
|
5671
|
-
window_size,
|
5672
|
-
min_periods: min_periods,
|
5673
|
-
closed: closed || "right",
|
5674
|
-
warn_if_unsorted: warn_if_unsorted,
|
5675
|
-
quantile: quantile
|
5676
|
-
)
|
5677
|
-
end
|
5678
|
-
window_size = Utils.validate_rolling_aggs_arguments(window_size, closed)
|
5679
5525
|
_from_rbexpr(
|
5680
5526
|
_rbexpr.rolling_quantile(
|
5681
5527
|
quantile, interpolation, window_size, weights, min_periods, center
|
@@ -5947,7 +5793,7 @@ module Polars
|
|
5947
5793
|
# # │ 12 ┆ 0.0 │
|
5948
5794
|
# # └──────┴────────────┘
|
5949
5795
|
def pct_change(n: 1)
|
5950
|
-
n = Utils.
|
5796
|
+
n = Utils.parse_into_expression(n)
|
5951
5797
|
_from_rbexpr(_rbexpr.pct_change(n))
|
5952
5798
|
end
|
5953
5799
|
|
@@ -6039,12 +5885,12 @@ module Polars
|
|
6039
5885
|
# # │ null ┆ null │
|
6040
5886
|
# # │ 50 ┆ 10 │
|
6041
5887
|
# # └──────┴─────────────┘
|
6042
|
-
def clip(lower_bound, upper_bound)
|
5888
|
+
def clip(lower_bound = nil, upper_bound = nil)
|
6043
5889
|
if !lower_bound.nil?
|
6044
|
-
lower_bound = Utils.
|
5890
|
+
lower_bound = Utils.parse_into_expression(lower_bound)
|
6045
5891
|
end
|
6046
5892
|
if !upper_bound.nil?
|
6047
|
-
upper_bound = Utils.
|
5893
|
+
upper_bound = Utils.parse_into_expression(upper_bound)
|
6048
5894
|
end
|
6049
5895
|
_from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
|
6050
5896
|
end
|
@@ -6431,18 +6277,38 @@ module Polars
|
|
6431
6277
|
#
|
6432
6278
|
# @example
|
6433
6279
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
6434
|
-
# df.select(Polars.col("foo").reshape([3, 3]))
|
6280
|
+
# square = df.select(Polars.col("foo").reshape([3, 3]))
|
6435
6281
|
# # =>
|
6436
6282
|
# # shape: (3, 1)
|
6437
|
-
# #
|
6438
|
-
# # │ foo
|
6439
|
-
# # │ ---
|
6440
|
-
# # │
|
6441
|
-
# #
|
6442
|
-
# # │ [1, 2, 3]
|
6443
|
-
# # │ [4, 5, 6]
|
6444
|
-
# # │ [7, 8, 9]
|
6445
|
-
# #
|
6283
|
+
# # ┌───────────────┐
|
6284
|
+
# # │ foo │
|
6285
|
+
# # │ --- │
|
6286
|
+
# # │ array[i64, 3] │
|
6287
|
+
# # ╞═══════════════╡
|
6288
|
+
# # │ [1, 2, 3] │
|
6289
|
+
# # │ [4, 5, 6] │
|
6290
|
+
# # │ [7, 8, 9] │
|
6291
|
+
# # └───────────────┘
|
6292
|
+
#
|
6293
|
+
# @example
|
6294
|
+
# square.select(Polars.col("foo").reshape([9]))
|
6295
|
+
# # =>
|
6296
|
+
# # shape: (9, 1)
|
6297
|
+
# # ┌─────┐
|
6298
|
+
# # │ foo │
|
6299
|
+
# # │ --- │
|
6300
|
+
# # │ i64 │
|
6301
|
+
# # ╞═════╡
|
6302
|
+
# # │ 1 │
|
6303
|
+
# # │ 2 │
|
6304
|
+
# # │ 3 │
|
6305
|
+
# # │ 4 │
|
6306
|
+
# # │ 5 │
|
6307
|
+
# # │ 6 │
|
6308
|
+
# # │ 7 │
|
6309
|
+
# # │ 8 │
|
6310
|
+
# # │ 9 │
|
6311
|
+
# # └─────┘
|
6446
6312
|
def reshape(dims)
|
6447
6313
|
_from_rbexpr(_rbexpr.reshape(dims))
|
6448
6314
|
end
|
@@ -6518,14 +6384,14 @@ module Polars
|
|
6518
6384
|
end
|
6519
6385
|
|
6520
6386
|
if !n.nil? && frac.nil?
|
6521
|
-
n = Utils.
|
6387
|
+
n = Utils.parse_into_expression(n)
|
6522
6388
|
return _from_rbexpr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
6523
6389
|
end
|
6524
6390
|
|
6525
6391
|
if frac.nil?
|
6526
6392
|
frac = 1.0
|
6527
6393
|
end
|
6528
|
-
frac = Utils.
|
6394
|
+
frac = Utils.parse_into_expression(frac)
|
6529
6395
|
_from_rbexpr(
|
6530
6396
|
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
6531
6397
|
)
|
@@ -6658,11 +6524,17 @@ module Polars
|
|
6658
6524
|
|
6659
6525
|
# Count all unique values and create a struct mapping value to count.
|
6660
6526
|
#
|
6661
|
-
# @param multithreaded [Boolean]
|
6662
|
-
# Better to turn this off in the aggregation context, as it can lead to
|
6663
|
-
# contention.
|
6664
6527
|
# @param sort [Boolean]
|
6665
|
-
#
|
6528
|
+
# Sort the output by count in descending order.
|
6529
|
+
# If set to `false` (default), the order of the output is random.
|
6530
|
+
# @param parallel [Boolean]
|
6531
|
+
# Execute the computation in parallel.
|
6532
|
+
# @param name [String]
|
6533
|
+
# Give the resulting count column a specific name;
|
6534
|
+
# if `normalize` is true defaults to "proportion",
|
6535
|
+
# otherwise defaults to "count".
|
6536
|
+
# @param normalize [Boolean]
|
6537
|
+
# If true gives relative frequencies of the unique values
|
6666
6538
|
#
|
6667
6539
|
# @return [Expr]
|
6668
6540
|
#
|
@@ -6688,8 +6560,22 @@ module Polars
|
|
6688
6560
|
# # │ {"b",2} │
|
6689
6561
|
# # │ {"a",1} │
|
6690
6562
|
# # └───────────┘
|
6691
|
-
def value_counts(
|
6692
|
-
|
6563
|
+
def value_counts(
|
6564
|
+
sort: false,
|
6565
|
+
parallel: false,
|
6566
|
+
name: nil,
|
6567
|
+
normalize: false
|
6568
|
+
)
|
6569
|
+
if name.nil?
|
6570
|
+
if normalize
|
6571
|
+
name = "proportion"
|
6572
|
+
else
|
6573
|
+
name = "count"
|
6574
|
+
end
|
6575
|
+
end
|
6576
|
+
_from_rbexpr(
|
6577
|
+
_rbexpr.value_counts(sort, parallel, name, normalize)
|
6578
|
+
)
|
6693
6579
|
end
|
6694
6580
|
|
6695
6581
|
# Return a count of the unique values in the order of appearance.
|
@@ -7064,6 +6950,10 @@ module Polars
|
|
7064
6950
|
# # │ 3 ┆ 1.0 ┆ 10.0 │
|
7065
6951
|
# # └─────┴─────┴──────────┘
|
7066
6952
|
def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
|
6953
|
+
if !default.eql?(NO_DEFAULT)
|
6954
|
+
return replace_strict(old, new, default: default, return_dtype: return_dtype)
|
6955
|
+
end
|
6956
|
+
|
7067
6957
|
if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
|
7068
6958
|
new = Series.new(old.values)
|
7069
6959
|
old = Series.new(old.keys)
|
@@ -7076,17 +6966,164 @@ module Polars
|
|
7076
6966
|
end
|
7077
6967
|
end
|
7078
6968
|
|
7079
|
-
old = Utils.
|
7080
|
-
new = Utils.
|
6969
|
+
old = Utils.parse_into_expression(old, str_as_lit: true)
|
6970
|
+
new = Utils.parse_into_expression(new, str_as_lit: true)
|
7081
6971
|
|
7082
|
-
|
7083
|
-
|
7084
|
-
|
7085
|
-
|
7086
|
-
|
7087
|
-
|
6972
|
+
result = _from_rbexpr(_rbexpr.replace(old, new))
|
6973
|
+
|
6974
|
+
if !return_dtype.nil?
|
6975
|
+
result = result.cast(return_dtype)
|
6976
|
+
end
|
6977
|
+
|
6978
|
+
result
|
6979
|
+
end
|
6980
|
+
|
6981
|
+
# Replace all values by different values.
|
6982
|
+
#
|
6983
|
+
# @param old [Object]
|
6984
|
+
# Value or sequence of values to replace.
|
6985
|
+
# Accepts expression input. Sequences are parsed as Series,
|
6986
|
+
# other non-expression inputs are parsed as literals.
|
6987
|
+
# Also accepts a mapping of values to their replacement as syntactic sugar for
|
6988
|
+
# `replace_strict(Series.new(mapping.keys), Series.new(mapping.values))`.
|
6989
|
+
# @param new [Object]
|
6990
|
+
# Value or sequence of values to replace by.
|
6991
|
+
# Accepts expression input. Sequences are parsed as Series,
|
6992
|
+
# other non-expression inputs are parsed as literals.
|
6993
|
+
# Length must match the length of `old` or have length 1.
|
6994
|
+
# @param default [Object]
|
6995
|
+
# Set values that were not replaced to this value. If no default is specified
|
6996
|
+
# (the default behavior), an error is raised if any values were not replaced.
|
6997
|
+
# Accepts expression input. Non-expression inputs are parsed as literals.
|
6998
|
+
# @param return_dtype [Object]
|
6999
|
+
# The data type of the resulting expression. If set to `nil` (default),
|
7000
|
+
# the data type is determined automatically based on the other inputs.
|
7001
|
+
#
|
7002
|
+
# @return [Expr]
|
7003
|
+
#
|
7004
|
+
# @note
|
7005
|
+
# The global string cache must be enabled when replacing categorical values.
|
7006
|
+
#
|
7007
|
+
# @example Replace values by passing sequences to the `old` and `new` parameters.
|
7008
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
|
7009
|
+
# df.with_columns(
|
7010
|
+
# replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
|
7011
|
+
# )
|
7012
|
+
# # =>
|
7013
|
+
# # shape: (4, 2)
|
7014
|
+
# # ┌─────┬──────────┐
|
7015
|
+
# # │ a ┆ replaced │
|
7016
|
+
# # │ --- ┆ --- │
|
7017
|
+
# # │ i64 ┆ i64 │
|
7018
|
+
# # ╞═════╪══════════╡
|
7019
|
+
# # │ 1 ┆ 100 │
|
7020
|
+
# # │ 2 ┆ 200 │
|
7021
|
+
# # │ 2 ┆ 200 │
|
7022
|
+
# # │ 3 ┆ 300 │
|
7023
|
+
# # └─────┴──────────┘
|
7024
|
+
#
|
7025
|
+
# @example By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.
|
7026
|
+
# mapping = {2 => 200, 3 => 300}
|
7027
|
+
# df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: -1))
|
7028
|
+
# # =>
|
7029
|
+
# # shape: (4, 2)
|
7030
|
+
# # ┌─────┬──────────┐
|
7031
|
+
# # │ a ┆ replaced │
|
7032
|
+
# # │ --- ┆ --- │
|
7033
|
+
# # │ i64 ┆ i64 │
|
7034
|
+
# # ╞═════╪══════════╡
|
7035
|
+
# # │ 1 ┆ -1 │
|
7036
|
+
# # │ 2 ┆ 200 │
|
7037
|
+
# # │ 2 ┆ 200 │
|
7038
|
+
# # │ 3 ┆ 300 │
|
7039
|
+
# # └─────┴──────────┘
|
7040
|
+
#
|
7041
|
+
# @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type.
|
7042
|
+
# df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
|
7043
|
+
# mapping = {"x" => 1, "y" => 2, "z" => 3}
|
7044
|
+
# df.with_columns(replaced: Polars.col("a").replace_strict(mapping))
|
7045
|
+
# # =>
|
7046
|
+
# # shape: (3, 2)
|
7047
|
+
# # ┌─────┬──────────┐
|
7048
|
+
# # │ a ┆ replaced │
|
7049
|
+
# # │ --- ┆ --- │
|
7050
|
+
# # │ str ┆ i64 │
|
7051
|
+
# # ╞═════╪══════════╡
|
7052
|
+
# # │ x ┆ 1 │
|
7053
|
+
# # │ y ┆ 2 │
|
7054
|
+
# # │ z ┆ 3 │
|
7055
|
+
# # └─────┴──────────┘
|
7056
|
+
#
|
7057
|
+
# @example
|
7058
|
+
# df.with_columns(replaced: Polars.col("a").replace_strict(mapping, default: "x"))
|
7059
|
+
# # =>
|
7060
|
+
# # shape: (3, 2)
|
7061
|
+
# # ┌─────┬──────────┐
|
7062
|
+
# # │ a ┆ replaced │
|
7063
|
+
# # │ --- ┆ --- │
|
7064
|
+
# # │ str ┆ str │
|
7065
|
+
# # ╞═════╪══════════╡
|
7066
|
+
# # │ x ┆ 1 │
|
7067
|
+
# # │ y ┆ 2 │
|
7068
|
+
# # │ z ┆ 3 │
|
7069
|
+
# # └─────┴──────────┘
|
7070
|
+
#
|
7071
|
+
# @example Set the `return_dtype` parameter to control the resulting data type directly.
|
7072
|
+
# df.with_columns(
|
7073
|
+
# replaced: Polars.col("a").replace_strict(mapping, return_dtype: Polars::UInt8)
|
7074
|
+
# )
|
7075
|
+
# # =>
|
7076
|
+
# # shape: (3, 2)
|
7077
|
+
# # ┌─────┬──────────┐
|
7078
|
+
# # │ a ┆ replaced │
|
7079
|
+
# # │ --- ┆ --- │
|
7080
|
+
# # │ str ┆ u8 │
|
7081
|
+
# # ╞═════╪══════════╡
|
7082
|
+
# # │ x ┆ 1 │
|
7083
|
+
# # │ y ┆ 2 │
|
7084
|
+
# # │ z ┆ 3 │
|
7085
|
+
# # └─────┴──────────┘
|
7086
|
+
#
|
7087
|
+
# @example Expression input is supported for all parameters.
|
7088
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
|
7089
|
+
# df.with_columns(
|
7090
|
+
# replaced: Polars.col("a").replace_strict(
|
7091
|
+
# Polars.col("a").max,
|
7092
|
+
# Polars.col("b").sum,
|
7093
|
+
# default: Polars.col("b")
|
7094
|
+
# )
|
7095
|
+
# )
|
7096
|
+
# # =>
|
7097
|
+
# # shape: (4, 3)
|
7098
|
+
# # ┌─────┬─────┬──────────┐
|
7099
|
+
# # │ a ┆ b ┆ replaced │
|
7100
|
+
# # │ --- ┆ --- ┆ --- │
|
7101
|
+
# # │ i64 ┆ f64 ┆ f64 │
|
7102
|
+
# # ╞═════╪═════╪══════════╡
|
7103
|
+
# # │ 1 ┆ 1.5 ┆ 1.5 │
|
7104
|
+
# # │ 2 ┆ 2.5 ┆ 2.5 │
|
7105
|
+
# # │ 2 ┆ 5.0 ┆ 5.0 │
|
7106
|
+
# # │ 3 ┆ 1.0 ┆ 10.0 │
|
7107
|
+
# # └─────┴─────┴──────────┘
|
7108
|
+
def replace_strict(
|
7109
|
+
old,
|
7110
|
+
new = NO_DEFAULT,
|
7111
|
+
default: NO_DEFAULT,
|
7112
|
+
return_dtype: nil
|
7113
|
+
)
|
7114
|
+
if new.eql?(NO_DEFAULT) && old.is_a?(Hash)
|
7115
|
+
new = Series.new(old.values)
|
7116
|
+
old = Series.new(old.keys)
|
7117
|
+
end
|
7088
7118
|
|
7089
|
-
|
7119
|
+
old = Utils.parse_into_expression(old, str_as_lit: true, list_as_series: true)
|
7120
|
+
new = Utils.parse_into_expression(new, str_as_lit: true, list_as_series: true)
|
7121
|
+
|
7122
|
+
default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_into_expression(default, str_as_lit: true)
|
7123
|
+
|
7124
|
+
_from_rbexpr(
|
7125
|
+
_rbexpr.replace_strict(old, new, default, return_dtype)
|
7126
|
+
)
|
7090
7127
|
end
|
7091
7128
|
|
7092
7129
|
# Create an object namespace of all list related methods.
|
@@ -7163,7 +7200,7 @@ module Polars
|
|
7163
7200
|
end
|
7164
7201
|
|
7165
7202
|
def _to_expr(other)
|
7166
|
-
other.is_a?(Expr) ? other :
|
7203
|
+
other.is_a?(Expr) ? other : F.lit(other)
|
7167
7204
|
end
|
7168
7205
|
|
7169
7206
|
def _prepare_alpha(com, span, half_life, alpha)
|