polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
data/lib/polars/expr.rb
CHANGED
@@ -131,6 +131,13 @@ module Polars
|
|
131
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
132
132
|
end
|
133
133
|
|
134
|
+
# Performs boolean not.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
137
|
+
def !
|
138
|
+
is_not
|
139
|
+
end
|
140
|
+
|
134
141
|
# Performs negation.
|
135
142
|
#
|
136
143
|
# @return [Expr]
|
@@ -191,8 +198,8 @@ module Polars
|
|
191
198
|
# # ╞══════╪═══════╡
|
192
199
|
# # │ true ┆ false │
|
193
200
|
# # └──────┴───────┘
|
194
|
-
def any
|
195
|
-
wrap_expr(_rbexpr.any)
|
201
|
+
def any(drop_nulls: true)
|
202
|
+
wrap_expr(_rbexpr.any(drop_nulls))
|
196
203
|
end
|
197
204
|
|
198
205
|
# Check if all boolean values in a Boolean column are `true`.
|
@@ -216,8 +223,8 @@ module Polars
|
|
216
223
|
# # ╞══════╪═══════╪═══════╡
|
217
224
|
# # │ true ┆ false ┆ false │
|
218
225
|
# # └──────┴───────┴───────┘
|
219
|
-
def all
|
220
|
-
wrap_expr(_rbexpr.all)
|
226
|
+
def all(drop_nulls: true)
|
227
|
+
wrap_expr(_rbexpr.all(drop_nulls))
|
221
228
|
end
|
222
229
|
|
223
230
|
# Compute the square root of the elements.
|
@@ -359,7 +366,7 @@ module Polars
|
|
359
366
|
# # │ 3 ┆ 1.5 │
|
360
367
|
# # └─────┴──────┘
|
361
368
|
def exclude(columns)
|
362
|
-
if columns.is_a?(String)
|
369
|
+
if columns.is_a?(::String)
|
363
370
|
columns = [columns]
|
364
371
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
372
|
elsif !columns.is_a?(::Array)
|
@@ -367,11 +374,11 @@ module Polars
|
|
367
374
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
375
|
end
|
369
376
|
|
370
|
-
if !columns.all? { |a| a.is_a?(String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
377
|
+
if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
371
378
|
raise ArgumentError, "input should be all string or all DataType"
|
372
379
|
end
|
373
380
|
|
374
|
-
if columns[0].is_a?(String)
|
381
|
+
if columns[0].is_a?(::String)
|
375
382
|
wrap_expr(_rbexpr.exclude(columns))
|
376
383
|
else
|
377
384
|
wrap_expr(_rbexpr.exclude_dtype(columns))
|
@@ -401,21 +408,21 @@ module Polars
|
|
401
408
|
# # │ 18 ┆ 4 │
|
402
409
|
# # └─────┴─────┘
|
403
410
|
def keep_name
|
404
|
-
|
411
|
+
name.keep
|
405
412
|
end
|
406
413
|
|
407
414
|
# Add a prefix to the root column name of the expression.
|
408
415
|
#
|
409
416
|
# @return [Expr]
|
410
417
|
def prefix(prefix)
|
411
|
-
|
418
|
+
name.prefix(prefix)
|
412
419
|
end
|
413
420
|
|
414
421
|
# Add a suffix to the root column name of the expression.
|
415
422
|
#
|
416
423
|
# @return [Expr]
|
417
424
|
def suffix(suffix)
|
418
|
-
|
425
|
+
name.suffix(suffix)
|
419
426
|
end
|
420
427
|
|
421
428
|
# Rename the output of an expression by mapping a function over the root name.
|
@@ -443,7 +450,7 @@ module Polars
|
|
443
450
|
# # │ 1 ┆ 3 │
|
444
451
|
# # └───────────┴───────────┘
|
445
452
|
def map_alias(&f)
|
446
|
-
|
453
|
+
name.map(&f)
|
447
454
|
end
|
448
455
|
|
449
456
|
# Negate a boolean expression.
|
@@ -682,7 +689,7 @@ module Polars
|
|
682
689
|
# "value" => [94, 95, 96, 97, 97, 99]
|
683
690
|
# }
|
684
691
|
# )
|
685
|
-
# df.
|
692
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
686
693
|
# # =>
|
687
694
|
# # shape: (2, 2)
|
688
695
|
# # ┌───────┬───────────┐
|
@@ -714,13 +721,13 @@ module Polars
|
|
714
721
|
# # │ 3 ┆ 3 │
|
715
722
|
# # └─────┴─────┘
|
716
723
|
def count
|
717
|
-
|
724
|
+
warn "`Expr#count` will exclude null values in 0.9.0. Use `Expr#length` instead."
|
725
|
+
# wrap_expr(_rbexpr.count)
|
726
|
+
wrap_expr(_rbexpr.len)
|
718
727
|
end
|
719
728
|
|
720
729
|
# Count the number of values in this expression.
|
721
730
|
#
|
722
|
-
# Alias for {#count}.
|
723
|
-
#
|
724
731
|
# @return [Expr]
|
725
732
|
#
|
726
733
|
# @example
|
@@ -736,8 +743,9 @@ module Polars
|
|
736
743
|
# # │ 3 ┆ 3 │
|
737
744
|
# # └─────┴─────┘
|
738
745
|
def len
|
739
|
-
|
746
|
+
wrap_expr(_rbexpr.len)
|
740
747
|
end
|
748
|
+
alias_method :length, :len
|
741
749
|
|
742
750
|
# Get a slice of this expression.
|
743
751
|
#
|
@@ -905,8 +913,8 @@ module Polars
|
|
905
913
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
906
914
|
# df.select(
|
907
915
|
# [
|
908
|
-
# Polars.col("a").
|
909
|
-
# Polars.col("a").
|
916
|
+
# Polars.col("a").cum_sum,
|
917
|
+
# Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
|
910
918
|
# ]
|
911
919
|
# )
|
912
920
|
# # =>
|
@@ -921,9 +929,10 @@ module Polars
|
|
921
929
|
# # │ 6 ┆ 7 │
|
922
930
|
# # │ 10 ┆ 4 │
|
923
931
|
# # └─────┴───────────┘
|
924
|
-
def
|
925
|
-
wrap_expr(_rbexpr.
|
932
|
+
def cum_sum(reverse: false)
|
933
|
+
wrap_expr(_rbexpr.cum_sum(reverse))
|
926
934
|
end
|
935
|
+
alias_method :cumsum, :cum_sum
|
927
936
|
|
928
937
|
# Get an array with the cumulative product computed at every element.
|
929
938
|
#
|
@@ -940,8 +949,8 @@ module Polars
|
|
940
949
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
941
950
|
# df.select(
|
942
951
|
# [
|
943
|
-
# Polars.col("a").
|
944
|
-
# Polars.col("a").
|
952
|
+
# Polars.col("a").cum_prod,
|
953
|
+
# Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
|
945
954
|
# ]
|
946
955
|
# )
|
947
956
|
# # =>
|
@@ -956,9 +965,10 @@ module Polars
|
|
956
965
|
# # │ 6 ┆ 12 │
|
957
966
|
# # │ 24 ┆ 4 │
|
958
967
|
# # └─────┴───────────┘
|
959
|
-
def
|
960
|
-
wrap_expr(_rbexpr.
|
968
|
+
def cum_prod(reverse: false)
|
969
|
+
wrap_expr(_rbexpr.cum_prod(reverse))
|
961
970
|
end
|
971
|
+
alias_method :cumprod, :cum_prod
|
962
972
|
|
963
973
|
# Get an array with the cumulative min computed at every element.
|
964
974
|
#
|
@@ -971,8 +981,8 @@ module Polars
|
|
971
981
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
972
982
|
# df.select(
|
973
983
|
# [
|
974
|
-
# Polars.col("a").
|
975
|
-
# Polars.col("a").
|
984
|
+
# Polars.col("a").cum_min,
|
985
|
+
# Polars.col("a").cum_min(reverse: true).alias("a_reverse")
|
976
986
|
# ]
|
977
987
|
# )
|
978
988
|
# # =>
|
@@ -987,9 +997,10 @@ module Polars
|
|
987
997
|
# # │ 1 ┆ 3 │
|
988
998
|
# # │ 1 ┆ 4 │
|
989
999
|
# # └─────┴───────────┘
|
990
|
-
def
|
991
|
-
wrap_expr(_rbexpr.
|
1000
|
+
def cum_min(reverse: false)
|
1001
|
+
wrap_expr(_rbexpr.cum_min(reverse))
|
992
1002
|
end
|
1003
|
+
alias_method :cummin, :cum_min
|
993
1004
|
|
994
1005
|
# Get an array with the cumulative max computed at every element.
|
995
1006
|
#
|
@@ -1002,8 +1013,8 @@ module Polars
|
|
1002
1013
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1003
1014
|
# df.select(
|
1004
1015
|
# [
|
1005
|
-
# Polars.col("a").
|
1006
|
-
# Polars.col("a").
|
1016
|
+
# Polars.col("a").cum_max,
|
1017
|
+
# Polars.col("a").cum_max(reverse: true).alias("a_reverse")
|
1007
1018
|
# ]
|
1008
1019
|
# )
|
1009
1020
|
# # =>
|
@@ -1018,9 +1029,10 @@ module Polars
|
|
1018
1029
|
# # │ 3 ┆ 4 │
|
1019
1030
|
# # │ 4 ┆ 4 │
|
1020
1031
|
# # └─────┴───────────┘
|
1021
|
-
def
|
1022
|
-
wrap_expr(_rbexpr.
|
1032
|
+
def cum_max(reverse: false)
|
1033
|
+
wrap_expr(_rbexpr.cum_max(reverse))
|
1023
1034
|
end
|
1035
|
+
alias_method :cummax, :cum_max
|
1024
1036
|
|
1025
1037
|
# Get an array with the cumulative count computed at every element.
|
1026
1038
|
#
|
@@ -1035,8 +1047,8 @@ module Polars
|
|
1035
1047
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1036
1048
|
# df.select(
|
1037
1049
|
# [
|
1038
|
-
# Polars.col("a").
|
1039
|
-
# Polars.col("a").
|
1050
|
+
# Polars.col("a").cum_count,
|
1051
|
+
# Polars.col("a").cum_count(reverse: true).alias("a_reverse")
|
1040
1052
|
# ]
|
1041
1053
|
# )
|
1042
1054
|
# # =>
|
@@ -1051,9 +1063,10 @@ module Polars
|
|
1051
1063
|
# # │ 2 ┆ 1 │
|
1052
1064
|
# # │ 3 ┆ 0 │
|
1053
1065
|
# # └─────┴───────────┘
|
1054
|
-
def
|
1055
|
-
wrap_expr(_rbexpr.
|
1066
|
+
def cum_count(reverse: false)
|
1067
|
+
wrap_expr(_rbexpr.cum_count(reverse))
|
1056
1068
|
end
|
1069
|
+
alias_method :cumcount, :cum_count
|
1057
1070
|
|
1058
1071
|
# Rounds down to the nearest integer value.
|
1059
1072
|
#
|
@@ -1229,7 +1242,7 @@ module Polars
|
|
1229
1242
|
|
1230
1243
|
# Sort this column. In a projection/selection context, the whole column is sorted.
|
1231
1244
|
#
|
1232
|
-
# If used in a
|
1245
|
+
# If used in a group by context, the groups are sorted.
|
1233
1246
|
#
|
1234
1247
|
# @param reverse [Boolean]
|
1235
1248
|
# false -> order from small to large.
|
@@ -1287,7 +1300,7 @@ module Polars
|
|
1287
1300
|
# # └───────┘
|
1288
1301
|
#
|
1289
1302
|
# @example
|
1290
|
-
# df.
|
1303
|
+
# df.group_by("group").agg(Polars.col("value").sort)
|
1291
1304
|
# # =>
|
1292
1305
|
# # shape: (2, 2)
|
1293
1306
|
# # ┌───────┬────────────┐
|
@@ -1337,6 +1350,7 @@ module Polars
|
|
1337
1350
|
# # │ 2 ┆ 98 │
|
1338
1351
|
# # └───────┴──────────┘
|
1339
1352
|
def top_k(k: 5)
|
1353
|
+
k = Utils.parse_as_expression(k)
|
1340
1354
|
wrap_expr(_rbexpr.top_k(k))
|
1341
1355
|
end
|
1342
1356
|
|
@@ -1375,6 +1389,7 @@ module Polars
|
|
1375
1389
|
# # │ 2 ┆ 98 │
|
1376
1390
|
# # └───────┴──────────┘
|
1377
1391
|
def bottom_k(k: 5)
|
1392
|
+
k = Utils.parse_as_expression(k)
|
1378
1393
|
wrap_expr(_rbexpr.bottom_k(k))
|
1379
1394
|
end
|
1380
1395
|
|
@@ -1494,7 +1509,7 @@ module Polars
|
|
1494
1509
|
# Sort this column by the ordering of another column, or multiple other columns.
|
1495
1510
|
#
|
1496
1511
|
# In a projection/selection context, the whole column is sorted.
|
1497
|
-
# If used in a
|
1512
|
+
# If used in a group by context, the groups are sorted.
|
1498
1513
|
#
|
1499
1514
|
# @param by [Object]
|
1500
1515
|
# The column(s) used for sorting.
|
@@ -1566,30 +1581,33 @@ module Polars
|
|
1566
1581
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1567
1582
|
# }
|
1568
1583
|
# )
|
1569
|
-
# df.
|
1584
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
|
1570
1585
|
# # =>
|
1571
1586
|
# # shape: (2, 2)
|
1572
|
-
# #
|
1573
|
-
# # │ group ┆ value
|
1574
|
-
# # │ --- ┆ ---
|
1575
|
-
# # │ str ┆ i64
|
1576
|
-
# #
|
1577
|
-
# # │ one ┆ 98
|
1578
|
-
# # │ two ┆ 99
|
1579
|
-
# #
|
1580
|
-
def
|
1587
|
+
# # ┌───────┬───────────┐
|
1588
|
+
# # │ group ┆ value │
|
1589
|
+
# # │ --- ┆ --- │
|
1590
|
+
# # │ str ┆ list[i64] │
|
1591
|
+
# # ╞═══════╪═══════════╡
|
1592
|
+
# # │ one ┆ [2, 98] │
|
1593
|
+
# # │ two ┆ [4, 99] │
|
1594
|
+
# # └───────┴───────────┘
|
1595
|
+
def gather(indices)
|
1581
1596
|
if indices.is_a?(::Array)
|
1582
1597
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1598
|
else
|
1584
1599
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
1585
1600
|
end
|
1586
|
-
wrap_expr(_rbexpr.
|
1601
|
+
wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
|
1587
1602
|
end
|
1603
|
+
alias_method :take, :gather
|
1588
1604
|
|
1589
1605
|
# Shift the values by a given period.
|
1590
1606
|
#
|
1591
|
-
# @param
|
1607
|
+
# @param n [Integer]
|
1592
1608
|
# Number of places to shift (may be negative).
|
1609
|
+
# @param fill_value [Object]
|
1610
|
+
# Fill the resulting null values with this value.
|
1593
1611
|
#
|
1594
1612
|
# @return [Expr]
|
1595
1613
|
#
|
@@ -1608,8 +1626,12 @@ module Polars
|
|
1608
1626
|
# # │ 2 │
|
1609
1627
|
# # │ 3 │
|
1610
1628
|
# # └──────┘
|
1611
|
-
def shift(
|
1612
|
-
|
1629
|
+
def shift(n = 1, fill_value: nil)
|
1630
|
+
if !fill_value.nil?
|
1631
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1632
|
+
end
|
1633
|
+
n = Utils.parse_as_expression(n)
|
1634
|
+
wrap_expr(_rbexpr.shift(n, fill_value))
|
1613
1635
|
end
|
1614
1636
|
|
1615
1637
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1637,8 +1659,7 @@ module Polars
|
|
1637
1659
|
# # │ 3 │
|
1638
1660
|
# # └─────┘
|
1639
1661
|
def shift_and_fill(periods, fill_value)
|
1640
|
-
|
1641
|
-
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
1662
|
+
shift(periods, fill_value: fill_value)
|
1642
1663
|
end
|
1643
1664
|
|
1644
1665
|
# Fill null values using the specified value or strategy.
|
@@ -2063,7 +2084,7 @@ module Polars
|
|
2063
2084
|
# # │ 2 │
|
2064
2085
|
# # └─────┘
|
2065
2086
|
def approx_unique
|
2066
|
-
wrap_expr(_rbexpr.
|
2087
|
+
wrap_expr(_rbexpr.approx_n_unique)
|
2067
2088
|
end
|
2068
2089
|
|
2069
2090
|
# Count null values.
|
@@ -2201,7 +2222,7 @@ module Polars
|
|
2201
2222
|
|
2202
2223
|
# Apply window function over a subgroup.
|
2203
2224
|
#
|
2204
|
-
# This is similar to a
|
2225
|
+
# This is similar to a group by + aggregation + self join.
|
2205
2226
|
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2206
2227
|
#
|
2207
2228
|
# @param expr [Object]
|
@@ -2309,9 +2330,10 @@ module Polars
|
|
2309
2330
|
# # │ 1 ┆ false │
|
2310
2331
|
# # │ 5 ┆ true │
|
2311
2332
|
# # └─────┴──────────┘
|
2312
|
-
def
|
2313
|
-
wrap_expr(_rbexpr.
|
2333
|
+
def is_first_distinct
|
2334
|
+
wrap_expr(_rbexpr.is_first_distinct)
|
2314
2335
|
end
|
2336
|
+
alias_method :is_first, :is_first_distinct
|
2315
2337
|
|
2316
2338
|
# Get mask of duplicated values.
|
2317
2339
|
#
|
@@ -2335,6 +2357,54 @@ module Polars
|
|
2335
2357
|
wrap_expr(_rbexpr.is_duplicated)
|
2336
2358
|
end
|
2337
2359
|
|
2360
|
+
# Get a boolean mask of the local maximum peaks.
|
2361
|
+
#
|
2362
|
+
# @return [Expr]
|
2363
|
+
#
|
2364
|
+
# @example
|
2365
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
2366
|
+
# df.select(Polars.col("a").peak_max)
|
2367
|
+
# # =>
|
2368
|
+
# # shape: (5, 1)
|
2369
|
+
# # ┌───────┐
|
2370
|
+
# # │ a │
|
2371
|
+
# # │ --- │
|
2372
|
+
# # │ bool │
|
2373
|
+
# # ╞═══════╡
|
2374
|
+
# # │ false │
|
2375
|
+
# # │ false │
|
2376
|
+
# # │ false │
|
2377
|
+
# # │ false │
|
2378
|
+
# # │ true │
|
2379
|
+
# # └───────┘
|
2380
|
+
def peak_max
|
2381
|
+
wrap_expr(_rbexpr.peak_max)
|
2382
|
+
end
|
2383
|
+
|
2384
|
+
# Get a boolean mask of the local minimum peaks.
|
2385
|
+
#
|
2386
|
+
# @return [Expr]
|
2387
|
+
#
|
2388
|
+
# @example
|
2389
|
+
# df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
|
2390
|
+
# df.select(Polars.col("a").peak_min)
|
2391
|
+
# # =>
|
2392
|
+
# # shape: (5, 1)
|
2393
|
+
# # ┌───────┐
|
2394
|
+
# # │ a │
|
2395
|
+
# # │ --- │
|
2396
|
+
# # │ bool │
|
2397
|
+
# # ╞═══════╡
|
2398
|
+
# # │ false │
|
2399
|
+
# # │ true │
|
2400
|
+
# # │ false │
|
2401
|
+
# # │ true │
|
2402
|
+
# # │ false │
|
2403
|
+
# # └───────┘
|
2404
|
+
def peak_min
|
2405
|
+
wrap_expr(_rbexpr.peak_min)
|
2406
|
+
end
|
2407
|
+
|
2338
2408
|
# Get quantile value.
|
2339
2409
|
#
|
2340
2410
|
# @param quantile [Float]
|
@@ -2354,7 +2424,7 @@ module Polars
|
|
2354
2424
|
# # │ --- │
|
2355
2425
|
# # │ f64 │
|
2356
2426
|
# # ╞═════╡
|
2357
|
-
# # │
|
2427
|
+
# # │ 2.0 │
|
2358
2428
|
# # └─────┘
|
2359
2429
|
#
|
2360
2430
|
# @example
|
@@ -2409,6 +2479,206 @@ module Polars
|
|
2409
2479
|
wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
|
2410
2480
|
end
|
2411
2481
|
|
2482
|
+
# Bin continuous values into discrete categories.
|
2483
|
+
#
|
2484
|
+
# @param breaks [Array]
|
2485
|
+
# List of unique cut points.
|
2486
|
+
# @param labels [Array]
|
2487
|
+
# Names of the categories. The number of labels must be equal to the number
|
2488
|
+
# of cut points plus one.
|
2489
|
+
# @param left_closed [Boolean]
|
2490
|
+
# Set the intervals to be left-closed instead of right-closed.
|
2491
|
+
# @param include_breaks [Boolean]
|
2492
|
+
# Include a column with the right endpoint of the bin each observation falls
|
2493
|
+
# in. This will change the data type of the output from a
|
2494
|
+
# `Categorical` to a `Struct`.
|
2495
|
+
#
|
2496
|
+
# @return [Expr]
|
2497
|
+
#
|
2498
|
+
# @example Divide a column into three categories.
|
2499
|
+
# df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
|
2500
|
+
# df.with_columns(
|
2501
|
+
# Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
|
2502
|
+
# )
|
2503
|
+
# # =>
|
2504
|
+
# # shape: (5, 2)
|
2505
|
+
# # ┌─────┬─────┐
|
2506
|
+
# # │ foo ┆ cut │
|
2507
|
+
# # │ --- ┆ --- │
|
2508
|
+
# # │ i64 ┆ cat │
|
2509
|
+
# # ╞═════╪═════╡
|
2510
|
+
# # │ -2 ┆ a │
|
2511
|
+
# # │ -1 ┆ a │
|
2512
|
+
# # │ 0 ┆ b │
|
2513
|
+
# # │ 1 ┆ b │
|
2514
|
+
# # │ 2 ┆ c │
|
2515
|
+
# # └─────┴─────┘
|
2516
|
+
#
|
2517
|
+
# @example Add both the category and the breakpoint.
|
2518
|
+
# df.with_columns(
|
2519
|
+
# Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
|
2520
|
+
# ).unnest("cut")
|
2521
|
+
# # =>
|
2522
|
+
# # shape: (5, 3)
|
2523
|
+
# # ┌─────┬──────┬────────────┐
|
2524
|
+
# # │ foo ┆ brk ┆ foo_bin │
|
2525
|
+
# # │ --- ┆ --- ┆ --- │
|
2526
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2527
|
+
# # ╞═════╪══════╪════════════╡
|
2528
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2529
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2530
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2531
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2532
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2533
|
+
# # └─────┴──────┴────────────┘
|
2534
|
+
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
|
2535
|
+
wrap_expr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
|
2536
|
+
end
|
2537
|
+
|
2538
|
+
# Bin continuous values into discrete categories based on their quantiles.
|
2539
|
+
#
|
2540
|
+
# @param quantiles [Array]
|
2541
|
+
# Either a list of quantile probabilities between 0 and 1 or a positive
|
2542
|
+
# integer determining the number of bins with uniform probability.
|
2543
|
+
# @param labels [Array]
|
2544
|
+
# Names of the categories. The number of labels must be equal to the number
|
2545
|
+
# of categories.
|
2546
|
+
# @param left_closed [Boolean]
|
2547
|
+
# Set the intervals to be left-closed instead of right-closed.
|
2548
|
+
# @param allow_duplicates [Boolean]
|
2549
|
+
# If set to `true`, duplicates in the resulting quantiles are dropped,
|
2550
|
+
# rather than raising a `DuplicateError`. This can happen even with unique
|
2551
|
+
# probabilities, depending on the data.
|
2552
|
+
# @param include_breaks [Boolean]
|
2553
|
+
# Include a column with the right endpoint of the bin each observation falls
|
2554
|
+
# in. This will change the data type of the output from a
|
2555
|
+
# `Categorical` to a `Struct`.
|
2556
|
+
#
|
2557
|
+
# @return [Expr]
|
2558
|
+
#
|
2559
|
+
# @example Divide a column into three categories according to pre-defined quantile probabilities.
|
2560
|
+
# df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
|
2561
|
+
# df.with_columns(
|
2562
|
+
# Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
|
2563
|
+
# )
|
2564
|
+
# # =>
|
2565
|
+
# # shape: (5, 2)
|
2566
|
+
# # ┌─────┬──────┐
|
2567
|
+
# # │ foo ┆ qcut │
|
2568
|
+
# # │ --- ┆ --- │
|
2569
|
+
# # │ i64 ┆ cat │
|
2570
|
+
# # ╞═════╪══════╡
|
2571
|
+
# # │ -2 ┆ a │
|
2572
|
+
# # │ -1 ┆ a │
|
2573
|
+
# # │ 0 ┆ b │
|
2574
|
+
# # │ 1 ┆ b │
|
2575
|
+
# # │ 2 ┆ c │
|
2576
|
+
# # └─────┴──────┘
|
2577
|
+
#
|
2578
|
+
# @example Divide a column into two categories using uniform quantile probabilities.
|
2579
|
+
# df.with_columns(
|
2580
|
+
# Polars.col("foo")
|
2581
|
+
# .qcut(2, labels: ["low", "high"], left_closed: true)
|
2582
|
+
# .alias("qcut")
|
2583
|
+
# )
|
2584
|
+
# # =>
|
2585
|
+
# # shape: (5, 2)
|
2586
|
+
# # ┌─────┬──────┐
|
2587
|
+
# # │ foo ┆ qcut │
|
2588
|
+
# # │ --- ┆ --- │
|
2589
|
+
# # │ i64 ┆ cat │
|
2590
|
+
# # ╞═════╪══════╡
|
2591
|
+
# # │ -2 ┆ low │
|
2592
|
+
# # │ -1 ┆ low │
|
2593
|
+
# # │ 0 ┆ high │
|
2594
|
+
# # │ 1 ┆ high │
|
2595
|
+
# # │ 2 ┆ high │
|
2596
|
+
# # └─────┴──────┘
|
2597
|
+
#
|
2598
|
+
# @example Add both the category and the breakpoint.
|
2599
|
+
# df.with_columns(
|
2600
|
+
# Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
|
2601
|
+
# ).unnest("qcut")
|
2602
|
+
# # =>
|
2603
|
+
# # shape: (5, 3)
|
2604
|
+
# # ┌─────┬──────┬────────────┐
|
2605
|
+
# # │ foo ┆ brk ┆ foo_bin │
|
2606
|
+
# # │ --- ┆ --- ┆ --- │
|
2607
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2608
|
+
# # ╞═════╪══════╪════════════╡
|
2609
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2610
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2611
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2612
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2613
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2614
|
+
# # └─────┴──────┴────────────┘
|
2615
|
+
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
|
2616
|
+
if quantiles.is_a?(Integer)
|
2617
|
+
rbexpr = _rbexpr.qcut_uniform(
|
2618
|
+
quantiles, labels, left_closed, allow_duplicates, include_breaks
|
2619
|
+
)
|
2620
|
+
else
|
2621
|
+
rbexpr = _rbexpr.qcut(
|
2622
|
+
quantiles, labels, left_closed, allow_duplicates, include_breaks
|
2623
|
+
)
|
2624
|
+
end
|
2625
|
+
|
2626
|
+
wrap_expr(rbexpr)
|
2627
|
+
end
|
2628
|
+
|
2629
|
+
# Get the lengths of runs of identical values.
|
2630
|
+
#
|
2631
|
+
# @return [Expr]
|
2632
|
+
#
|
2633
|
+
# @example
|
2634
|
+
# df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
|
2635
|
+
# df.select(Polars.col("s").rle).unnest("s")
|
2636
|
+
# # =>
|
2637
|
+
# # shape: (6, 2)
|
2638
|
+
# # ┌─────────┬────────┐
|
2639
|
+
# # │ lengths ┆ values │
|
2640
|
+
# # │ --- ┆ --- │
|
2641
|
+
# # │ i32 ┆ i64 │
|
2642
|
+
# # ╞═════════╪════════╡
|
2643
|
+
# # │ 2 ┆ 1 │
|
2644
|
+
# # │ 1 ┆ 2 │
|
2645
|
+
# # │ 1 ┆ 1 │
|
2646
|
+
# # │ 1 ┆ null │
|
2647
|
+
# # │ 1 ┆ 1 │
|
2648
|
+
# # │ 2 ┆ 3 │
|
2649
|
+
# # └─────────┴────────┘
|
2650
|
+
def rle
|
2651
|
+
wrap_expr(_rbexpr.rle)
|
2652
|
+
end
|
2653
|
+
|
2654
|
+
# Map values to run IDs.
|
2655
|
+
#
|
2656
|
+
# Similar to RLE, but it maps each value to an ID corresponding to the run into
|
2657
|
+
# which it falls. This is especially useful when you want to define groups by
|
2658
|
+
# runs of identical values rather than the values themselves.
|
2659
|
+
#
|
2660
|
+
# @return [Expr]
|
2661
|
+
#
|
2662
|
+
# @example
|
2663
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
|
2664
|
+
# df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
|
2665
|
+
# # =>
|
2666
|
+
# # shape: (5, 4)
|
2667
|
+
# # ┌─────┬──────┬─────┬──────┐
|
2668
|
+
# # │ a ┆ b ┆ a_r ┆ ab_r │
|
2669
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
2670
|
+
# # │ i64 ┆ str ┆ u32 ┆ u32 │
|
2671
|
+
# # ╞═════╪══════╪═════╪══════╡
|
2672
|
+
# # │ 1 ┆ x ┆ 0 ┆ 0 │
|
2673
|
+
# # │ 2 ┆ x ┆ 1 ┆ 1 │
|
2674
|
+
# # │ 1 ┆ null ┆ 2 ┆ 2 │
|
2675
|
+
# # │ 1 ┆ y ┆ 2 ┆ 3 │
|
2676
|
+
# # │ 1 ┆ y ┆ 2 ┆ 3 │
|
2677
|
+
# # └─────┴──────┴─────┴──────┘
|
2678
|
+
def rle_id
|
2679
|
+
wrap_expr(_rbexpr.rle_id)
|
2680
|
+
end
|
2681
|
+
|
2412
2682
|
# Filter a single column.
|
2413
2683
|
#
|
2414
2684
|
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
@@ -2427,7 +2697,7 @@ module Polars
|
|
2427
2697
|
# }
|
2428
2698
|
# )
|
2429
2699
|
# (
|
2430
|
-
# df.
|
2700
|
+
# df.group_by("group_col").agg(
|
2431
2701
|
# [
|
2432
2702
|
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2433
2703
|
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2465,7 +2735,7 @@ module Polars
|
|
2465
2735
|
# }
|
2466
2736
|
# )
|
2467
2737
|
# (
|
2468
|
-
# df.
|
2738
|
+
# df.group_by("group_col").agg(
|
2469
2739
|
# [
|
2470
2740
|
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2471
2741
|
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2583,7 +2853,7 @@ module Polars
|
|
2583
2853
|
#
|
2584
2854
|
# @example In a GroupBy context the function is applied by group:
|
2585
2855
|
# df.lazy
|
2586
|
-
# .
|
2856
|
+
# .group_by("b", maintain_order: true)
|
2587
2857
|
# .agg(
|
2588
2858
|
# [
|
2589
2859
|
# Polars.col("a").apply { |x| x.sum }
|
@@ -2622,7 +2892,7 @@ module Polars
|
|
2622
2892
|
# "values" => [[1, 2], [2, 3], [4]]
|
2623
2893
|
# }
|
2624
2894
|
# )
|
2625
|
-
# df.
|
2895
|
+
# df.group_by("group").agg(Polars.col("values").flatten)
|
2626
2896
|
# # =>
|
2627
2897
|
# # shape: (2, 2)
|
2628
2898
|
# # ┌───────┬───────────┐
|
@@ -2670,7 +2940,7 @@ module Polars
|
|
2670
2940
|
#
|
2671
2941
|
# @example
|
2672
2942
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2673
|
-
# df.select(Polars.col("foo").
|
2943
|
+
# df.select(Polars.col("foo").gather_every(3))
|
2674
2944
|
# # =>
|
2675
2945
|
# # shape: (3, 1)
|
2676
2946
|
# # ┌─────┐
|
@@ -2682,9 +2952,10 @@ module Polars
|
|
2682
2952
|
# # │ 4 │
|
2683
2953
|
# # │ 7 │
|
2684
2954
|
# # └─────┘
|
2685
|
-
def
|
2686
|
-
wrap_expr(_rbexpr.
|
2955
|
+
def gather_every(n, offset = 0)
|
2956
|
+
wrap_expr(_rbexpr.gather_every(n, offset))
|
2687
2957
|
end
|
2958
|
+
alias_method :take_every, :gather_every
|
2688
2959
|
|
2689
2960
|
# Get the first `n` rows.
|
2690
2961
|
#
|
@@ -3057,11 +3328,11 @@ module Polars
|
|
3057
3328
|
# # ┌─────┬─────┐
|
3058
3329
|
# # │ a ┆ b │
|
3059
3330
|
# # │ --- ┆ --- │
|
3060
|
-
# # │
|
3331
|
+
# # │ f64 ┆ f64 │
|
3061
3332
|
# # ╞═════╪═════╡
|
3062
|
-
# # │ 1
|
3063
|
-
# # │ 2
|
3064
|
-
# # │ 3
|
3333
|
+
# # │ 1.0 ┆ 1.0 │
|
3334
|
+
# # │ 2.0 ┆ NaN │
|
3335
|
+
# # │ 3.0 ┆ 3.0 │
|
3065
3336
|
# # └─────┴─────┘
|
3066
3337
|
def interpolate(method: "linear")
|
3067
3338
|
wrap_expr(_rbexpr.interpolate(method))
|
@@ -3112,7 +3383,7 @@ module Polars
|
|
3112
3383
|
#
|
3113
3384
|
# @note
|
3114
3385
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3115
|
-
# window, consider using `
|
3386
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3116
3387
|
# computation.
|
3117
3388
|
#
|
3118
3389
|
# @return [Expr]
|
@@ -3201,7 +3472,7 @@ module Polars
|
|
3201
3472
|
#
|
3202
3473
|
# @note
|
3203
3474
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3204
|
-
# window, consider using `
|
3475
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3205
3476
|
# computation.
|
3206
3477
|
#
|
3207
3478
|
# @return [Expr]
|
@@ -3290,7 +3561,7 @@ module Polars
|
|
3290
3561
|
#
|
3291
3562
|
# @note
|
3292
3563
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3293
|
-
# window, consider using `
|
3564
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3294
3565
|
# computation.
|
3295
3566
|
#
|
3296
3567
|
# @return [Expr]
|
@@ -3379,7 +3650,7 @@ module Polars
|
|
3379
3650
|
#
|
3380
3651
|
# @note
|
3381
3652
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3382
|
-
# window, consider using `
|
3653
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3383
3654
|
# computation.
|
3384
3655
|
#
|
3385
3656
|
# @return [Expr]
|
@@ -3468,7 +3739,7 @@ module Polars
|
|
3468
3739
|
#
|
3469
3740
|
# @note
|
3470
3741
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3471
|
-
# window, consider using `
|
3742
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3472
3743
|
# computation.
|
3473
3744
|
#
|
3474
3745
|
# @return [Expr]
|
@@ -3501,14 +3772,15 @@ module Polars
|
|
3501
3772
|
center: false,
|
3502
3773
|
by: nil,
|
3503
3774
|
closed: "left",
|
3504
|
-
ddof: 1
|
3775
|
+
ddof: 1,
|
3776
|
+
warn_if_unsorted: true
|
3505
3777
|
)
|
3506
3778
|
window_size, min_periods = _prepare_rolling_window_args(
|
3507
3779
|
window_size, min_periods
|
3508
3780
|
)
|
3509
3781
|
wrap_expr(
|
3510
3782
|
_rbexpr.rolling_std(
|
3511
|
-
window_size, weights, min_periods, center, by, closed, ddof
|
3783
|
+
window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
|
3512
3784
|
)
|
3513
3785
|
)
|
3514
3786
|
end
|
@@ -3558,7 +3830,7 @@ module Polars
|
|
3558
3830
|
#
|
3559
3831
|
# @note
|
3560
3832
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3561
|
-
# window, consider using `
|
3833
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3562
3834
|
# computation.
|
3563
3835
|
#
|
3564
3836
|
# @return [Expr]
|
@@ -3591,14 +3863,15 @@ module Polars
|
|
3591
3863
|
center: false,
|
3592
3864
|
by: nil,
|
3593
3865
|
closed: "left",
|
3594
|
-
ddof: 1
|
3866
|
+
ddof: 1,
|
3867
|
+
warn_if_unsorted: true
|
3595
3868
|
)
|
3596
3869
|
window_size, min_periods = _prepare_rolling_window_args(
|
3597
3870
|
window_size, min_periods
|
3598
3871
|
)
|
3599
3872
|
wrap_expr(
|
3600
3873
|
_rbexpr.rolling_var(
|
3601
|
-
window_size, weights, min_periods, center, by, closed, ddof
|
3874
|
+
window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
|
3602
3875
|
)
|
3603
3876
|
)
|
3604
3877
|
end
|
@@ -3644,7 +3917,7 @@ module Polars
|
|
3644
3917
|
#
|
3645
3918
|
# @note
|
3646
3919
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3647
|
-
# window, consider using `
|
3920
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3648
3921
|
# computation.
|
3649
3922
|
#
|
3650
3923
|
# @return [Expr]
|
@@ -3676,14 +3949,15 @@ module Polars
|
|
3676
3949
|
min_periods: nil,
|
3677
3950
|
center: false,
|
3678
3951
|
by: nil,
|
3679
|
-
closed: "left"
|
3952
|
+
closed: "left",
|
3953
|
+
warn_if_unsorted: true
|
3680
3954
|
)
|
3681
3955
|
window_size, min_periods = _prepare_rolling_window_args(
|
3682
3956
|
window_size, min_periods
|
3683
3957
|
)
|
3684
3958
|
wrap_expr(
|
3685
3959
|
_rbexpr.rolling_median(
|
3686
|
-
window_size, weights, min_periods, center, by, closed
|
3960
|
+
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
|
3687
3961
|
)
|
3688
3962
|
)
|
3689
3963
|
end
|
@@ -3733,7 +4007,7 @@ module Polars
|
|
3733
4007
|
#
|
3734
4008
|
# @note
|
3735
4009
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3736
|
-
# window, consider using `
|
4010
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3737
4011
|
# computation.
|
3738
4012
|
#
|
3739
4013
|
# @return [Expr]
|
@@ -3767,14 +4041,15 @@ module Polars
|
|
3767
4041
|
min_periods: nil,
|
3768
4042
|
center: false,
|
3769
4043
|
by: nil,
|
3770
|
-
closed: "left"
|
4044
|
+
closed: "left",
|
4045
|
+
warn_if_unsorted: true
|
3771
4046
|
)
|
3772
4047
|
window_size, min_periods = _prepare_rolling_window_args(
|
3773
4048
|
window_size, min_periods
|
3774
4049
|
)
|
3775
4050
|
wrap_expr(
|
3776
4051
|
_rbexpr.rolling_quantile(
|
3777
|
-
quantile, interpolation, window_size, weights, min_periods, center, by, closed
|
4052
|
+
quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
|
3778
4053
|
)
|
3779
4054
|
)
|
3780
4055
|
end
|
@@ -3948,7 +4223,7 @@ module Polars
|
|
3948
4223
|
# # ┌─────┐
|
3949
4224
|
# # │ a │
|
3950
4225
|
# # │ --- │
|
3951
|
-
# # │
|
4226
|
+
# # │ f64 │
|
3952
4227
|
# # ╞═════╡
|
3953
4228
|
# # │ 3.0 │
|
3954
4229
|
# # │ 4.5 │
|
@@ -4041,6 +4316,7 @@ module Polars
|
|
4041
4316
|
# # │ 12 ┆ 0.0 │
|
4042
4317
|
# # └──────┴────────────┘
|
4043
4318
|
def pct_change(n: 1)
|
4319
|
+
n = Utils.parse_as_expression(n)
|
4044
4320
|
wrap_expr(_rbexpr.pct_change(n))
|
4045
4321
|
end
|
4046
4322
|
|
@@ -4105,16 +4381,14 @@ module Polars
|
|
4105
4381
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
4106
4382
|
end
|
4107
4383
|
|
4108
|
-
#
|
4109
|
-
#
|
4110
|
-
# Only works for numerical types.
|
4384
|
+
# Set values outside the given boundaries to the boundary value.
|
4111
4385
|
#
|
4112
|
-
# If you want to clip other
|
4113
|
-
#
|
4386
|
+
# Only works for numeric and temporal columns. If you want to clip other data
|
4387
|
+
# types, consider writing a `when-then-otherwise` expression.
|
4114
4388
|
#
|
4115
|
-
# @param
|
4389
|
+
# @param lower_bound [Numeric]
|
4116
4390
|
# Minimum value.
|
4117
|
-
# @param
|
4391
|
+
# @param upper_bound [Numeric]
|
4118
4392
|
# Maximum value.
|
4119
4393
|
#
|
4120
4394
|
# @return [Expr]
|
@@ -4134,8 +4408,14 @@ module Polars
|
|
4134
4408
|
# # │ null ┆ null │
|
4135
4409
|
# # │ 50 ┆ 10 │
|
4136
4410
|
# # └──────┴─────────────┘
|
4137
|
-
def clip(
|
4138
|
-
|
4411
|
+
def clip(lower_bound, upper_bound)
|
4412
|
+
if !lower_bound.nil?
|
4413
|
+
lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
|
4414
|
+
end
|
4415
|
+
if !upper_bound.nil?
|
4416
|
+
upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
|
4417
|
+
end
|
4418
|
+
wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
|
4139
4419
|
end
|
4140
4420
|
|
4141
4421
|
# Clip (limit) the values in an array to a `min` boundary.
|
@@ -4145,7 +4425,7 @@ module Polars
|
|
4145
4425
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4146
4426
|
# expression. See `when` for more information.
|
4147
4427
|
#
|
4148
|
-
# @param
|
4428
|
+
# @param lower_bound [Numeric]
|
4149
4429
|
# Minimum value.
|
4150
4430
|
#
|
4151
4431
|
# @return [Expr]
|
@@ -4165,8 +4445,8 @@ module Polars
|
|
4165
4445
|
# # │ null ┆ null │
|
4166
4446
|
# # │ 50 ┆ 50 │
|
4167
4447
|
# # └──────┴─────────────┘
|
4168
|
-
def clip_min(
|
4169
|
-
|
4448
|
+
def clip_min(lower_bound)
|
4449
|
+
clip(lower_bound, nil)
|
4170
4450
|
end
|
4171
4451
|
|
4172
4452
|
# Clip (limit) the values in an array to a `max` boundary.
|
@@ -4176,7 +4456,7 @@ module Polars
|
|
4176
4456
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4177
4457
|
# expression. See `when` for more information.
|
4178
4458
|
#
|
4179
|
-
# @param
|
4459
|
+
# @param upper_bound [Numeric]
|
4180
4460
|
# Maximum value.
|
4181
4461
|
#
|
4182
4462
|
# @return [Expr]
|
@@ -4196,8 +4476,8 @@ module Polars
|
|
4196
4476
|
# # │ null ┆ null │
|
4197
4477
|
# # │ 50 ┆ 0 │
|
4198
4478
|
# # └──────┴─────────────┘
|
4199
|
-
def clip_max(
|
4200
|
-
|
4479
|
+
def clip_max(upper_bound)
|
4480
|
+
clip(nil, upper_bound)
|
4201
4481
|
end
|
4202
4482
|
|
4203
4483
|
# Calculate the lower bound.
|
@@ -4558,11 +4838,11 @@ module Polars
|
|
4558
4838
|
# # │ 1 │
|
4559
4839
|
# # │ 3 │
|
4560
4840
|
# # └─────┘
|
4561
|
-
def shuffle(seed: nil
|
4841
|
+
def shuffle(seed: nil)
|
4562
4842
|
if seed.nil?
|
4563
4843
|
seed = rand(10000)
|
4564
4844
|
end
|
4565
|
-
wrap_expr(_rbexpr.shuffle(seed
|
4845
|
+
wrap_expr(_rbexpr.shuffle(seed))
|
4566
4846
|
end
|
4567
4847
|
|
4568
4848
|
# Sample from this expression.
|
@@ -4600,22 +4880,23 @@ module Polars
|
|
4600
4880
|
with_replacement: true,
|
4601
4881
|
shuffle: false,
|
4602
4882
|
seed: nil,
|
4603
|
-
n: nil
|
4604
|
-
fixed_seed: false
|
4883
|
+
n: nil
|
4605
4884
|
)
|
4606
4885
|
if !n.nil? && !frac.nil?
|
4607
4886
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4608
4887
|
end
|
4609
4888
|
|
4610
4889
|
if !n.nil? && frac.nil?
|
4611
|
-
|
4890
|
+
n = Utils.parse_as_expression(n)
|
4891
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4612
4892
|
end
|
4613
4893
|
|
4614
4894
|
if frac.nil?
|
4615
4895
|
frac = 1.0
|
4616
4896
|
end
|
4897
|
+
frac = Utils.parse_as_expression(frac)
|
4617
4898
|
wrap_expr(
|
4618
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed
|
4899
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4619
4900
|
)
|
4620
4901
|
end
|
4621
4902
|
|
@@ -4885,7 +5166,7 @@ module Polars
|
|
4885
5166
|
# Number of valid values there should be in the window before the expression
|
4886
5167
|
# is evaluated. valid values = `length - null_count`
|
4887
5168
|
# @param parallel [Boolean]
|
4888
|
-
# Run in parallel. Don't do this in a
|
5169
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
4889
5170
|
# already has much parallelization.
|
4890
5171
|
#
|
4891
5172
|
# @return [Expr]
|
@@ -5057,6 +5338,13 @@ module Polars
|
|
5057
5338
|
MetaExpr.new(self)
|
5058
5339
|
end
|
5059
5340
|
|
5341
|
+
# Create an object namespace of all expressions that modify expression names.
|
5342
|
+
#
|
5343
|
+
# @return [NameExpr]
|
5344
|
+
def name
|
5345
|
+
NameExpr.new(self)
|
5346
|
+
end
|
5347
|
+
|
5060
5348
|
# Create an object namespace of all string related methods.
|
5061
5349
|
#
|
5062
5350
|
# @return [StringExpr]
|