polars-df 0.6.0-x86_64-linux → 0.8.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4491 -5915
- data/README.md +8 -7
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
data/lib/polars/expr.rb
CHANGED
@@ -131,6 +131,13 @@ module Polars
|
|
131
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
132
132
|
end
|
133
133
|
|
134
|
+
# Performs boolean not.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
137
|
+
def !
|
138
|
+
is_not
|
139
|
+
end
|
140
|
+
|
134
141
|
# Performs negation.
|
135
142
|
#
|
136
143
|
# @return [Expr]
|
@@ -191,8 +198,8 @@ module Polars
|
|
191
198
|
# # ╞══════╪═══════╡
|
192
199
|
# # │ true ┆ false │
|
193
200
|
# # └──────┴───────┘
|
194
|
-
def any
|
195
|
-
wrap_expr(_rbexpr.any)
|
201
|
+
def any(drop_nulls: true)
|
202
|
+
wrap_expr(_rbexpr.any(drop_nulls))
|
196
203
|
end
|
197
204
|
|
198
205
|
# Check if all boolean values in a Boolean column are `true`.
|
@@ -216,8 +223,8 @@ module Polars
|
|
216
223
|
# # ╞══════╪═══════╪═══════╡
|
217
224
|
# # │ true ┆ false ┆ false │
|
218
225
|
# # └──────┴───────┴───────┘
|
219
|
-
def all
|
220
|
-
wrap_expr(_rbexpr.all)
|
226
|
+
def all(drop_nulls: true)
|
227
|
+
wrap_expr(_rbexpr.all(drop_nulls))
|
221
228
|
end
|
222
229
|
|
223
230
|
# Compute the square root of the elements.
|
@@ -359,7 +366,7 @@ module Polars
|
|
359
366
|
# # │ 3 ┆ 1.5 │
|
360
367
|
# # └─────┴──────┘
|
361
368
|
def exclude(columns)
|
362
|
-
if columns.is_a?(String)
|
369
|
+
if columns.is_a?(::String)
|
363
370
|
columns = [columns]
|
364
371
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
372
|
elsif !columns.is_a?(::Array)
|
@@ -367,11 +374,11 @@ module Polars
|
|
367
374
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
375
|
end
|
369
376
|
|
370
|
-
if !columns.all? { |a| a.is_a?(String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
377
|
+
if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
371
378
|
raise ArgumentError, "input should be all string or all DataType"
|
372
379
|
end
|
373
380
|
|
374
|
-
if columns[0].is_a?(String)
|
381
|
+
if columns[0].is_a?(::String)
|
375
382
|
wrap_expr(_rbexpr.exclude(columns))
|
376
383
|
else
|
377
384
|
wrap_expr(_rbexpr.exclude_dtype(columns))
|
@@ -401,21 +408,21 @@ module Polars
|
|
401
408
|
# # │ 18 ┆ 4 │
|
402
409
|
# # └─────┴─────┘
|
403
410
|
def keep_name
|
404
|
-
|
411
|
+
name.keep
|
405
412
|
end
|
406
413
|
|
407
414
|
# Add a prefix to the root column name of the expression.
|
408
415
|
#
|
409
416
|
# @return [Expr]
|
410
417
|
def prefix(prefix)
|
411
|
-
|
418
|
+
name.prefix(prefix)
|
412
419
|
end
|
413
420
|
|
414
421
|
# Add a suffix to the root column name of the expression.
|
415
422
|
#
|
416
423
|
# @return [Expr]
|
417
424
|
def suffix(suffix)
|
418
|
-
|
425
|
+
name.suffix(suffix)
|
419
426
|
end
|
420
427
|
|
421
428
|
# Rename the output of an expression by mapping a function over the root name.
|
@@ -443,7 +450,7 @@ module Polars
|
|
443
450
|
# # │ 1 ┆ 3 │
|
444
451
|
# # └───────────┴───────────┘
|
445
452
|
def map_alias(&f)
|
446
|
-
|
453
|
+
name.map(&f)
|
447
454
|
end
|
448
455
|
|
449
456
|
# Negate a boolean expression.
|
@@ -682,7 +689,7 @@ module Polars
|
|
682
689
|
# "value" => [94, 95, 96, 97, 97, 99]
|
683
690
|
# }
|
684
691
|
# )
|
685
|
-
# df.
|
692
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
686
693
|
# # =>
|
687
694
|
# # shape: (2, 2)
|
688
695
|
# # ┌───────┬───────────┐
|
@@ -714,13 +721,13 @@ module Polars
|
|
714
721
|
# # │ 3 ┆ 3 │
|
715
722
|
# # └─────┴─────┘
|
716
723
|
def count
|
717
|
-
|
724
|
+
warn "`Expr#count` will exclude null values in 0.9.0. Use `Expr#length` instead."
|
725
|
+
# wrap_expr(_rbexpr.count)
|
726
|
+
wrap_expr(_rbexpr.len)
|
718
727
|
end
|
719
728
|
|
720
729
|
# Count the number of values in this expression.
|
721
730
|
#
|
722
|
-
# Alias for {#count}.
|
723
|
-
#
|
724
731
|
# @return [Expr]
|
725
732
|
#
|
726
733
|
# @example
|
@@ -736,8 +743,9 @@ module Polars
|
|
736
743
|
# # │ 3 ┆ 3 │
|
737
744
|
# # └─────┴─────┘
|
738
745
|
def len
|
739
|
-
|
746
|
+
wrap_expr(_rbexpr.len)
|
740
747
|
end
|
748
|
+
alias_method :length, :len
|
741
749
|
|
742
750
|
# Get a slice of this expression.
|
743
751
|
#
|
@@ -905,8 +913,8 @@ module Polars
|
|
905
913
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
906
914
|
# df.select(
|
907
915
|
# [
|
908
|
-
# Polars.col("a").
|
909
|
-
# Polars.col("a").
|
916
|
+
# Polars.col("a").cum_sum,
|
917
|
+
# Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
|
910
918
|
# ]
|
911
919
|
# )
|
912
920
|
# # =>
|
@@ -921,9 +929,10 @@ module Polars
|
|
921
929
|
# # │ 6 ┆ 7 │
|
922
930
|
# # │ 10 ┆ 4 │
|
923
931
|
# # └─────┴───────────┘
|
924
|
-
def
|
925
|
-
wrap_expr(_rbexpr.
|
932
|
+
def cum_sum(reverse: false)
|
933
|
+
wrap_expr(_rbexpr.cum_sum(reverse))
|
926
934
|
end
|
935
|
+
alias_method :cumsum, :cum_sum
|
927
936
|
|
928
937
|
# Get an array with the cumulative product computed at every element.
|
929
938
|
#
|
@@ -940,8 +949,8 @@ module Polars
|
|
940
949
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
941
950
|
# df.select(
|
942
951
|
# [
|
943
|
-
# Polars.col("a").
|
944
|
-
# Polars.col("a").
|
952
|
+
# Polars.col("a").cum_prod,
|
953
|
+
# Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
|
945
954
|
# ]
|
946
955
|
# )
|
947
956
|
# # =>
|
@@ -956,9 +965,10 @@ module Polars
|
|
956
965
|
# # │ 6 ┆ 12 │
|
957
966
|
# # │ 24 ┆ 4 │
|
958
967
|
# # └─────┴───────────┘
|
959
|
-
def
|
960
|
-
wrap_expr(_rbexpr.
|
968
|
+
def cum_prod(reverse: false)
|
969
|
+
wrap_expr(_rbexpr.cum_prod(reverse))
|
961
970
|
end
|
971
|
+
alias_method :cumprod, :cum_prod
|
962
972
|
|
963
973
|
# Get an array with the cumulative min computed at every element.
|
964
974
|
#
|
@@ -971,8 +981,8 @@ module Polars
|
|
971
981
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
972
982
|
# df.select(
|
973
983
|
# [
|
974
|
-
# Polars.col("a").
|
975
|
-
# Polars.col("a").
|
984
|
+
# Polars.col("a").cum_min,
|
985
|
+
# Polars.col("a").cum_min(reverse: true).alias("a_reverse")
|
976
986
|
# ]
|
977
987
|
# )
|
978
988
|
# # =>
|
@@ -987,9 +997,10 @@ module Polars
|
|
987
997
|
# # │ 1 ┆ 3 │
|
988
998
|
# # │ 1 ┆ 4 │
|
989
999
|
# # └─────┴───────────┘
|
990
|
-
def
|
991
|
-
wrap_expr(_rbexpr.
|
1000
|
+
def cum_min(reverse: false)
|
1001
|
+
wrap_expr(_rbexpr.cum_min(reverse))
|
992
1002
|
end
|
1003
|
+
alias_method :cummin, :cum_min
|
993
1004
|
|
994
1005
|
# Get an array with the cumulative max computed at every element.
|
995
1006
|
#
|
@@ -1002,8 +1013,8 @@ module Polars
|
|
1002
1013
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1003
1014
|
# df.select(
|
1004
1015
|
# [
|
1005
|
-
# Polars.col("a").
|
1006
|
-
# Polars.col("a").
|
1016
|
+
# Polars.col("a").cum_max,
|
1017
|
+
# Polars.col("a").cum_max(reverse: true).alias("a_reverse")
|
1007
1018
|
# ]
|
1008
1019
|
# )
|
1009
1020
|
# # =>
|
@@ -1018,9 +1029,10 @@ module Polars
|
|
1018
1029
|
# # │ 3 ┆ 4 │
|
1019
1030
|
# # │ 4 ┆ 4 │
|
1020
1031
|
# # └─────┴───────────┘
|
1021
|
-
def
|
1022
|
-
wrap_expr(_rbexpr.
|
1032
|
+
def cum_max(reverse: false)
|
1033
|
+
wrap_expr(_rbexpr.cum_max(reverse))
|
1023
1034
|
end
|
1035
|
+
alias_method :cummax, :cum_max
|
1024
1036
|
|
1025
1037
|
# Get an array with the cumulative count computed at every element.
|
1026
1038
|
#
|
@@ -1035,8 +1047,8 @@ module Polars
|
|
1035
1047
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1036
1048
|
# df.select(
|
1037
1049
|
# [
|
1038
|
-
# Polars.col("a").
|
1039
|
-
# Polars.col("a").
|
1050
|
+
# Polars.col("a").cum_count,
|
1051
|
+
# Polars.col("a").cum_count(reverse: true).alias("a_reverse")
|
1040
1052
|
# ]
|
1041
1053
|
# )
|
1042
1054
|
# # =>
|
@@ -1051,9 +1063,10 @@ module Polars
|
|
1051
1063
|
# # │ 2 ┆ 1 │
|
1052
1064
|
# # │ 3 ┆ 0 │
|
1053
1065
|
# # └─────┴───────────┘
|
1054
|
-
def
|
1055
|
-
wrap_expr(_rbexpr.
|
1066
|
+
def cum_count(reverse: false)
|
1067
|
+
wrap_expr(_rbexpr.cum_count(reverse))
|
1056
1068
|
end
|
1069
|
+
alias_method :cumcount, :cum_count
|
1057
1070
|
|
1058
1071
|
# Rounds down to the nearest integer value.
|
1059
1072
|
#
|
@@ -1229,7 +1242,7 @@ module Polars
|
|
1229
1242
|
|
1230
1243
|
# Sort this column. In a projection/selection context, the whole column is sorted.
|
1231
1244
|
#
|
1232
|
-
# If used in a
|
1245
|
+
# If used in a group by context, the groups are sorted.
|
1233
1246
|
#
|
1234
1247
|
# @param reverse [Boolean]
|
1235
1248
|
# false -> order from small to large.
|
@@ -1287,7 +1300,7 @@ module Polars
|
|
1287
1300
|
# # └───────┘
|
1288
1301
|
#
|
1289
1302
|
# @example
|
1290
|
-
# df.
|
1303
|
+
# df.group_by("group").agg(Polars.col("value").sort)
|
1291
1304
|
# # =>
|
1292
1305
|
# # shape: (2, 2)
|
1293
1306
|
# # ┌───────┬────────────┐
|
@@ -1337,6 +1350,7 @@ module Polars
|
|
1337
1350
|
# # │ 2 ┆ 98 │
|
1338
1351
|
# # └───────┴──────────┘
|
1339
1352
|
def top_k(k: 5)
|
1353
|
+
k = Utils.parse_as_expression(k)
|
1340
1354
|
wrap_expr(_rbexpr.top_k(k))
|
1341
1355
|
end
|
1342
1356
|
|
@@ -1375,6 +1389,7 @@ module Polars
|
|
1375
1389
|
# # │ 2 ┆ 98 │
|
1376
1390
|
# # └───────┴──────────┘
|
1377
1391
|
def bottom_k(k: 5)
|
1392
|
+
k = Utils.parse_as_expression(k)
|
1378
1393
|
wrap_expr(_rbexpr.bottom_k(k))
|
1379
1394
|
end
|
1380
1395
|
|
@@ -1494,7 +1509,7 @@ module Polars
|
|
1494
1509
|
# Sort this column by the ordering of another column, or multiple other columns.
|
1495
1510
|
#
|
1496
1511
|
# In a projection/selection context, the whole column is sorted.
|
1497
|
-
# If used in a
|
1512
|
+
# If used in a group by context, the groups are sorted.
|
1498
1513
|
#
|
1499
1514
|
# @param by [Object]
|
1500
1515
|
# The column(s) used for sorting.
|
@@ -1566,30 +1581,33 @@ module Polars
|
|
1566
1581
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1567
1582
|
# }
|
1568
1583
|
# )
|
1569
|
-
# df.
|
1584
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
|
1570
1585
|
# # =>
|
1571
1586
|
# # shape: (2, 2)
|
1572
|
-
# #
|
1573
|
-
# # │ group ┆ value
|
1574
|
-
# # │ --- ┆ ---
|
1575
|
-
# # │ str ┆ i64
|
1576
|
-
# #
|
1577
|
-
# # │ one ┆ 98
|
1578
|
-
# # │ two ┆ 99
|
1579
|
-
# #
|
1580
|
-
def
|
1587
|
+
# # ┌───────┬───────────┐
|
1588
|
+
# # │ group ┆ value │
|
1589
|
+
# # │ --- ┆ --- │
|
1590
|
+
# # │ str ┆ list[i64] │
|
1591
|
+
# # ╞═══════╪═══════════╡
|
1592
|
+
# # │ one ┆ [2, 98] │
|
1593
|
+
# # │ two ┆ [4, 99] │
|
1594
|
+
# # └───────┴───────────┘
|
1595
|
+
def gather(indices)
|
1581
1596
|
if indices.is_a?(::Array)
|
1582
1597
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1598
|
else
|
1584
1599
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
1585
1600
|
end
|
1586
|
-
wrap_expr(_rbexpr.
|
1601
|
+
wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
|
1587
1602
|
end
|
1603
|
+
alias_method :take, :gather
|
1588
1604
|
|
1589
1605
|
# Shift the values by a given period.
|
1590
1606
|
#
|
1591
|
-
# @param
|
1607
|
+
# @param n [Integer]
|
1592
1608
|
# Number of places to shift (may be negative).
|
1609
|
+
# @param fill_value [Object]
|
1610
|
+
# Fill the resulting null values with this value.
|
1593
1611
|
#
|
1594
1612
|
# @return [Expr]
|
1595
1613
|
#
|
@@ -1608,8 +1626,12 @@ module Polars
|
|
1608
1626
|
# # │ 2 │
|
1609
1627
|
# # │ 3 │
|
1610
1628
|
# # └──────┘
|
1611
|
-
def shift(
|
1612
|
-
|
1629
|
+
def shift(n = 1, fill_value: nil)
|
1630
|
+
if !fill_value.nil?
|
1631
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1632
|
+
end
|
1633
|
+
n = Utils.parse_as_expression(n)
|
1634
|
+
wrap_expr(_rbexpr.shift(n, fill_value))
|
1613
1635
|
end
|
1614
1636
|
|
1615
1637
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1637,8 +1659,7 @@ module Polars
|
|
1637
1659
|
# # │ 3 │
|
1638
1660
|
# # └─────┘
|
1639
1661
|
def shift_and_fill(periods, fill_value)
|
1640
|
-
|
1641
|
-
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
1662
|
+
shift(periods, fill_value: fill_value)
|
1642
1663
|
end
|
1643
1664
|
|
1644
1665
|
# Fill null values using the specified value or strategy.
|
@@ -2063,7 +2084,7 @@ module Polars
|
|
2063
2084
|
# # │ 2 │
|
2064
2085
|
# # └─────┘
|
2065
2086
|
def approx_unique
|
2066
|
-
wrap_expr(_rbexpr.
|
2087
|
+
wrap_expr(_rbexpr.approx_n_unique)
|
2067
2088
|
end
|
2068
2089
|
|
2069
2090
|
# Count null values.
|
@@ -2201,7 +2222,7 @@ module Polars
|
|
2201
2222
|
|
2202
2223
|
# Apply window function over a subgroup.
|
2203
2224
|
#
|
2204
|
-
# This is similar to a
|
2225
|
+
# This is similar to a group by + aggregation + self join.
|
2205
2226
|
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2206
2227
|
#
|
2207
2228
|
# @param expr [Object]
|
@@ -2309,9 +2330,10 @@ module Polars
|
|
2309
2330
|
# # │ 1 ┆ false │
|
2310
2331
|
# # │ 5 ┆ true │
|
2311
2332
|
# # └─────┴──────────┘
|
2312
|
-
def
|
2313
|
-
wrap_expr(_rbexpr.
|
2333
|
+
def is_first_distinct
|
2334
|
+
wrap_expr(_rbexpr.is_first_distinct)
|
2314
2335
|
end
|
2336
|
+
alias_method :is_first, :is_first_distinct
|
2315
2337
|
|
2316
2338
|
# Get mask of duplicated values.
|
2317
2339
|
#
|
@@ -2335,6 +2357,54 @@ module Polars
|
|
2335
2357
|
wrap_expr(_rbexpr.is_duplicated)
|
2336
2358
|
end
|
2337
2359
|
|
2360
|
+
# Get a boolean mask of the local maximum peaks.
|
2361
|
+
#
|
2362
|
+
# @return [Expr]
|
2363
|
+
#
|
2364
|
+
# @example
|
2365
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
2366
|
+
# df.select(Polars.col("a").peak_max)
|
2367
|
+
# # =>
|
2368
|
+
# # shape: (5, 1)
|
2369
|
+
# # ┌───────┐
|
2370
|
+
# # │ a │
|
2371
|
+
# # │ --- │
|
2372
|
+
# # │ bool │
|
2373
|
+
# # ╞═══════╡
|
2374
|
+
# # │ false │
|
2375
|
+
# # │ false │
|
2376
|
+
# # │ false │
|
2377
|
+
# # │ false │
|
2378
|
+
# # │ true │
|
2379
|
+
# # └───────┘
|
2380
|
+
def peak_max
|
2381
|
+
wrap_expr(_rbexpr.peak_max)
|
2382
|
+
end
|
2383
|
+
|
2384
|
+
# Get a boolean mask of the local minimum peaks.
|
2385
|
+
#
|
2386
|
+
# @return [Expr]
|
2387
|
+
#
|
2388
|
+
# @example
|
2389
|
+
# df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
|
2390
|
+
# df.select(Polars.col("a").peak_min)
|
2391
|
+
# # =>
|
2392
|
+
# # shape: (5, 1)
|
2393
|
+
# # ┌───────┐
|
2394
|
+
# # │ a │
|
2395
|
+
# # │ --- │
|
2396
|
+
# # │ bool │
|
2397
|
+
# # ╞═══════╡
|
2398
|
+
# # │ false │
|
2399
|
+
# # │ true │
|
2400
|
+
# # │ false │
|
2401
|
+
# # │ true │
|
2402
|
+
# # │ false │
|
2403
|
+
# # └───────┘
|
2404
|
+
def peak_min
|
2405
|
+
wrap_expr(_rbexpr.peak_min)
|
2406
|
+
end
|
2407
|
+
|
2338
2408
|
# Get quantile value.
|
2339
2409
|
#
|
2340
2410
|
# @param quantile [Float]
|
@@ -2354,7 +2424,7 @@ module Polars
|
|
2354
2424
|
# # │ --- │
|
2355
2425
|
# # │ f64 │
|
2356
2426
|
# # ╞═════╡
|
2357
|
-
# # │
|
2427
|
+
# # │ 2.0 │
|
2358
2428
|
# # └─────┘
|
2359
2429
|
#
|
2360
2430
|
# @example
|
@@ -2409,6 +2479,206 @@ module Polars
|
|
2409
2479
|
wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
|
2410
2480
|
end
|
2411
2481
|
|
2482
|
+
# Bin continuous values into discrete categories.
|
2483
|
+
#
|
2484
|
+
# @param breaks [Array]
|
2485
|
+
# List of unique cut points.
|
2486
|
+
# @param labels [Array]
|
2487
|
+
# Names of the categories. The number of labels must be equal to the number
|
2488
|
+
# of cut points plus one.
|
2489
|
+
# @param left_closed [Boolean]
|
2490
|
+
# Set the intervals to be left-closed instead of right-closed.
|
2491
|
+
# @param include_breaks [Boolean]
|
2492
|
+
# Include a column with the right endpoint of the bin each observation falls
|
2493
|
+
# in. This will change the data type of the output from a
|
2494
|
+
# `Categorical` to a `Struct`.
|
2495
|
+
#
|
2496
|
+
# @return [Expr]
|
2497
|
+
#
|
2498
|
+
# @example Divide a column into three categories.
|
2499
|
+
# df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
|
2500
|
+
# df.with_columns(
|
2501
|
+
# Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
|
2502
|
+
# )
|
2503
|
+
# # =>
|
2504
|
+
# # shape: (5, 2)
|
2505
|
+
# # ┌─────┬─────┐
|
2506
|
+
# # │ foo ┆ cut │
|
2507
|
+
# # │ --- ┆ --- │
|
2508
|
+
# # │ i64 ┆ cat │
|
2509
|
+
# # ╞═════╪═════╡
|
2510
|
+
# # │ -2 ┆ a │
|
2511
|
+
# # │ -1 ┆ a │
|
2512
|
+
# # │ 0 ┆ b │
|
2513
|
+
# # │ 1 ┆ b │
|
2514
|
+
# # │ 2 ┆ c │
|
2515
|
+
# # └─────┴─────┘
|
2516
|
+
#
|
2517
|
+
# @example Add both the category and the breakpoint.
|
2518
|
+
# df.with_columns(
|
2519
|
+
# Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
|
2520
|
+
# ).unnest("cut")
|
2521
|
+
# # =>
|
2522
|
+
# # shape: (5, 3)
|
2523
|
+
# # ┌─────┬──────┬────────────┐
|
2524
|
+
# # │ foo ┆ brk ┆ foo_bin │
|
2525
|
+
# # │ --- ┆ --- ┆ --- │
|
2526
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2527
|
+
# # ╞═════╪══════╪════════════╡
|
2528
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2529
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2530
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2531
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2532
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2533
|
+
# # └─────┴──────┴────────────┘
|
2534
|
+
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
|
2535
|
+
wrap_expr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
|
2536
|
+
end
|
2537
|
+
|
2538
|
+
# Bin continuous values into discrete categories based on their quantiles.
|
2539
|
+
#
|
2540
|
+
# @param quantiles [Array]
|
2541
|
+
# Either a list of quantile probabilities between 0 and 1 or a positive
|
2542
|
+
# integer determining the number of bins with uniform probability.
|
2543
|
+
# @param labels [Array]
|
2544
|
+
# Names of the categories. The number of labels must be equal to the number
|
2545
|
+
# of categories.
|
2546
|
+
# @param left_closed [Boolean]
|
2547
|
+
# Set the intervals to be left-closed instead of right-closed.
|
2548
|
+
# @param allow_duplicates [Boolean]
|
2549
|
+
# If set to `true`, duplicates in the resulting quantiles are dropped,
|
2550
|
+
# rather than raising a `DuplicateError`. This can happen even with unique
|
2551
|
+
# probabilities, depending on the data.
|
2552
|
+
# @param include_breaks [Boolean]
|
2553
|
+
# Include a column with the right endpoint of the bin each observation falls
|
2554
|
+
# in. This will change the data type of the output from a
|
2555
|
+
# `Categorical` to a `Struct`.
|
2556
|
+
#
|
2557
|
+
# @return [Expr]
|
2558
|
+
#
|
2559
|
+
# @example Divide a column into three categories according to pre-defined quantile probabilities.
|
2560
|
+
# df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
|
2561
|
+
# df.with_columns(
|
2562
|
+
# Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
|
2563
|
+
# )
|
2564
|
+
# # =>
|
2565
|
+
# # shape: (5, 2)
|
2566
|
+
# # ┌─────┬──────┐
|
2567
|
+
# # │ foo ┆ qcut │
|
2568
|
+
# # │ --- ┆ --- │
|
2569
|
+
# # │ i64 ┆ cat │
|
2570
|
+
# # ╞═════╪══════╡
|
2571
|
+
# # │ -2 ┆ a │
|
2572
|
+
# # │ -1 ┆ a │
|
2573
|
+
# # │ 0 ┆ b │
|
2574
|
+
# # │ 1 ┆ b │
|
2575
|
+
# # │ 2 ┆ c │
|
2576
|
+
# # └─────┴──────┘
|
2577
|
+
#
|
2578
|
+
# @example Divide a column into two categories using uniform quantile probabilities.
|
2579
|
+
# df.with_columns(
|
2580
|
+
# Polars.col("foo")
|
2581
|
+
# .qcut(2, labels: ["low", "high"], left_closed: true)
|
2582
|
+
# .alias("qcut")
|
2583
|
+
# )
|
2584
|
+
# # =>
|
2585
|
+
# # shape: (5, 2)
|
2586
|
+
# # ┌─────┬──────┐
|
2587
|
+
# # │ foo ┆ qcut │
|
2588
|
+
# # │ --- ┆ --- │
|
2589
|
+
# # │ i64 ┆ cat │
|
2590
|
+
# # ╞═════╪══════╡
|
2591
|
+
# # │ -2 ┆ low │
|
2592
|
+
# # │ -1 ┆ low │
|
2593
|
+
# # │ 0 ┆ high │
|
2594
|
+
# # │ 1 ┆ high │
|
2595
|
+
# # │ 2 ┆ high │
|
2596
|
+
# # └─────┴──────┘
|
2597
|
+
#
|
2598
|
+
# @example Add both the category and the breakpoint.
|
2599
|
+
# df.with_columns(
|
2600
|
+
# Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
|
2601
|
+
# ).unnest("qcut")
|
2602
|
+
# # =>
|
2603
|
+
# # shape: (5, 3)
|
2604
|
+
# # ┌─────┬──────┬────────────┐
|
2605
|
+
# # │ foo ┆ brk ┆ foo_bin │
|
2606
|
+
# # │ --- ┆ --- ┆ --- │
|
2607
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2608
|
+
# # ╞═════╪══════╪════════════╡
|
2609
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2610
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2611
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2612
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2613
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2614
|
+
# # └─────┴──────┴────────────┘
|
2615
|
+
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
|
2616
|
+
if quantiles.is_a?(Integer)
|
2617
|
+
rbexpr = _rbexpr.qcut_uniform(
|
2618
|
+
quantiles, labels, left_closed, allow_duplicates, include_breaks
|
2619
|
+
)
|
2620
|
+
else
|
2621
|
+
rbexpr = _rbexpr.qcut(
|
2622
|
+
quantiles, labels, left_closed, allow_duplicates, include_breaks
|
2623
|
+
)
|
2624
|
+
end
|
2625
|
+
|
2626
|
+
wrap_expr(rbexpr)
|
2627
|
+
end
|
2628
|
+
|
2629
|
+
# Get the lengths of runs of identical values.
|
2630
|
+
#
|
2631
|
+
# @return [Expr]
|
2632
|
+
#
|
2633
|
+
# @example
|
2634
|
+
# df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
|
2635
|
+
# df.select(Polars.col("s").rle).unnest("s")
|
2636
|
+
# # =>
|
2637
|
+
# # shape: (6, 2)
|
2638
|
+
# # ┌─────────┬────────┐
|
2639
|
+
# # │ lengths ┆ values │
|
2640
|
+
# # │ --- ┆ --- │
|
2641
|
+
# # │ i32 ┆ i64 │
|
2642
|
+
# # ╞═════════╪════════╡
|
2643
|
+
# # │ 2 ┆ 1 │
|
2644
|
+
# # │ 1 ┆ 2 │
|
2645
|
+
# # │ 1 ┆ 1 │
|
2646
|
+
# # │ 1 ┆ null │
|
2647
|
+
# # │ 1 ┆ 1 │
|
2648
|
+
# # │ 2 ┆ 3 │
|
2649
|
+
# # └─────────┴────────┘
|
2650
|
+
def rle
|
2651
|
+
wrap_expr(_rbexpr.rle)
|
2652
|
+
end
|
2653
|
+
|
2654
|
+
# Map values to run IDs.
|
2655
|
+
#
|
2656
|
+
# Similar to RLE, but it maps each value to an ID corresponding to the run into
|
2657
|
+
# which it falls. This is especially useful when you want to define groups by
|
2658
|
+
# runs of identical values rather than the values themselves.
|
2659
|
+
#
|
2660
|
+
# @return [Expr]
|
2661
|
+
#
|
2662
|
+
# @example
|
2663
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
|
2664
|
+
# df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
|
2665
|
+
# # =>
|
2666
|
+
# # shape: (5, 4)
|
2667
|
+
# # ┌─────┬──────┬─────┬──────┐
|
2668
|
+
# # │ a ┆ b ┆ a_r ┆ ab_r │
|
2669
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
2670
|
+
# # │ i64 ┆ str ┆ u32 ┆ u32 │
|
2671
|
+
# # ╞═════╪══════╪═════╪══════╡
|
2672
|
+
# # │ 1 ┆ x ┆ 0 ┆ 0 │
|
2673
|
+
# # │ 2 ┆ x ┆ 1 ┆ 1 │
|
2674
|
+
# # │ 1 ┆ null ┆ 2 ┆ 2 │
|
2675
|
+
# # │ 1 ┆ y ┆ 2 ┆ 3 │
|
2676
|
+
# # │ 1 ┆ y ┆ 2 ┆ 3 │
|
2677
|
+
# # └─────┴──────┴─────┴──────┘
|
2678
|
+
def rle_id
|
2679
|
+
wrap_expr(_rbexpr.rle_id)
|
2680
|
+
end
|
2681
|
+
|
2412
2682
|
# Filter a single column.
|
2413
2683
|
#
|
2414
2684
|
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
@@ -2427,7 +2697,7 @@ module Polars
|
|
2427
2697
|
# }
|
2428
2698
|
# )
|
2429
2699
|
# (
|
2430
|
-
# df.
|
2700
|
+
# df.group_by("group_col").agg(
|
2431
2701
|
# [
|
2432
2702
|
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2433
2703
|
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2465,7 +2735,7 @@ module Polars
|
|
2465
2735
|
# }
|
2466
2736
|
# )
|
2467
2737
|
# (
|
2468
|
-
# df.
|
2738
|
+
# df.group_by("group_col").agg(
|
2469
2739
|
# [
|
2470
2740
|
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2471
2741
|
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2583,7 +2853,7 @@ module Polars
|
|
2583
2853
|
#
|
2584
2854
|
# @example In a GroupBy context the function is applied by group:
|
2585
2855
|
# df.lazy
|
2586
|
-
# .
|
2856
|
+
# .group_by("b", maintain_order: true)
|
2587
2857
|
# .agg(
|
2588
2858
|
# [
|
2589
2859
|
# Polars.col("a").apply { |x| x.sum }
|
@@ -2622,7 +2892,7 @@ module Polars
|
|
2622
2892
|
# "values" => [[1, 2], [2, 3], [4]]
|
2623
2893
|
# }
|
2624
2894
|
# )
|
2625
|
-
# df.
|
2895
|
+
# df.group_by("group").agg(Polars.col("values").flatten)
|
2626
2896
|
# # =>
|
2627
2897
|
# # shape: (2, 2)
|
2628
2898
|
# # ┌───────┬───────────┐
|
@@ -2670,7 +2940,7 @@ module Polars
|
|
2670
2940
|
#
|
2671
2941
|
# @example
|
2672
2942
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2673
|
-
# df.select(Polars.col("foo").
|
2943
|
+
# df.select(Polars.col("foo").gather_every(3))
|
2674
2944
|
# # =>
|
2675
2945
|
# # shape: (3, 1)
|
2676
2946
|
# # ┌─────┐
|
@@ -2682,9 +2952,10 @@ module Polars
|
|
2682
2952
|
# # │ 4 │
|
2683
2953
|
# # │ 7 │
|
2684
2954
|
# # └─────┘
|
2685
|
-
def
|
2686
|
-
wrap_expr(_rbexpr.
|
2955
|
+
def gather_every(n, offset = 0)
|
2956
|
+
wrap_expr(_rbexpr.gather_every(n, offset))
|
2687
2957
|
end
|
2958
|
+
alias_method :take_every, :gather_every
|
2688
2959
|
|
2689
2960
|
# Get the first `n` rows.
|
2690
2961
|
#
|
@@ -3057,11 +3328,11 @@ module Polars
|
|
3057
3328
|
# # ┌─────┬─────┐
|
3058
3329
|
# # │ a ┆ b │
|
3059
3330
|
# # │ --- ┆ --- │
|
3060
|
-
# # │
|
3331
|
+
# # │ f64 ┆ f64 │
|
3061
3332
|
# # ╞═════╪═════╡
|
3062
|
-
# # │ 1
|
3063
|
-
# # │ 2
|
3064
|
-
# # │ 3
|
3333
|
+
# # │ 1.0 ┆ 1.0 │
|
3334
|
+
# # │ 2.0 ┆ NaN │
|
3335
|
+
# # │ 3.0 ┆ 3.0 │
|
3065
3336
|
# # └─────┴─────┘
|
3066
3337
|
def interpolate(method: "linear")
|
3067
3338
|
wrap_expr(_rbexpr.interpolate(method))
|
@@ -3112,7 +3383,7 @@ module Polars
|
|
3112
3383
|
#
|
3113
3384
|
# @note
|
3114
3385
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3115
|
-
# window, consider using `
|
3386
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3116
3387
|
# computation.
|
3117
3388
|
#
|
3118
3389
|
# @return [Expr]
|
@@ -3201,7 +3472,7 @@ module Polars
|
|
3201
3472
|
#
|
3202
3473
|
# @note
|
3203
3474
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3204
|
-
# window, consider using `
|
3475
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3205
3476
|
# computation.
|
3206
3477
|
#
|
3207
3478
|
# @return [Expr]
|
@@ -3290,7 +3561,7 @@ module Polars
|
|
3290
3561
|
#
|
3291
3562
|
# @note
|
3292
3563
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3293
|
-
# window, consider using `
|
3564
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3294
3565
|
# computation.
|
3295
3566
|
#
|
3296
3567
|
# @return [Expr]
|
@@ -3379,7 +3650,7 @@ module Polars
|
|
3379
3650
|
#
|
3380
3651
|
# @note
|
3381
3652
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3382
|
-
# window, consider using `
|
3653
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3383
3654
|
# computation.
|
3384
3655
|
#
|
3385
3656
|
# @return [Expr]
|
@@ -3468,7 +3739,7 @@ module Polars
|
|
3468
3739
|
#
|
3469
3740
|
# @note
|
3470
3741
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3471
|
-
# window, consider using `
|
3742
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3472
3743
|
# computation.
|
3473
3744
|
#
|
3474
3745
|
# @return [Expr]
|
@@ -3501,14 +3772,15 @@ module Polars
|
|
3501
3772
|
center: false,
|
3502
3773
|
by: nil,
|
3503
3774
|
closed: "left",
|
3504
|
-
ddof: 1
|
3775
|
+
ddof: 1,
|
3776
|
+
warn_if_unsorted: true
|
3505
3777
|
)
|
3506
3778
|
window_size, min_periods = _prepare_rolling_window_args(
|
3507
3779
|
window_size, min_periods
|
3508
3780
|
)
|
3509
3781
|
wrap_expr(
|
3510
3782
|
_rbexpr.rolling_std(
|
3511
|
-
window_size, weights, min_periods, center, by, closed, ddof
|
3783
|
+
window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
|
3512
3784
|
)
|
3513
3785
|
)
|
3514
3786
|
end
|
@@ -3558,7 +3830,7 @@ module Polars
|
|
3558
3830
|
#
|
3559
3831
|
# @note
|
3560
3832
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3561
|
-
# window, consider using `
|
3833
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3562
3834
|
# computation.
|
3563
3835
|
#
|
3564
3836
|
# @return [Expr]
|
@@ -3591,14 +3863,15 @@ module Polars
|
|
3591
3863
|
center: false,
|
3592
3864
|
by: nil,
|
3593
3865
|
closed: "left",
|
3594
|
-
ddof: 1
|
3866
|
+
ddof: 1,
|
3867
|
+
warn_if_unsorted: true
|
3595
3868
|
)
|
3596
3869
|
window_size, min_periods = _prepare_rolling_window_args(
|
3597
3870
|
window_size, min_periods
|
3598
3871
|
)
|
3599
3872
|
wrap_expr(
|
3600
3873
|
_rbexpr.rolling_var(
|
3601
|
-
window_size, weights, min_periods, center, by, closed, ddof
|
3874
|
+
window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
|
3602
3875
|
)
|
3603
3876
|
)
|
3604
3877
|
end
|
@@ -3644,7 +3917,7 @@ module Polars
|
|
3644
3917
|
#
|
3645
3918
|
# @note
|
3646
3919
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3647
|
-
# window, consider using `
|
3920
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3648
3921
|
# computation.
|
3649
3922
|
#
|
3650
3923
|
# @return [Expr]
|
@@ -3676,14 +3949,15 @@ module Polars
|
|
3676
3949
|
min_periods: nil,
|
3677
3950
|
center: false,
|
3678
3951
|
by: nil,
|
3679
|
-
closed: "left"
|
3952
|
+
closed: "left",
|
3953
|
+
warn_if_unsorted: true
|
3680
3954
|
)
|
3681
3955
|
window_size, min_periods = _prepare_rolling_window_args(
|
3682
3956
|
window_size, min_periods
|
3683
3957
|
)
|
3684
3958
|
wrap_expr(
|
3685
3959
|
_rbexpr.rolling_median(
|
3686
|
-
window_size, weights, min_periods, center, by, closed
|
3960
|
+
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
|
3687
3961
|
)
|
3688
3962
|
)
|
3689
3963
|
end
|
@@ -3733,7 +4007,7 @@ module Polars
|
|
3733
4007
|
#
|
3734
4008
|
# @note
|
3735
4009
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3736
|
-
# window, consider using `
|
4010
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3737
4011
|
# computation.
|
3738
4012
|
#
|
3739
4013
|
# @return [Expr]
|
@@ -3767,14 +4041,15 @@ module Polars
|
|
3767
4041
|
min_periods: nil,
|
3768
4042
|
center: false,
|
3769
4043
|
by: nil,
|
3770
|
-
closed: "left"
|
4044
|
+
closed: "left",
|
4045
|
+
warn_if_unsorted: true
|
3771
4046
|
)
|
3772
4047
|
window_size, min_periods = _prepare_rolling_window_args(
|
3773
4048
|
window_size, min_periods
|
3774
4049
|
)
|
3775
4050
|
wrap_expr(
|
3776
4051
|
_rbexpr.rolling_quantile(
|
3777
|
-
quantile, interpolation, window_size, weights, min_periods, center, by, closed
|
4052
|
+
quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
|
3778
4053
|
)
|
3779
4054
|
)
|
3780
4055
|
end
|
@@ -3948,7 +4223,7 @@ module Polars
|
|
3948
4223
|
# # ┌─────┐
|
3949
4224
|
# # │ a │
|
3950
4225
|
# # │ --- │
|
3951
|
-
# # │
|
4226
|
+
# # │ f64 │
|
3952
4227
|
# # ╞═════╡
|
3953
4228
|
# # │ 3.0 │
|
3954
4229
|
# # │ 4.5 │
|
@@ -4041,6 +4316,7 @@ module Polars
|
|
4041
4316
|
# # │ 12 ┆ 0.0 │
|
4042
4317
|
# # └──────┴────────────┘
|
4043
4318
|
def pct_change(n: 1)
|
4319
|
+
n = Utils.parse_as_expression(n)
|
4044
4320
|
wrap_expr(_rbexpr.pct_change(n))
|
4045
4321
|
end
|
4046
4322
|
|
@@ -4105,16 +4381,14 @@ module Polars
|
|
4105
4381
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
4106
4382
|
end
|
4107
4383
|
|
4108
|
-
#
|
4109
|
-
#
|
4110
|
-
# Only works for numerical types.
|
4384
|
+
# Set values outside the given boundaries to the boundary value.
|
4111
4385
|
#
|
4112
|
-
# If you want to clip other
|
4113
|
-
#
|
4386
|
+
# Only works for numeric and temporal columns. If you want to clip other data
|
4387
|
+
# types, consider writing a `when-then-otherwise` expression.
|
4114
4388
|
#
|
4115
|
-
# @param
|
4389
|
+
# @param lower_bound [Numeric]
|
4116
4390
|
# Minimum value.
|
4117
|
-
# @param
|
4391
|
+
# @param upper_bound [Numeric]
|
4118
4392
|
# Maximum value.
|
4119
4393
|
#
|
4120
4394
|
# @return [Expr]
|
@@ -4134,8 +4408,14 @@ module Polars
|
|
4134
4408
|
# # │ null ┆ null │
|
4135
4409
|
# # │ 50 ┆ 10 │
|
4136
4410
|
# # └──────┴─────────────┘
|
4137
|
-
def clip(
|
4138
|
-
|
4411
|
+
def clip(lower_bound, upper_bound)
|
4412
|
+
if !lower_bound.nil?
|
4413
|
+
lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
|
4414
|
+
end
|
4415
|
+
if !upper_bound.nil?
|
4416
|
+
upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
|
4417
|
+
end
|
4418
|
+
wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
|
4139
4419
|
end
|
4140
4420
|
|
4141
4421
|
# Clip (limit) the values in an array to a `min` boundary.
|
@@ -4145,7 +4425,7 @@ module Polars
|
|
4145
4425
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4146
4426
|
# expression. See `when` for more information.
|
4147
4427
|
#
|
4148
|
-
# @param
|
4428
|
+
# @param lower_bound [Numeric]
|
4149
4429
|
# Minimum value.
|
4150
4430
|
#
|
4151
4431
|
# @return [Expr]
|
@@ -4165,8 +4445,8 @@ module Polars
|
|
4165
4445
|
# # │ null ┆ null │
|
4166
4446
|
# # │ 50 ┆ 50 │
|
4167
4447
|
# # └──────┴─────────────┘
|
4168
|
-
def clip_min(
|
4169
|
-
|
4448
|
+
def clip_min(lower_bound)
|
4449
|
+
clip(lower_bound, nil)
|
4170
4450
|
end
|
4171
4451
|
|
4172
4452
|
# Clip (limit) the values in an array to a `max` boundary.
|
@@ -4176,7 +4456,7 @@ module Polars
|
|
4176
4456
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4177
4457
|
# expression. See `when` for more information.
|
4178
4458
|
#
|
4179
|
-
# @param
|
4459
|
+
# @param upper_bound [Numeric]
|
4180
4460
|
# Maximum value.
|
4181
4461
|
#
|
4182
4462
|
# @return [Expr]
|
@@ -4196,8 +4476,8 @@ module Polars
|
|
4196
4476
|
# # │ null ┆ null │
|
4197
4477
|
# # │ 50 ┆ 0 │
|
4198
4478
|
# # └──────┴─────────────┘
|
4199
|
-
def clip_max(
|
4200
|
-
|
4479
|
+
def clip_max(upper_bound)
|
4480
|
+
clip(nil, upper_bound)
|
4201
4481
|
end
|
4202
4482
|
|
4203
4483
|
# Calculate the lower bound.
|
@@ -4558,11 +4838,11 @@ module Polars
|
|
4558
4838
|
# # │ 1 │
|
4559
4839
|
# # │ 3 │
|
4560
4840
|
# # └─────┘
|
4561
|
-
def shuffle(seed: nil
|
4841
|
+
def shuffle(seed: nil)
|
4562
4842
|
if seed.nil?
|
4563
4843
|
seed = rand(10000)
|
4564
4844
|
end
|
4565
|
-
wrap_expr(_rbexpr.shuffle(seed
|
4845
|
+
wrap_expr(_rbexpr.shuffle(seed))
|
4566
4846
|
end
|
4567
4847
|
|
4568
4848
|
# Sample from this expression.
|
@@ -4600,22 +4880,23 @@ module Polars
|
|
4600
4880
|
with_replacement: true,
|
4601
4881
|
shuffle: false,
|
4602
4882
|
seed: nil,
|
4603
|
-
n: nil
|
4604
|
-
fixed_seed: false
|
4883
|
+
n: nil
|
4605
4884
|
)
|
4606
4885
|
if !n.nil? && !frac.nil?
|
4607
4886
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4608
4887
|
end
|
4609
4888
|
|
4610
4889
|
if !n.nil? && frac.nil?
|
4611
|
-
|
4890
|
+
n = Utils.parse_as_expression(n)
|
4891
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4612
4892
|
end
|
4613
4893
|
|
4614
4894
|
if frac.nil?
|
4615
4895
|
frac = 1.0
|
4616
4896
|
end
|
4897
|
+
frac = Utils.parse_as_expression(frac)
|
4617
4898
|
wrap_expr(
|
4618
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed
|
4899
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4619
4900
|
)
|
4620
4901
|
end
|
4621
4902
|
|
@@ -4885,7 +5166,7 @@ module Polars
|
|
4885
5166
|
# Number of valid values there should be in the window before the expression
|
4886
5167
|
# is evaluated. valid values = `length - null_count`
|
4887
5168
|
# @param parallel [Boolean]
|
4888
|
-
# Run in parallel. Don't do this in a
|
5169
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
4889
5170
|
# already has much parallelization.
|
4890
5171
|
#
|
4891
5172
|
# @return [Expr]
|
@@ -5057,6 +5338,13 @@ module Polars
|
|
5057
5338
|
MetaExpr.new(self)
|
5058
5339
|
end
|
5059
5340
|
|
5341
|
+
# Create an object namespace of all expressions that modify expression names.
|
5342
|
+
#
|
5343
|
+
# @return [NameExpr]
|
5344
|
+
def name
|
5345
|
+
NameExpr.new(self)
|
5346
|
+
end
|
5347
|
+
|
5060
5348
|
# Create an object namespace of all string related methods.
|
5061
5349
|
#
|
5062
5350
|
# @return [StringExpr]
|