polars-df 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +17 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +121 -93
- data/ext/polars/src/dataframe.rs +116 -71
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +68 -284
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +85 -58
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +47 -42
- data/ext/polars/src/lib.rs +156 -72
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +3 -3
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/construction.rs +4 -4
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +7 -27
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +18 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/lib/polars/expr.rb
CHANGED
@@ -131,6 +131,13 @@ module Polars
|
|
131
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
132
132
|
end
|
133
133
|
|
134
|
+
# Performs boolean not.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
137
|
+
def !
|
138
|
+
is_not
|
139
|
+
end
|
140
|
+
|
134
141
|
# Performs negation.
|
135
142
|
#
|
136
143
|
# @return [Expr]
|
@@ -191,8 +198,8 @@ module Polars
|
|
191
198
|
# # ╞══════╪═══════╡
|
192
199
|
# # │ true ┆ false │
|
193
200
|
# # └──────┴───────┘
|
194
|
-
def any
|
195
|
-
wrap_expr(_rbexpr.any)
|
201
|
+
def any(drop_nulls: true)
|
202
|
+
wrap_expr(_rbexpr.any(drop_nulls))
|
196
203
|
end
|
197
204
|
|
198
205
|
# Check if all boolean values in a Boolean column are `true`.
|
@@ -216,8 +223,8 @@ module Polars
|
|
216
223
|
# # ╞══════╪═══════╪═══════╡
|
217
224
|
# # │ true ┆ false ┆ false │
|
218
225
|
# # └──────┴───────┴───────┘
|
219
|
-
def all
|
220
|
-
wrap_expr(_rbexpr.all)
|
226
|
+
def all(drop_nulls: true)
|
227
|
+
wrap_expr(_rbexpr.all(drop_nulls))
|
221
228
|
end
|
222
229
|
|
223
230
|
# Compute the square root of the elements.
|
@@ -401,21 +408,21 @@ module Polars
|
|
401
408
|
# # │ 18 ┆ 4 │
|
402
409
|
# # └─────┴─────┘
|
403
410
|
def keep_name
|
404
|
-
|
411
|
+
name.keep
|
405
412
|
end
|
406
413
|
|
407
414
|
# Add a prefix to the root column name of the expression.
|
408
415
|
#
|
409
416
|
# @return [Expr]
|
410
417
|
def prefix(prefix)
|
411
|
-
|
418
|
+
name.prefix(prefix)
|
412
419
|
end
|
413
420
|
|
414
421
|
# Add a suffix to the root column name of the expression.
|
415
422
|
#
|
416
423
|
# @return [Expr]
|
417
424
|
def suffix(suffix)
|
418
|
-
|
425
|
+
name.suffix(suffix)
|
419
426
|
end
|
420
427
|
|
421
428
|
# Rename the output of an expression by mapping a function over the root name.
|
@@ -443,7 +450,7 @@ module Polars
|
|
443
450
|
# # │ 1 ┆ 3 │
|
444
451
|
# # └───────────┴───────────┘
|
445
452
|
def map_alias(&f)
|
446
|
-
|
453
|
+
name.map(&f)
|
447
454
|
end
|
448
455
|
|
449
456
|
# Negate a boolean expression.
|
@@ -682,7 +689,7 @@ module Polars
|
|
682
689
|
# "value" => [94, 95, 96, 97, 97, 99]
|
683
690
|
# }
|
684
691
|
# )
|
685
|
-
# df.
|
692
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
686
693
|
# # =>
|
687
694
|
# # shape: (2, 2)
|
688
695
|
# # ┌───────┬───────────┐
|
@@ -905,8 +912,8 @@ module Polars
|
|
905
912
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
906
913
|
# df.select(
|
907
914
|
# [
|
908
|
-
# Polars.col("a").
|
909
|
-
# Polars.col("a").
|
915
|
+
# Polars.col("a").cum_sum,
|
916
|
+
# Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
|
910
917
|
# ]
|
911
918
|
# )
|
912
919
|
# # =>
|
@@ -921,9 +928,10 @@ module Polars
|
|
921
928
|
# # │ 6 ┆ 7 │
|
922
929
|
# # │ 10 ┆ 4 │
|
923
930
|
# # └─────┴───────────┘
|
924
|
-
def
|
925
|
-
wrap_expr(_rbexpr.
|
931
|
+
def cum_sum(reverse: false)
|
932
|
+
wrap_expr(_rbexpr.cum_sum(reverse))
|
926
933
|
end
|
934
|
+
alias_method :cumsum, :cum_sum
|
927
935
|
|
928
936
|
# Get an array with the cumulative product computed at every element.
|
929
937
|
#
|
@@ -940,8 +948,8 @@ module Polars
|
|
940
948
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
941
949
|
# df.select(
|
942
950
|
# [
|
943
|
-
# Polars.col("a").
|
944
|
-
# Polars.col("a").
|
951
|
+
# Polars.col("a").cum_prod,
|
952
|
+
# Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
|
945
953
|
# ]
|
946
954
|
# )
|
947
955
|
# # =>
|
@@ -956,9 +964,10 @@ module Polars
|
|
956
964
|
# # │ 6 ┆ 12 │
|
957
965
|
# # │ 24 ┆ 4 │
|
958
966
|
# # └─────┴───────────┘
|
959
|
-
def
|
960
|
-
wrap_expr(_rbexpr.
|
967
|
+
def cum_prod(reverse: false)
|
968
|
+
wrap_expr(_rbexpr.cum_prod(reverse))
|
961
969
|
end
|
970
|
+
alias_method :cumprod, :cum_prod
|
962
971
|
|
963
972
|
# Get an array with the cumulative min computed at every element.
|
964
973
|
#
|
@@ -971,8 +980,8 @@ module Polars
|
|
971
980
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
972
981
|
# df.select(
|
973
982
|
# [
|
974
|
-
# Polars.col("a").
|
975
|
-
# Polars.col("a").
|
983
|
+
# Polars.col("a").cum_min,
|
984
|
+
# Polars.col("a").cum_min(reverse: true).alias("a_reverse")
|
976
985
|
# ]
|
977
986
|
# )
|
978
987
|
# # =>
|
@@ -987,9 +996,10 @@ module Polars
|
|
987
996
|
# # │ 1 ┆ 3 │
|
988
997
|
# # │ 1 ┆ 4 │
|
989
998
|
# # └─────┴───────────┘
|
990
|
-
def
|
991
|
-
wrap_expr(_rbexpr.
|
999
|
+
def cum_min(reverse: false)
|
1000
|
+
wrap_expr(_rbexpr.cum_min(reverse))
|
992
1001
|
end
|
1002
|
+
alias_method :cummin, :cum_min
|
993
1003
|
|
994
1004
|
# Get an array with the cumulative max computed at every element.
|
995
1005
|
#
|
@@ -1002,8 +1012,8 @@ module Polars
|
|
1002
1012
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1003
1013
|
# df.select(
|
1004
1014
|
# [
|
1005
|
-
# Polars.col("a").
|
1006
|
-
# Polars.col("a").
|
1015
|
+
# Polars.col("a").cum_max,
|
1016
|
+
# Polars.col("a").cum_max(reverse: true).alias("a_reverse")
|
1007
1017
|
# ]
|
1008
1018
|
# )
|
1009
1019
|
# # =>
|
@@ -1018,9 +1028,10 @@ module Polars
|
|
1018
1028
|
# # │ 3 ┆ 4 │
|
1019
1029
|
# # │ 4 ┆ 4 │
|
1020
1030
|
# # └─────┴───────────┘
|
1021
|
-
def
|
1022
|
-
wrap_expr(_rbexpr.
|
1031
|
+
def cum_max(reverse: false)
|
1032
|
+
wrap_expr(_rbexpr.cum_max(reverse))
|
1023
1033
|
end
|
1034
|
+
alias_method :cummax, :cum_max
|
1024
1035
|
|
1025
1036
|
# Get an array with the cumulative count computed at every element.
|
1026
1037
|
#
|
@@ -1035,8 +1046,8 @@ module Polars
|
|
1035
1046
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1036
1047
|
# df.select(
|
1037
1048
|
# [
|
1038
|
-
# Polars.col("a").
|
1039
|
-
# Polars.col("a").
|
1049
|
+
# Polars.col("a").cum_count,
|
1050
|
+
# Polars.col("a").cum_count(reverse: true).alias("a_reverse")
|
1040
1051
|
# ]
|
1041
1052
|
# )
|
1042
1053
|
# # =>
|
@@ -1051,9 +1062,10 @@ module Polars
|
|
1051
1062
|
# # │ 2 ┆ 1 │
|
1052
1063
|
# # │ 3 ┆ 0 │
|
1053
1064
|
# # └─────┴───────────┘
|
1054
|
-
def
|
1055
|
-
wrap_expr(_rbexpr.
|
1065
|
+
def cum_count(reverse: false)
|
1066
|
+
wrap_expr(_rbexpr.cum_count(reverse))
|
1056
1067
|
end
|
1068
|
+
alias_method :cumcount, :cum_count
|
1057
1069
|
|
1058
1070
|
# Rounds down to the nearest integer value.
|
1059
1071
|
#
|
@@ -1229,7 +1241,7 @@ module Polars
|
|
1229
1241
|
|
1230
1242
|
# Sort this column. In projection/ selection context the whole column is sorted.
|
1231
1243
|
#
|
1232
|
-
# If used in a
|
1244
|
+
# If used in a group by context, the groups are sorted.
|
1233
1245
|
#
|
1234
1246
|
# @param reverse [Boolean]
|
1235
1247
|
# false -> order from small to large.
|
@@ -1287,7 +1299,7 @@ module Polars
|
|
1287
1299
|
# # └───────┘
|
1288
1300
|
#
|
1289
1301
|
# @example
|
1290
|
-
# df.
|
1302
|
+
# df.group_by("group").agg(Polars.col("value").sort)
|
1291
1303
|
# # =>
|
1292
1304
|
# # shape: (2, 2)
|
1293
1305
|
# # ┌───────┬────────────┐
|
@@ -1337,6 +1349,7 @@ module Polars
|
|
1337
1349
|
# # │ 2 ┆ 98 │
|
1338
1350
|
# # └───────┴──────────┘
|
1339
1351
|
def top_k(k: 5)
|
1352
|
+
k = Utils.parse_as_expression(k)
|
1340
1353
|
wrap_expr(_rbexpr.top_k(k))
|
1341
1354
|
end
|
1342
1355
|
|
@@ -1375,6 +1388,7 @@ module Polars
|
|
1375
1388
|
# # │ 2 ┆ 98 │
|
1376
1389
|
# # └───────┴──────────┘
|
1377
1390
|
def bottom_k(k: 5)
|
1391
|
+
k = Utils.parse_as_expression(k)
|
1378
1392
|
wrap_expr(_rbexpr.bottom_k(k))
|
1379
1393
|
end
|
1380
1394
|
|
@@ -1494,7 +1508,7 @@ module Polars
|
|
1494
1508
|
# Sort this column by the ordering of another column, or multiple other columns.
|
1495
1509
|
#
|
1496
1510
|
# In projection/ selection context the whole column is sorted.
|
1497
|
-
# If used in a
|
1511
|
+
# If used in a group by context, the groups are sorted.
|
1498
1512
|
#
|
1499
1513
|
# @param by [Object]
|
1500
1514
|
# The column(s) used for sorting.
|
@@ -1566,30 +1580,33 @@ module Polars
|
|
1566
1580
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1567
1581
|
# }
|
1568
1582
|
# )
|
1569
|
-
# df.
|
1583
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
|
1570
1584
|
# # =>
|
1571
1585
|
# # shape: (2, 2)
|
1572
|
-
# #
|
1573
|
-
# # │ group ┆ value
|
1574
|
-
# # │ --- ┆ ---
|
1575
|
-
# # │ str ┆ i64
|
1576
|
-
# #
|
1577
|
-
# # │ one ┆ 98
|
1578
|
-
# # │ two ┆ 99
|
1579
|
-
# #
|
1580
|
-
def
|
1586
|
+
# # ┌───────┬───────────┐
|
1587
|
+
# # │ group ┆ value │
|
1588
|
+
# # │ --- ┆ --- │
|
1589
|
+
# # │ str ┆ list[i64] │
|
1590
|
+
# # ╞═══════╪═══════════╡
|
1591
|
+
# # │ one ┆ [2, 98] │
|
1592
|
+
# # │ two ┆ [4, 99] │
|
1593
|
+
# # └───────┴───────────┘
|
1594
|
+
def gather(indices)
|
1581
1595
|
if indices.is_a?(::Array)
|
1582
1596
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1597
|
else
|
1584
1598
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
1585
1599
|
end
|
1586
|
-
wrap_expr(_rbexpr.
|
1600
|
+
wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
|
1587
1601
|
end
|
1602
|
+
alias_method :take, :gather
|
1588
1603
|
|
1589
1604
|
# Shift the values by a given period.
|
1590
1605
|
#
|
1591
|
-
# @param
|
1606
|
+
# @param n [Integer]
|
1592
1607
|
# Number of places to shift (may be negative).
|
1608
|
+
# @param fill_value [Object]
|
1609
|
+
# Fill the resulting null values with this value.
|
1593
1610
|
#
|
1594
1611
|
# @return [Expr]
|
1595
1612
|
#
|
@@ -1608,8 +1625,12 @@ module Polars
|
|
1608
1625
|
# # │ 2 │
|
1609
1626
|
# # │ 3 │
|
1610
1627
|
# # └──────┘
|
1611
|
-
def shift(
|
1612
|
-
|
1628
|
+
def shift(n = 1, fill_value: nil)
|
1629
|
+
if !fill_value.nil?
|
1630
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1631
|
+
end
|
1632
|
+
n = Utils.parse_as_expression(n)
|
1633
|
+
wrap_expr(_rbexpr.shift(n, fill_value))
|
1613
1634
|
end
|
1614
1635
|
|
1615
1636
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1637,8 +1658,7 @@ module Polars
|
|
1637
1658
|
# # │ 3 │
|
1638
1659
|
# # └─────┘
|
1639
1660
|
def shift_and_fill(periods, fill_value)
|
1640
|
-
|
1641
|
-
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
1661
|
+
shift(periods, fill_value: fill_value)
|
1642
1662
|
end
|
1643
1663
|
|
1644
1664
|
# Fill null values using the specified value or strategy.
|
@@ -2063,7 +2083,7 @@ module Polars
|
|
2063
2083
|
# # │ 2 │
|
2064
2084
|
# # └─────┘
|
2065
2085
|
def approx_unique
|
2066
|
-
wrap_expr(_rbexpr.
|
2086
|
+
wrap_expr(_rbexpr.approx_n_unique)
|
2067
2087
|
end
|
2068
2088
|
|
2069
2089
|
# Count null values.
|
@@ -2201,7 +2221,7 @@ module Polars
|
|
2201
2221
|
|
2202
2222
|
# Apply window function over a subgroup.
|
2203
2223
|
#
|
2204
|
-
# This is similar to a
|
2224
|
+
# This is similar to a group by + aggregation + self join.
|
2205
2225
|
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2206
2226
|
#
|
2207
2227
|
# @param expr [Object]
|
@@ -2309,9 +2329,10 @@ module Polars
|
|
2309
2329
|
# # │ 1 ┆ false │
|
2310
2330
|
# # │ 5 ┆ true │
|
2311
2331
|
# # └─────┴──────────┘
|
2312
|
-
def
|
2313
|
-
wrap_expr(_rbexpr.
|
2332
|
+
def is_first_distinct
|
2333
|
+
wrap_expr(_rbexpr.is_first_distinct)
|
2314
2334
|
end
|
2335
|
+
alias_method :is_first, :is_first_distinct
|
2315
2336
|
|
2316
2337
|
# Get mask of duplicated values.
|
2317
2338
|
#
|
@@ -2335,6 +2356,54 @@ module Polars
|
|
2335
2356
|
wrap_expr(_rbexpr.is_duplicated)
|
2336
2357
|
end
|
2337
2358
|
|
2359
|
+
# Get a boolean mask of the local maximum peaks.
|
2360
|
+
#
|
2361
|
+
# @return [Expr]
|
2362
|
+
#
|
2363
|
+
# @example
|
2364
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
2365
|
+
# df.select(Polars.col("a").peak_max)
|
2366
|
+
# # =>
|
2367
|
+
# # shape: (5, 1)
|
2368
|
+
# # ┌───────┐
|
2369
|
+
# # │ a │
|
2370
|
+
# # │ --- │
|
2371
|
+
# # │ bool │
|
2372
|
+
# # ╞═══════╡
|
2373
|
+
# # │ false │
|
2374
|
+
# # │ false │
|
2375
|
+
# # │ false │
|
2376
|
+
# # │ false │
|
2377
|
+
# # │ true │
|
2378
|
+
# # └───────┘
|
2379
|
+
def peak_max
|
2380
|
+
wrap_expr(_rbexpr.peak_max)
|
2381
|
+
end
|
2382
|
+
|
2383
|
+
# Get a boolean mask of the local minimum peaks.
|
2384
|
+
#
|
2385
|
+
# @return [Expr]
|
2386
|
+
#
|
2387
|
+
# @example
|
2388
|
+
# df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
|
2389
|
+
# df.select(Polars.col("a").peak_min)
|
2390
|
+
# # =>
|
2391
|
+
# # shape: (5, 1)
|
2392
|
+
# # ┌───────┐
|
2393
|
+
# # │ a │
|
2394
|
+
# # │ --- │
|
2395
|
+
# # │ bool │
|
2396
|
+
# # ╞═══════╡
|
2397
|
+
# # │ false │
|
2398
|
+
# # │ true │
|
2399
|
+
# # │ false │
|
2400
|
+
# # │ true │
|
2401
|
+
# # │ false │
|
2402
|
+
# # └───────┘
|
2403
|
+
def peak_min
|
2404
|
+
wrap_expr(_rbexpr.peak_min)
|
2405
|
+
end
|
2406
|
+
|
2338
2407
|
# Get quantile value.
|
2339
2408
|
#
|
2340
2409
|
# @param quantile [Float]
|
@@ -2427,7 +2496,7 @@ module Polars
|
|
2427
2496
|
# }
|
2428
2497
|
# )
|
2429
2498
|
# (
|
2430
|
-
# df.
|
2499
|
+
# df.group_by("group_col").agg(
|
2431
2500
|
# [
|
2432
2501
|
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2433
2502
|
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2465,7 +2534,7 @@ module Polars
|
|
2465
2534
|
# }
|
2466
2535
|
# )
|
2467
2536
|
# (
|
2468
|
-
# df.
|
2537
|
+
# df.group_by("group_col").agg(
|
2469
2538
|
# [
|
2470
2539
|
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2471
2540
|
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2583,7 +2652,7 @@ module Polars
|
|
2583
2652
|
#
|
2584
2653
|
# @example In a GroupBy context the function is applied by group:
|
2585
2654
|
# df.lazy
|
2586
|
-
# .
|
2655
|
+
# .group_by("b", maintain_order: true)
|
2587
2656
|
# .agg(
|
2588
2657
|
# [
|
2589
2658
|
# Polars.col("a").apply { |x| x.sum }
|
@@ -2622,7 +2691,7 @@ module Polars
|
|
2622
2691
|
# "values" => [[1, 2], [2, 3], [4]]
|
2623
2692
|
# }
|
2624
2693
|
# )
|
2625
|
-
# df.
|
2694
|
+
# df.group_by("group").agg(Polars.col("values").flatten)
|
2626
2695
|
# # =>
|
2627
2696
|
# # shape: (2, 2)
|
2628
2697
|
# # ┌───────┬───────────┐
|
@@ -2670,7 +2739,7 @@ module Polars
|
|
2670
2739
|
#
|
2671
2740
|
# @example
|
2672
2741
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2673
|
-
# df.select(Polars.col("foo").
|
2742
|
+
# df.select(Polars.col("foo").gather_every(3))
|
2674
2743
|
# # =>
|
2675
2744
|
# # shape: (3, 1)
|
2676
2745
|
# # ┌─────┐
|
@@ -2682,9 +2751,10 @@ module Polars
|
|
2682
2751
|
# # │ 4 │
|
2683
2752
|
# # │ 7 │
|
2684
2753
|
# # └─────┘
|
2685
|
-
def
|
2686
|
-
wrap_expr(_rbexpr.
|
2754
|
+
def gather_every(n)
|
2755
|
+
wrap_expr(_rbexpr.gather_every(n))
|
2687
2756
|
end
|
2757
|
+
alias_method :take_every, :gather_every
|
2688
2758
|
|
2689
2759
|
# Get the first `n` rows.
|
2690
2760
|
#
|
@@ -3057,11 +3127,11 @@ module Polars
|
|
3057
3127
|
# # ┌─────┬─────┐
|
3058
3128
|
# # │ a ┆ b │
|
3059
3129
|
# # │ --- ┆ --- │
|
3060
|
-
# # │
|
3130
|
+
# # │ f64 ┆ f64 │
|
3061
3131
|
# # ╞═════╪═════╡
|
3062
|
-
# # │ 1
|
3063
|
-
# # │ 2
|
3064
|
-
# # │ 3
|
3132
|
+
# # │ 1.0 ┆ 1.0 │
|
3133
|
+
# # │ 2.0 ┆ NaN │
|
3134
|
+
# # │ 3.0 ┆ 3.0 │
|
3065
3135
|
# # └─────┴─────┘
|
3066
3136
|
def interpolate(method: "linear")
|
3067
3137
|
wrap_expr(_rbexpr.interpolate(method))
|
@@ -3112,7 +3182,7 @@ module Polars
|
|
3112
3182
|
#
|
3113
3183
|
# @note
|
3114
3184
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3115
|
-
# window, consider using `
|
3185
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3116
3186
|
# computation.
|
3117
3187
|
#
|
3118
3188
|
# @return [Expr]
|
@@ -3201,7 +3271,7 @@ module Polars
|
|
3201
3271
|
#
|
3202
3272
|
# @note
|
3203
3273
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3204
|
-
# window, consider using `
|
3274
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3205
3275
|
# computation.
|
3206
3276
|
#
|
3207
3277
|
# @return [Expr]
|
@@ -3290,7 +3360,7 @@ module Polars
|
|
3290
3360
|
#
|
3291
3361
|
# @note
|
3292
3362
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3293
|
-
# window, consider using `
|
3363
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3294
3364
|
# computation.
|
3295
3365
|
#
|
3296
3366
|
# @return [Expr]
|
@@ -3379,7 +3449,7 @@ module Polars
|
|
3379
3449
|
#
|
3380
3450
|
# @note
|
3381
3451
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3382
|
-
# window, consider using `
|
3452
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3383
3453
|
# computation.
|
3384
3454
|
#
|
3385
3455
|
# @return [Expr]
|
@@ -3468,7 +3538,7 @@ module Polars
|
|
3468
3538
|
#
|
3469
3539
|
# @note
|
3470
3540
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3471
|
-
# window, consider using `
|
3541
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3472
3542
|
# computation.
|
3473
3543
|
#
|
3474
3544
|
# @return [Expr]
|
@@ -3558,7 +3628,7 @@ module Polars
|
|
3558
3628
|
#
|
3559
3629
|
# @note
|
3560
3630
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3561
|
-
# window, consider using `
|
3631
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3562
3632
|
# computation.
|
3563
3633
|
#
|
3564
3634
|
# @return [Expr]
|
@@ -3644,7 +3714,7 @@ module Polars
|
|
3644
3714
|
#
|
3645
3715
|
# @note
|
3646
3716
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3647
|
-
# window, consider using `
|
3717
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3648
3718
|
# computation.
|
3649
3719
|
#
|
3650
3720
|
# @return [Expr]
|
@@ -3733,7 +3803,7 @@ module Polars
|
|
3733
3803
|
#
|
3734
3804
|
# @note
|
3735
3805
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3736
|
-
# window, consider using `
|
3806
|
+
# window, consider using `group_by_rolling` this method can cache the window size
|
3737
3807
|
# computation.
|
3738
3808
|
#
|
3739
3809
|
# @return [Expr]
|
@@ -3948,7 +4018,7 @@ module Polars
|
|
3948
4018
|
# # ┌─────┐
|
3949
4019
|
# # │ a │
|
3950
4020
|
# # │ --- │
|
3951
|
-
# # │
|
4021
|
+
# # │ f64 │
|
3952
4022
|
# # ╞═════╡
|
3953
4023
|
# # │ 3.0 │
|
3954
4024
|
# # │ 4.5 │
|
@@ -4041,6 +4111,7 @@ module Polars
|
|
4041
4111
|
# # │ 12 ┆ 0.0 │
|
4042
4112
|
# # └──────┴────────────┘
|
4043
4113
|
def pct_change(n: 1)
|
4114
|
+
n = Utils.parse_as_expression(n)
|
4044
4115
|
wrap_expr(_rbexpr.pct_change(n))
|
4045
4116
|
end
|
4046
4117
|
|
@@ -4105,16 +4176,14 @@ module Polars
|
|
4105
4176
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
4106
4177
|
end
|
4107
4178
|
|
4108
|
-
#
|
4109
|
-
#
|
4110
|
-
# Only works for numerical types.
|
4179
|
+
# Set values outside the given boundaries to the boundary value.
|
4111
4180
|
#
|
4112
|
-
# If you want to clip other
|
4113
|
-
#
|
4181
|
+
# Only works for numeric and temporal columns. If you want to clip other data
|
4182
|
+
# types, consider writing a `when-then-otherwise` expression.
|
4114
4183
|
#
|
4115
|
-
# @param
|
4184
|
+
# @param lower_bound [Numeric]
|
4116
4185
|
# Minimum value.
|
4117
|
-
# @param
|
4186
|
+
# @param upper_bound [Numeric]
|
4118
4187
|
# Maximum value.
|
4119
4188
|
#
|
4120
4189
|
# @return [Expr]
|
@@ -4134,8 +4203,14 @@ module Polars
|
|
4134
4203
|
# # │ null ┆ null │
|
4135
4204
|
# # │ 50 ┆ 10 │
|
4136
4205
|
# # └──────┴─────────────┘
|
4137
|
-
def clip(
|
4138
|
-
|
4206
|
+
def clip(lower_bound, upper_bound)
|
4207
|
+
if !lower_bound.nil?
|
4208
|
+
lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
|
4209
|
+
end
|
4210
|
+
if !upper_bound.nil?
|
4211
|
+
upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
|
4212
|
+
end
|
4213
|
+
wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
|
4139
4214
|
end
|
4140
4215
|
|
4141
4216
|
# Clip (limit) the values in an array to a `min` boundary.
|
@@ -4145,7 +4220,7 @@ module Polars
|
|
4145
4220
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4146
4221
|
# expression. See `when` for more information.
|
4147
4222
|
#
|
4148
|
-
# @param
|
4223
|
+
# @param lower_bound [Numeric]
|
4149
4224
|
# Minimum value.
|
4150
4225
|
#
|
4151
4226
|
# @return [Expr]
|
@@ -4165,8 +4240,8 @@ module Polars
|
|
4165
4240
|
# # │ null ┆ null │
|
4166
4241
|
# # │ 50 ┆ 50 │
|
4167
4242
|
# # └──────┴─────────────┘
|
4168
|
-
def clip_min(
|
4169
|
-
|
4243
|
+
def clip_min(lower_bound)
|
4244
|
+
clip(lower_bound, nil)
|
4170
4245
|
end
|
4171
4246
|
|
4172
4247
|
# Clip (limit) the values in an array to a `max` boundary.
|
@@ -4176,7 +4251,7 @@ module Polars
|
|
4176
4251
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4177
4252
|
# expression. See `when` for more information.
|
4178
4253
|
#
|
4179
|
-
# @param
|
4254
|
+
# @param upper_bound [Numeric]
|
4180
4255
|
# Maximum value.
|
4181
4256
|
#
|
4182
4257
|
# @return [Expr]
|
@@ -4196,8 +4271,8 @@ module Polars
|
|
4196
4271
|
# # │ null ┆ null │
|
4197
4272
|
# # │ 50 ┆ 0 │
|
4198
4273
|
# # └──────┴─────────────┘
|
4199
|
-
def clip_max(
|
4200
|
-
|
4274
|
+
def clip_max(upper_bound)
|
4275
|
+
clip(nil, upper_bound)
|
4201
4276
|
end
|
4202
4277
|
|
4203
4278
|
# Calculate the lower bound.
|
@@ -4558,11 +4633,11 @@ module Polars
|
|
4558
4633
|
# # │ 1 │
|
4559
4634
|
# # │ 3 │
|
4560
4635
|
# # └─────┘
|
4561
|
-
def shuffle(seed: nil
|
4636
|
+
def shuffle(seed: nil)
|
4562
4637
|
if seed.nil?
|
4563
4638
|
seed = rand(10000)
|
4564
4639
|
end
|
4565
|
-
wrap_expr(_rbexpr.shuffle(seed
|
4640
|
+
wrap_expr(_rbexpr.shuffle(seed))
|
4566
4641
|
end
|
4567
4642
|
|
4568
4643
|
# Sample from this expression.
|
@@ -4600,22 +4675,23 @@ module Polars
|
|
4600
4675
|
with_replacement: true,
|
4601
4676
|
shuffle: false,
|
4602
4677
|
seed: nil,
|
4603
|
-
n: nil
|
4604
|
-
fixed_seed: false
|
4678
|
+
n: nil
|
4605
4679
|
)
|
4606
4680
|
if !n.nil? && !frac.nil?
|
4607
4681
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4608
4682
|
end
|
4609
4683
|
|
4610
4684
|
if !n.nil? && frac.nil?
|
4611
|
-
|
4685
|
+
n = Utils.parse_as_expression(n)
|
4686
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4612
4687
|
end
|
4613
4688
|
|
4614
4689
|
if frac.nil?
|
4615
4690
|
frac = 1.0
|
4616
4691
|
end
|
4692
|
+
frac = Utils.parse_as_expression(frac)
|
4617
4693
|
wrap_expr(
|
4618
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed
|
4694
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4619
4695
|
)
|
4620
4696
|
end
|
4621
4697
|
|
@@ -4885,7 +4961,7 @@ module Polars
|
|
4885
4961
|
# Number of valid values there should be in the window before the expression
|
4886
4962
|
# is evaluated. valid values = `length - null_count`
|
4887
4963
|
# @param parallel [Boolean]
|
4888
|
-
# Run in parallel. Don't do this in a
|
4964
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
4889
4965
|
# already has much parallelization.
|
4890
4966
|
#
|
4891
4967
|
# @return [Expr]
|
@@ -5057,6 +5133,13 @@ module Polars
|
|
5057
5133
|
MetaExpr.new(self)
|
5058
5134
|
end
|
5059
5135
|
|
5136
|
+
# Create an object namespace of all expressions that modify expression names.
|
5137
|
+
#
|
5138
|
+
# @return [NameExpr]
|
5139
|
+
def name
|
5140
|
+
NameExpr.new(self)
|
5141
|
+
end
|
5142
|
+
|
5060
5143
|
# Create an object namespace of all string related methods.
|
5061
5144
|
#
|
5062
5145
|
# @return [StringExpr]
|