polars-df 0.5.0-x86_64-linux → 0.7.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +3854 -4496
- data/README.md +11 -9
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
data/lib/polars/expr.rb
CHANGED
@@ -131,6 +131,13 @@ module Polars
|
|
131
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
132
132
|
end
|
133
133
|
|
134
|
+
# Performs boolean not.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
137
|
+
def !
|
138
|
+
is_not
|
139
|
+
end
|
140
|
+
|
134
141
|
# Performs negation.
|
135
142
|
#
|
136
143
|
# @return [Expr]
|
@@ -191,8 +198,8 @@ module Polars
|
|
191
198
|
# # ╞══════╪═══════╡
|
192
199
|
# # │ true ┆ false │
|
193
200
|
# # └──────┴───────┘
|
194
|
-
def any
|
195
|
-
wrap_expr(_rbexpr.any)
|
201
|
+
def any(drop_nulls: true)
|
202
|
+
wrap_expr(_rbexpr.any(drop_nulls))
|
196
203
|
end
|
197
204
|
|
198
205
|
# Check if all boolean values in a Boolean column are `true`.
|
@@ -216,8 +223,8 @@ module Polars
|
|
216
223
|
# # ╞══════╪═══════╪═══════╡
|
217
224
|
# # │ true ┆ false ┆ false │
|
218
225
|
# # └──────┴───────┴───────┘
|
219
|
-
def all
|
220
|
-
wrap_expr(_rbexpr.all)
|
226
|
+
def all(drop_nulls: true)
|
227
|
+
wrap_expr(_rbexpr.all(drop_nulls))
|
221
228
|
end
|
222
229
|
|
223
230
|
# Compute the square root of the elements.
|
@@ -362,7 +369,7 @@ module Polars
|
|
362
369
|
if columns.is_a?(String)
|
363
370
|
columns = [columns]
|
364
371
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
|
-
elsif !columns.is_a?(Array)
|
372
|
+
elsif !columns.is_a?(::Array)
|
366
373
|
columns = [columns]
|
367
374
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
375
|
end
|
@@ -401,21 +408,21 @@ module Polars
|
|
401
408
|
# # │ 18 ┆ 4 │
|
402
409
|
# # └─────┴─────┘
|
403
410
|
def keep_name
|
404
|
-
|
411
|
+
name.keep
|
405
412
|
end
|
406
413
|
|
407
414
|
# Add a prefix to the root column name of the expression.
|
408
415
|
#
|
409
416
|
# @return [Expr]
|
410
417
|
def prefix(prefix)
|
411
|
-
|
418
|
+
name.prefix(prefix)
|
412
419
|
end
|
413
420
|
|
414
421
|
# Add a suffix to the root column name of the expression.
|
415
422
|
#
|
416
423
|
# @return [Expr]
|
417
424
|
def suffix(suffix)
|
418
|
-
|
425
|
+
name.suffix(suffix)
|
419
426
|
end
|
420
427
|
|
421
428
|
# Rename the output of an expression by mapping a function over the root name.
|
@@ -443,7 +450,7 @@ module Polars
|
|
443
450
|
# # │ 1 ┆ 3 │
|
444
451
|
# # └───────────┴───────────┘
|
445
452
|
def map_alias(&f)
|
446
|
-
|
453
|
+
name.map(&f)
|
447
454
|
end
|
448
455
|
|
449
456
|
# Negate a boolean expression.
|
@@ -682,7 +689,7 @@ module Polars
|
|
682
689
|
# "value" => [94, 95, 96, 97, 97, 99]
|
683
690
|
# }
|
684
691
|
# )
|
685
|
-
# df.
|
692
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
686
693
|
# # =>
|
687
694
|
# # shape: (2, 2)
|
688
695
|
# # ┌───────┬───────────┐
|
@@ -820,18 +827,18 @@ module Polars
|
|
820
827
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
821
828
|
# # =>
|
822
829
|
# # shape: (6, 1)
|
823
|
-
# #
|
824
|
-
# # │
|
825
|
-
# # │ ---
|
826
|
-
# # │ i64
|
827
|
-
# #
|
828
|
-
# # │ null
|
829
|
-
# # │ null
|
830
|
-
# # │ null
|
831
|
-
# # │ 1
|
832
|
-
# # │ 1
|
833
|
-
# # │ 2
|
834
|
-
# #
|
830
|
+
# # ┌────────┐
|
831
|
+
# # │ repeat │
|
832
|
+
# # │ --- │
|
833
|
+
# # │ i64 │
|
834
|
+
# # ╞════════╡
|
835
|
+
# # │ null │
|
836
|
+
# # │ null │
|
837
|
+
# # │ null │
|
838
|
+
# # │ 1 │
|
839
|
+
# # │ 1 │
|
840
|
+
# # │ 2 │
|
841
|
+
# # └────────┘
|
835
842
|
def rechunk
|
836
843
|
wrap_expr(_rbexpr.rechunk)
|
837
844
|
end
|
@@ -905,8 +912,8 @@ module Polars
|
|
905
912
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
906
913
|
# df.select(
|
907
914
|
# [
|
908
|
-
# Polars.col("a").
|
909
|
-
# Polars.col("a").
|
915
|
+
# Polars.col("a").cum_sum,
|
916
|
+
# Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
|
910
917
|
# ]
|
911
918
|
# )
|
912
919
|
# # =>
|
@@ -921,9 +928,10 @@ module Polars
|
|
921
928
|
# # │ 6 ┆ 7 │
|
922
929
|
# # │ 10 ┆ 4 │
|
923
930
|
# # └─────┴───────────┘
|
924
|
-
def
|
925
|
-
wrap_expr(_rbexpr.
|
931
|
+
def cum_sum(reverse: false)
|
932
|
+
wrap_expr(_rbexpr.cum_sum(reverse))
|
926
933
|
end
|
934
|
+
alias_method :cumsum, :cum_sum
|
927
935
|
|
928
936
|
# Get an array with the cumulative product computed at every element.
|
929
937
|
#
|
@@ -940,8 +948,8 @@ module Polars
|
|
940
948
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
941
949
|
# df.select(
|
942
950
|
# [
|
943
|
-
# Polars.col("a").
|
944
|
-
# Polars.col("a").
|
951
|
+
# Polars.col("a").cum_prod,
|
952
|
+
# Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
|
945
953
|
# ]
|
946
954
|
# )
|
947
955
|
# # =>
|
@@ -956,9 +964,10 @@ module Polars
|
|
956
964
|
# # │ 6 ┆ 12 │
|
957
965
|
# # │ 24 ┆ 4 │
|
958
966
|
# # └─────┴───────────┘
|
959
|
-
def
|
960
|
-
wrap_expr(_rbexpr.
|
967
|
+
def cum_prod(reverse: false)
|
968
|
+
wrap_expr(_rbexpr.cum_prod(reverse))
|
961
969
|
end
|
970
|
+
alias_method :cumprod, :cum_prod
|
962
971
|
|
963
972
|
# Get an array with the cumulative min computed at every element.
|
964
973
|
#
|
@@ -971,8 +980,8 @@ module Polars
|
|
971
980
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
972
981
|
# df.select(
|
973
982
|
# [
|
974
|
-
# Polars.col("a").
|
975
|
-
# Polars.col("a").
|
983
|
+
# Polars.col("a").cum_min,
|
984
|
+
# Polars.col("a").cum_min(reverse: true).alias("a_reverse")
|
976
985
|
# ]
|
977
986
|
# )
|
978
987
|
# # =>
|
@@ -987,9 +996,10 @@ module Polars
|
|
987
996
|
# # │ 1 ┆ 3 │
|
988
997
|
# # │ 1 ┆ 4 │
|
989
998
|
# # └─────┴───────────┘
|
990
|
-
def
|
991
|
-
wrap_expr(_rbexpr.
|
999
|
+
def cum_min(reverse: false)
|
1000
|
+
wrap_expr(_rbexpr.cum_min(reverse))
|
992
1001
|
end
|
1002
|
+
alias_method :cummin, :cum_min
|
993
1003
|
|
994
1004
|
# Get an array with the cumulative max computed at every element.
|
995
1005
|
#
|
@@ -1002,8 +1012,8 @@ module Polars
|
|
1002
1012
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1003
1013
|
# df.select(
|
1004
1014
|
# [
|
1005
|
-
# Polars.col("a").
|
1006
|
-
# Polars.col("a").
|
1015
|
+
# Polars.col("a").cum_max,
|
1016
|
+
# Polars.col("a").cum_max(reverse: true).alias("a_reverse")
|
1007
1017
|
# ]
|
1008
1018
|
# )
|
1009
1019
|
# # =>
|
@@ -1018,9 +1028,10 @@ module Polars
|
|
1018
1028
|
# # │ 3 ┆ 4 │
|
1019
1029
|
# # │ 4 ┆ 4 │
|
1020
1030
|
# # └─────┴───────────┘
|
1021
|
-
def
|
1022
|
-
wrap_expr(_rbexpr.
|
1031
|
+
def cum_max(reverse: false)
|
1032
|
+
wrap_expr(_rbexpr.cum_max(reverse))
|
1023
1033
|
end
|
1034
|
+
alias_method :cummax, :cum_max
|
1024
1035
|
|
1025
1036
|
# Get an array with the cumulative count computed at every element.
|
1026
1037
|
#
|
@@ -1035,8 +1046,8 @@ module Polars
|
|
1035
1046
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1036
1047
|
# df.select(
|
1037
1048
|
# [
|
1038
|
-
# Polars.col("a").
|
1039
|
-
# Polars.col("a").
|
1049
|
+
# Polars.col("a").cum_count,
|
1050
|
+
# Polars.col("a").cum_count(reverse: true).alias("a_reverse")
|
1040
1051
|
# ]
|
1041
1052
|
# )
|
1042
1053
|
# # =>
|
@@ -1051,9 +1062,10 @@ module Polars
|
|
1051
1062
|
# # │ 2 ┆ 1 │
|
1052
1063
|
# # │ 3 ┆ 0 │
|
1053
1064
|
# # └─────┴───────────┘
|
1054
|
-
def
|
1055
|
-
wrap_expr(_rbexpr.
|
1065
|
+
def cum_count(reverse: false)
|
1066
|
+
wrap_expr(_rbexpr.cum_count(reverse))
|
1056
1067
|
end
|
1068
|
+
alias_method :cumcount, :cum_count
|
1057
1069
|
|
1058
1070
|
# Rounds down to the nearest integer value.
|
1059
1071
|
#
|
@@ -1229,7 +1241,7 @@ module Polars
|
|
1229
1241
|
|
1230
1242
|
# Sort this column. In projection/selection context the whole column is sorted.
|
1231
1243
|
#
|
1232
|
-
# If used in a
|
1244
|
+
# If used in a group by context, the groups are sorted.
|
1233
1245
|
#
|
1234
1246
|
# @param reverse [Boolean]
|
1235
1247
|
# false -> order from small to large.
|
@@ -1287,7 +1299,7 @@ module Polars
|
|
1287
1299
|
# # └───────┘
|
1288
1300
|
#
|
1289
1301
|
# @example
|
1290
|
-
# df.
|
1302
|
+
# df.group_by("group").agg(Polars.col("value").sort)
|
1291
1303
|
# # =>
|
1292
1304
|
# # shape: (2, 2)
|
1293
1305
|
# # ┌───────┬────────────┐
|
@@ -1337,6 +1349,7 @@ module Polars
|
|
1337
1349
|
# # │ 2 ┆ 98 │
|
1338
1350
|
# # └───────┴──────────┘
|
1339
1351
|
def top_k(k: 5)
|
1352
|
+
k = Utils.parse_as_expression(k)
|
1340
1353
|
wrap_expr(_rbexpr.top_k(k))
|
1341
1354
|
end
|
1342
1355
|
|
@@ -1375,6 +1388,7 @@ module Polars
|
|
1375
1388
|
# # │ 2 ┆ 98 │
|
1376
1389
|
# # └───────┴──────────┘
|
1377
1390
|
def bottom_k(k: 5)
|
1391
|
+
k = Utils.parse_as_expression(k)
|
1378
1392
|
wrap_expr(_rbexpr.bottom_k(k))
|
1379
1393
|
end
|
1380
1394
|
|
@@ -1494,7 +1508,7 @@ module Polars
|
|
1494
1508
|
# Sort this column by the ordering of another column, or multiple other columns.
|
1495
1509
|
#
|
1496
1510
|
# In projection/selection context the whole column is sorted.
|
1497
|
-
# If used in a
|
1511
|
+
# If used in a group by context, the groups are sorted.
|
1498
1512
|
#
|
1499
1513
|
# @param by [Object]
|
1500
1514
|
# The column(s) used for sorting.
|
@@ -1534,10 +1548,10 @@ module Polars
|
|
1534
1548
|
# # │ two │
|
1535
1549
|
# # └───────┘
|
1536
1550
|
def sort_by(by, reverse: false)
|
1537
|
-
if !by.is_a?(Array)
|
1551
|
+
if !by.is_a?(::Array)
|
1538
1552
|
by = [by]
|
1539
1553
|
end
|
1540
|
-
if !reverse.is_a?(Array)
|
1554
|
+
if !reverse.is_a?(::Array)
|
1541
1555
|
reverse = [reverse]
|
1542
1556
|
end
|
1543
1557
|
by = Utils.selection_to_rbexpr_list(by)
|
@@ -1566,30 +1580,33 @@ module Polars
|
|
1566
1580
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1567
1581
|
# }
|
1568
1582
|
# )
|
1569
|
-
# df.
|
1583
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
|
1570
1584
|
# # =>
|
1571
1585
|
# # shape: (2, 2)
|
1572
|
-
# #
|
1573
|
-
# # │ group ┆ value
|
1574
|
-
# # │ --- ┆ ---
|
1575
|
-
# # │ str ┆ i64
|
1576
|
-
# #
|
1577
|
-
# # │ one ┆ 98
|
1578
|
-
# # │ two ┆ 99
|
1579
|
-
# #
|
1580
|
-
def
|
1581
|
-
if indices.is_a?(Array)
|
1586
|
+
# # ┌───────┬───────────┐
|
1587
|
+
# # │ group ┆ value │
|
1588
|
+
# # │ --- ┆ --- │
|
1589
|
+
# # │ str ┆ list[i64] │
|
1590
|
+
# # ╞═══════╪═══════════╡
|
1591
|
+
# # │ one ┆ [2, 98] │
|
1592
|
+
# # │ two ┆ [4, 99] │
|
1593
|
+
# # └───────┴───────────┘
|
1594
|
+
def gather(indices)
|
1595
|
+
if indices.is_a?(::Array)
|
1582
1596
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1597
|
else
|
1584
1598
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
1585
1599
|
end
|
1586
|
-
wrap_expr(_rbexpr.
|
1600
|
+
wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
|
1587
1601
|
end
|
1602
|
+
alias_method :take, :gather
|
1588
1603
|
|
1589
1604
|
# Shift the values by a given period.
|
1590
1605
|
#
|
1591
|
-
# @param
|
1606
|
+
# @param n [Integer]
|
1592
1607
|
# Number of places to shift (may be negative).
|
1608
|
+
# @param fill_value [Object]
|
1609
|
+
# Fill the resulting null values with this value.
|
1593
1610
|
#
|
1594
1611
|
# @return [Expr]
|
1595
1612
|
#
|
@@ -1608,8 +1625,12 @@ module Polars
|
|
1608
1625
|
# # │ 2 │
|
1609
1626
|
# # │ 3 │
|
1610
1627
|
# # └──────┘
|
1611
|
-
def shift(
|
1612
|
-
|
1628
|
+
def shift(n = 1, fill_value: nil)
|
1629
|
+
if !fill_value.nil?
|
1630
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1631
|
+
end
|
1632
|
+
n = Utils.parse_as_expression(n)
|
1633
|
+
wrap_expr(_rbexpr.shift(n, fill_value))
|
1613
1634
|
end
|
1614
1635
|
|
1615
1636
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1637,8 +1658,7 @@ module Polars
|
|
1637
1658
|
# # │ 3 │
|
1638
1659
|
# # └─────┘
|
1639
1660
|
def shift_and_fill(periods, fill_value)
|
1640
|
-
|
1641
|
-
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
1661
|
+
shift(periods, fill_value: fill_value)
|
1642
1662
|
end
|
1643
1663
|
|
1644
1664
|
# Fill null values using the specified value or strategy.
|
@@ -2063,7 +2083,7 @@ module Polars
|
|
2063
2083
|
# # │ 2 │
|
2064
2084
|
# # └─────┘
|
2065
2085
|
def approx_unique
|
2066
|
-
wrap_expr(_rbexpr.
|
2086
|
+
wrap_expr(_rbexpr.approx_n_unique)
|
2067
2087
|
end
|
2068
2088
|
|
2069
2089
|
# Count null values.
|
@@ -2201,7 +2221,7 @@ module Polars
|
|
2201
2221
|
|
2202
2222
|
# Apply window function over a subgroup.
|
2203
2223
|
#
|
2204
|
-
# This is similar to a
|
2224
|
+
# This is similar to a group by + aggregation + self join.
|
2205
2225
|
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2206
2226
|
#
|
2207
2227
|
# @param expr [Object]
|
@@ -2309,9 +2329,10 @@ module Polars
|
|
2309
2329
|
# # │ 1 ┆ false │
|
2310
2330
|
# # │ 5 ┆ true │
|
2311
2331
|
# # └─────┴──────────┘
|
2312
|
-
def
|
2313
|
-
wrap_expr(_rbexpr.
|
2332
|
+
def is_first_distinct
|
2333
|
+
wrap_expr(_rbexpr.is_first_distinct)
|
2314
2334
|
end
|
2335
|
+
alias_method :is_first, :is_first_distinct
|
2315
2336
|
|
2316
2337
|
# Get mask of duplicated values.
|
2317
2338
|
#
|
@@ -2335,6 +2356,54 @@ module Polars
|
|
2335
2356
|
wrap_expr(_rbexpr.is_duplicated)
|
2336
2357
|
end
|
2337
2358
|
|
2359
|
+
# Get a boolean mask of the local maximum peaks.
|
2360
|
+
#
|
2361
|
+
# @return [Expr]
|
2362
|
+
#
|
2363
|
+
# @example
|
2364
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
2365
|
+
# df.select(Polars.col("a").peak_max)
|
2366
|
+
# # =>
|
2367
|
+
# # shape: (5, 1)
|
2368
|
+
# # ┌───────┐
|
2369
|
+
# # │ a │
|
2370
|
+
# # │ --- │
|
2371
|
+
# # │ bool │
|
2372
|
+
# # ╞═══════╡
|
2373
|
+
# # │ false │
|
2374
|
+
# # │ false │
|
2375
|
+
# # │ false │
|
2376
|
+
# # │ false │
|
2377
|
+
# # │ true │
|
2378
|
+
# # └───────┘
|
2379
|
+
def peak_max
|
2380
|
+
wrap_expr(_rbexpr.peak_max)
|
2381
|
+
end
|
2382
|
+
|
2383
|
+
# Get a boolean mask of the local minimum peaks.
|
2384
|
+
#
|
2385
|
+
# @return [Expr]
|
2386
|
+
#
|
2387
|
+
# @example
|
2388
|
+
# df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
|
2389
|
+
# df.select(Polars.col("a").peak_min)
|
2390
|
+
# # =>
|
2391
|
+
# # shape: (5, 1)
|
2392
|
+
# # ┌───────┐
|
2393
|
+
# # │ a │
|
2394
|
+
# # │ --- │
|
2395
|
+
# # │ bool │
|
2396
|
+
# # ╞═══════╡
|
2397
|
+
# # │ false │
|
2398
|
+
# # │ true │
|
2399
|
+
# # │ false │
|
2400
|
+
# # │ true │
|
2401
|
+
# # │ false │
|
2402
|
+
# # └───────┘
|
2403
|
+
def peak_min
|
2404
|
+
wrap_expr(_rbexpr.peak_min)
|
2405
|
+
end
|
2406
|
+
|
2338
2407
|
# Get quantile value.
|
2339
2408
|
#
|
2340
2409
|
# @param quantile [Float]
|
@@ -2427,7 +2496,7 @@ module Polars
|
|
2427
2496
|
# }
|
2428
2497
|
# )
|
2429
2498
|
# (
|
2430
|
-
# df.
|
2499
|
+
# df.group_by("group_col").agg(
|
2431
2500
|
# [
|
2432
2501
|
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2433
2502
|
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2436,14 +2505,14 @@ module Polars
|
|
2436
2505
|
# ).sort("group_col")
|
2437
2506
|
# # =>
|
2438
2507
|
# # shape: (2, 3)
|
2439
|
-
# #
|
2440
|
-
# # │ group_col ┆ lt
|
2441
|
-
# # │ --- ┆ ---
|
2442
|
-
# # │ str ┆ i64
|
2443
|
-
# #
|
2444
|
-
# # │ g1 ┆ 1
|
2445
|
-
# # │ g2 ┆
|
2446
|
-
# #
|
2508
|
+
# # ┌───────────┬─────┬─────┐
|
2509
|
+
# # │ group_col ┆ lt ┆ gte │
|
2510
|
+
# # │ --- ┆ --- ┆ --- │
|
2511
|
+
# # │ str ┆ i64 ┆ i64 │
|
2512
|
+
# # ╞═══════════╪═════╪═════╡
|
2513
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2514
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2515
|
+
# # └───────────┴─────┴─────┘
|
2447
2516
|
def filter(predicate)
|
2448
2517
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2449
2518
|
end
|
@@ -2465,7 +2534,7 @@ module Polars
|
|
2465
2534
|
# }
|
2466
2535
|
# )
|
2467
2536
|
# (
|
2468
|
-
# df.
|
2537
|
+
# df.group_by("group_col").agg(
|
2469
2538
|
# [
|
2470
2539
|
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2471
2540
|
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2474,14 +2543,14 @@ module Polars
|
|
2474
2543
|
# ).sort("group_col")
|
2475
2544
|
# # =>
|
2476
2545
|
# # shape: (2, 3)
|
2477
|
-
# #
|
2478
|
-
# # │ group_col ┆ lt
|
2479
|
-
# # │ --- ┆ ---
|
2480
|
-
# # │ str ┆ i64
|
2481
|
-
# #
|
2482
|
-
# # │ g1 ┆ 1
|
2483
|
-
# # │ g2 ┆
|
2484
|
-
# #
|
2546
|
+
# # ┌───────────┬─────┬─────┐
|
2547
|
+
# # │ group_col ┆ lt ┆ gte │
|
2548
|
+
# # │ --- ┆ --- ┆ --- │
|
2549
|
+
# # │ str ┆ i64 ┆ i64 │
|
2550
|
+
# # ╞═══════════╪═════╪═════╡
|
2551
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2552
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2553
|
+
# # └───────────┴─────┴─────┘
|
2485
2554
|
def where(predicate)
|
2486
2555
|
filter(predicate)
|
2487
2556
|
end
|
@@ -2583,7 +2652,7 @@ module Polars
|
|
2583
2652
|
#
|
2584
2653
|
# @example In a GroupBy context the function is applied by group:
|
2585
2654
|
# df.lazy
|
2586
|
-
# .
|
2655
|
+
# .group_by("b", maintain_order: true)
|
2587
2656
|
# .agg(
|
2588
2657
|
# [
|
2589
2658
|
# Polars.col("a").apply { |x| x.sum }
|
@@ -2616,25 +2685,23 @@ module Polars
|
|
2616
2685
|
# @return [Expr]
|
2617
2686
|
#
|
2618
2687
|
# @example
|
2619
|
-
#
|
2620
|
-
#
|
2621
|
-
#
|
2622
|
-
#
|
2623
|
-
#
|
2624
|
-
#
|
2625
|
-
#
|
2626
|
-
#
|
2627
|
-
#
|
2628
|
-
#
|
2629
|
-
#
|
2630
|
-
#
|
2631
|
-
#
|
2632
|
-
#
|
2633
|
-
#
|
2634
|
-
#
|
2635
|
-
#
|
2636
|
-
# # │ d │
|
2637
|
-
# # └─────┘
|
2688
|
+
# df = Polars::DataFrame.new(
|
2689
|
+
# {
|
2690
|
+
# "group" => ["a", "b", "b"],
|
2691
|
+
# "values" => [[1, 2], [2, 3], [4]]
|
2692
|
+
# }
|
2693
|
+
# )
|
2694
|
+
# df.group_by("group").agg(Polars.col("values").flatten)
|
2695
|
+
# # =>
|
2696
|
+
# # shape: (2, 2)
|
2697
|
+
# # ┌───────┬───────────┐
|
2698
|
+
# # │ group ┆ values │
|
2699
|
+
# # │ --- ┆ --- │
|
2700
|
+
# # │ str ┆ list[i64] │
|
2701
|
+
# # ╞═══════╪═══════════╡
|
2702
|
+
# # │ a ┆ [1, 2] │
|
2703
|
+
# # │ b ┆ [2, 3, 4] │
|
2704
|
+
# # └───────┴───────────┘
|
2638
2705
|
def flatten
|
2639
2706
|
wrap_expr(_rbexpr.explode)
|
2640
2707
|
end
|
@@ -2672,7 +2739,7 @@ module Polars
|
|
2672
2739
|
#
|
2673
2740
|
# @example
|
2674
2741
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2675
|
-
# df.select(Polars.col("foo").
|
2742
|
+
# df.select(Polars.col("foo").gather_every(3))
|
2676
2743
|
# # =>
|
2677
2744
|
# # shape: (3, 1)
|
2678
2745
|
# # ┌─────┐
|
@@ -2684,9 +2751,10 @@ module Polars
|
|
2684
2751
|
# # │ 4 │
|
2685
2752
|
# # │ 7 │
|
2686
2753
|
# # └─────┘
|
2687
|
-
def
|
2688
|
-
wrap_expr(_rbexpr.
|
2754
|
+
def gather_every(n)
|
2755
|
+
wrap_expr(_rbexpr.gather_every(n))
|
2689
2756
|
end
|
2757
|
+
alias_method :take_every, :gather_every
|
2690
2758
|
|
2691
2759
|
# Get the first `n` rows.
|
2692
2760
|
#
|
@@ -2798,7 +2866,7 @@ module Polars
|
|
2798
2866
|
# # │ false │
|
2799
2867
|
# # └──────────┘
|
2800
2868
|
def is_in(other)
|
2801
|
-
if other.is_a?(Array)
|
2869
|
+
if other.is_a?(::Array)
|
2802
2870
|
if other.length == 0
|
2803
2871
|
other = Polars.lit(nil)
|
2804
2872
|
else
|
@@ -3059,11 +3127,11 @@ module Polars
|
|
3059
3127
|
# # ┌─────┬─────┐
|
3060
3128
|
# # │ a ┆ b │
|
3061
3129
|
# # │ --- ┆ --- │
|
3062
|
-
# # │
|
3130
|
+
# # │ f64 ┆ f64 │
|
3063
3131
|
# # ╞═════╪═════╡
|
3064
|
-
# # │ 1
|
3065
|
-
# # │ 2
|
3066
|
-
# # │ 3
|
3132
|
+
# # │ 1.0 ┆ 1.0 │
|
3133
|
+
# # │ 2.0 ┆ NaN │
|
3134
|
+
# # │ 3.0 ┆ 3.0 │
|
3067
3135
|
# # └─────┴─────┘
|
3068
3136
|
def interpolate(method: "linear")
|
3069
3137
|
wrap_expr(_rbexpr.interpolate(method))
|
@@ -3114,7 +3182,7 @@ module Polars
|
|
3114
3182
|
#
|
3115
3183
|
# @note
|
3116
3184
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3117
|
-
# window, consider using `
|
3185
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3118
3186
|
# computation.
|
3119
3187
|
#
|
3120
3188
|
# @return [Expr]
|
@@ -3203,7 +3271,7 @@ module Polars
|
|
3203
3271
|
#
|
3204
3272
|
# @note
|
3205
3273
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3206
|
-
# window, consider using `
|
3274
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3207
3275
|
# computation.
|
3208
3276
|
#
|
3209
3277
|
# @return [Expr]
|
@@ -3292,7 +3360,7 @@ module Polars
|
|
3292
3360
|
#
|
3293
3361
|
# @note
|
3294
3362
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3295
|
-
# window, consider using `
|
3363
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3296
3364
|
# computation.
|
3297
3365
|
#
|
3298
3366
|
# @return [Expr]
|
@@ -3381,7 +3449,7 @@ module Polars
|
|
3381
3449
|
#
|
3382
3450
|
# @note
|
3383
3451
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3384
|
-
# window, consider using `
|
3452
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3385
3453
|
# computation.
|
3386
3454
|
#
|
3387
3455
|
# @return [Expr]
|
@@ -3470,7 +3538,7 @@ module Polars
|
|
3470
3538
|
#
|
3471
3539
|
# @note
|
3472
3540
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3473
|
-
# window, consider using `
|
3541
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3474
3542
|
# computation.
|
3475
3543
|
#
|
3476
3544
|
# @return [Expr]
|
@@ -3502,14 +3570,15 @@ module Polars
|
|
3502
3570
|
min_periods: nil,
|
3503
3571
|
center: false,
|
3504
3572
|
by: nil,
|
3505
|
-
closed: "left"
|
3573
|
+
closed: "left",
|
3574
|
+
ddof: 1
|
3506
3575
|
)
|
3507
3576
|
window_size, min_periods = _prepare_rolling_window_args(
|
3508
3577
|
window_size, min_periods
|
3509
3578
|
)
|
3510
3579
|
wrap_expr(
|
3511
3580
|
_rbexpr.rolling_std(
|
3512
|
-
window_size, weights, min_periods, center, by, closed
|
3581
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3513
3582
|
)
|
3514
3583
|
)
|
3515
3584
|
end
|
@@ -3559,7 +3628,7 @@ module Polars
|
|
3559
3628
|
#
|
3560
3629
|
# @note
|
3561
3630
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3562
|
-
# window, consider using `
|
3631
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3563
3632
|
# computation.
|
3564
3633
|
#
|
3565
3634
|
# @return [Expr]
|
@@ -3591,14 +3660,15 @@ module Polars
|
|
3591
3660
|
min_periods: nil,
|
3592
3661
|
center: false,
|
3593
3662
|
by: nil,
|
3594
|
-
closed: "left"
|
3663
|
+
closed: "left",
|
3664
|
+
ddof: 1
|
3595
3665
|
)
|
3596
3666
|
window_size, min_periods = _prepare_rolling_window_args(
|
3597
3667
|
window_size, min_periods
|
3598
3668
|
)
|
3599
3669
|
wrap_expr(
|
3600
3670
|
_rbexpr.rolling_var(
|
3601
|
-
window_size, weights, min_periods, center, by, closed
|
3671
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3602
3672
|
)
|
3603
3673
|
)
|
3604
3674
|
end
|
@@ -3644,7 +3714,7 @@ module Polars
|
|
3644
3714
|
#
|
3645
3715
|
# @note
|
3646
3716
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3647
|
-
# window, consider using `
|
3717
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3648
3718
|
# computation.
|
3649
3719
|
#
|
3650
3720
|
# @return [Expr]
|
@@ -3733,7 +3803,7 @@ module Polars
|
|
3733
3803
|
#
|
3734
3804
|
# @note
|
3735
3805
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3736
|
-
# window, consider using `
|
3806
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3737
3807
|
# computation.
|
3738
3808
|
#
|
3739
3809
|
# @return [Expr]
|
@@ -3948,7 +4018,7 @@ module Polars
|
|
3948
4018
|
# # ┌─────┐
|
3949
4019
|
# # │ a │
|
3950
4020
|
# # │ --- │
|
3951
|
-
# # │
|
4021
|
+
# # │ f64 │
|
3952
4022
|
# # ╞═════╡
|
3953
4023
|
# # │ 3.0 │
|
3954
4024
|
# # │ 4.5 │
|
@@ -4041,6 +4111,7 @@ module Polars
|
|
4041
4111
|
# # │ 12 ┆ 0.0 │
|
4042
4112
|
# # └──────┴────────────┘
|
4043
4113
|
def pct_change(n: 1)
|
4114
|
+
n = Utils.parse_as_expression(n)
|
4044
4115
|
wrap_expr(_rbexpr.pct_change(n))
|
4045
4116
|
end
|
4046
4117
|
|
@@ -4105,16 +4176,14 @@ module Polars
|
|
4105
4176
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
4106
4177
|
end
|
4107
4178
|
|
4108
|
-
#
|
4109
|
-
#
|
4110
|
-
# Only works for numerical types.
|
4179
|
+
# Set values outside the given boundaries to the boundary value.
|
4111
4180
|
#
|
4112
|
-
# If you want to clip other
|
4113
|
-
#
|
4181
|
+
# Only works for numeric and temporal columns. If you want to clip other data
|
4182
|
+
# types, consider writing a `when-then-otherwise` expression.
|
4114
4183
|
#
|
4115
|
-
# @param
|
4184
|
+
# @param lower_bound [Numeric]
|
4116
4185
|
# Minimum value.
|
4117
|
-
# @param
|
4186
|
+
# @param upper_bound [Numeric]
|
4118
4187
|
# Maximum value.
|
4119
4188
|
#
|
4120
4189
|
# @return [Expr]
|
@@ -4134,8 +4203,14 @@ module Polars
|
|
4134
4203
|
# # │ null ┆ null │
|
4135
4204
|
# # │ 50 ┆ 10 │
|
4136
4205
|
# # └──────┴─────────────┘
|
4137
|
-
def clip(
|
4138
|
-
|
4206
|
+
def clip(lower_bound, upper_bound)
|
4207
|
+
if !lower_bound.nil?
|
4208
|
+
lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
|
4209
|
+
end
|
4210
|
+
if !upper_bound.nil?
|
4211
|
+
upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
|
4212
|
+
end
|
4213
|
+
wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
|
4139
4214
|
end
|
4140
4215
|
|
4141
4216
|
# Clip (limit) the values in an array to a `min` boundary.
|
@@ -4145,7 +4220,7 @@ module Polars
|
|
4145
4220
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4146
4221
|
# expression. See `when` for more information.
|
4147
4222
|
#
|
4148
|
-
# @param
|
4223
|
+
# @param lower_bound [Numeric]
|
4149
4224
|
# Minimum value.
|
4150
4225
|
#
|
4151
4226
|
# @return [Expr]
|
@@ -4165,8 +4240,8 @@ module Polars
|
|
4165
4240
|
# # │ null ┆ null │
|
4166
4241
|
# # │ 50 ┆ 50 │
|
4167
4242
|
# # └──────┴─────────────┘
|
4168
|
-
def clip_min(
|
4169
|
-
|
4243
|
+
def clip_min(lower_bound)
|
4244
|
+
clip(lower_bound, nil)
|
4170
4245
|
end
|
4171
4246
|
|
4172
4247
|
# Clip (limit) the values in an array to a `max` boundary.
|
@@ -4176,7 +4251,7 @@ module Polars
|
|
4176
4251
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4177
4252
|
# expression. See `when` for more information.
|
4178
4253
|
#
|
4179
|
-
# @param
|
4254
|
+
# @param upper_bound [Numeric]
|
4180
4255
|
# Maximum value.
|
4181
4256
|
#
|
4182
4257
|
# @return [Expr]
|
@@ -4196,8 +4271,8 @@ module Polars
|
|
4196
4271
|
# # │ null ┆ null │
|
4197
4272
|
# # │ 50 ┆ 0 │
|
4198
4273
|
# # └──────┴─────────────┘
|
4199
|
-
def clip_max(
|
4200
|
-
|
4274
|
+
def clip_max(upper_bound)
|
4275
|
+
clip(nil, upper_bound)
|
4201
4276
|
end
|
4202
4277
|
|
4203
4278
|
# Calculate the lower bound.
|
@@ -4607,12 +4682,14 @@ module Polars
|
|
4607
4682
|
end
|
4608
4683
|
|
4609
4684
|
if !n.nil? && frac.nil?
|
4685
|
+
n = Utils.parse_as_expression(n)
|
4610
4686
|
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4611
4687
|
end
|
4612
4688
|
|
4613
4689
|
if frac.nil?
|
4614
4690
|
frac = 1.0
|
4615
4691
|
end
|
4692
|
+
frac = Utils.parse_as_expression(frac)
|
4616
4693
|
wrap_expr(
|
4617
4694
|
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4618
4695
|
)
|
@@ -4884,7 +4961,7 @@ module Polars
|
|
4884
4961
|
# Number of valid values there should be in the window before the expression
|
4885
4962
|
# is evaluated. valid values = `length - null_count`
|
4886
4963
|
# @param parallel [Boolean]
|
4887
|
-
# Run in parallel. Don't do this in a
|
4964
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
4888
4965
|
# already has much parallelization.
|
4889
4966
|
#
|
4890
4967
|
# @return [Expr]
|
@@ -4929,8 +5006,8 @@ module Polars
|
|
4929
5006
|
#
|
4930
5007
|
# Enables downstream code to use fast paths for sorted arrays.
|
4931
5008
|
#
|
4932
|
-
# @param
|
4933
|
-
#
|
5009
|
+
# @param descending [Boolean]
|
5010
|
+
# Whether the `Series` order is descending.
|
4934
5011
|
#
|
4935
5012
|
# @return [Expr]
|
4936
5013
|
#
|
@@ -4950,9 +5027,9 @@ module Polars
|
|
4950
5027
|
# # ╞════════╡
|
4951
5028
|
# # │ 3 │
|
4952
5029
|
# # └────────┘
|
4953
|
-
|
4954
|
-
|
4955
|
-
|
5030
|
+
def set_sorted(descending: false)
|
5031
|
+
wrap_expr(_rbexpr.set_sorted_flag(descending))
|
5032
|
+
end
|
4956
5033
|
|
4957
5034
|
# Aggregate to list.
|
4958
5035
|
#
|
@@ -4965,7 +5042,7 @@ module Polars
|
|
4965
5042
|
# "b" => [4, 5, 6]
|
4966
5043
|
# }
|
4967
5044
|
# )
|
4968
|
-
# df.select(Polars.all.
|
5045
|
+
# df.select(Polars.all.implode)
|
4969
5046
|
# # =>
|
4970
5047
|
# # shape: (1, 2)
|
4971
5048
|
# # ┌───────────┬───────────┐
|
@@ -4978,7 +5055,6 @@ module Polars
|
|
4978
5055
|
def implode
|
4979
5056
|
wrap_expr(_rbexpr.implode)
|
4980
5057
|
end
|
4981
|
-
alias_method :list, :implode
|
4982
5058
|
|
4983
5059
|
# Shrink numeric columns to the minimal required datatype.
|
4984
5060
|
#
|
@@ -5018,10 +5094,17 @@ module Polars
|
|
5018
5094
|
# Create an object namespace of all list related methods.
|
5019
5095
|
#
|
5020
5096
|
# @return [ListExpr]
|
5021
|
-
def
|
5097
|
+
def list
|
5022
5098
|
ListExpr.new(self)
|
5023
5099
|
end
|
5024
5100
|
|
5101
|
+
# Create an object namespace of all array related methods.
|
5102
|
+
#
|
5103
|
+
# @return [ArrayExpr]
|
5104
|
+
def arr
|
5105
|
+
ArrayExpr.new(self)
|
5106
|
+
end
|
5107
|
+
|
5025
5108
|
# Create an object namespace of all binary related methods.
|
5026
5109
|
#
|
5027
5110
|
# @return [BinaryExpr]
|
@@ -5050,6 +5133,13 @@ module Polars
|
|
5050
5133
|
MetaExpr.new(self)
|
5051
5134
|
end
|
5052
5135
|
|
5136
|
+
# Create an object namespace of all expressions that modify expression names.
|
5137
|
+
#
|
5138
|
+
# @return [NameExpr]
|
5139
|
+
def name
|
5140
|
+
NameExpr.new(self)
|
5141
|
+
end
|
5142
|
+
|
5053
5143
|
# Create an object namespace of all string related methods.
|
5054
5144
|
#
|
5055
5145
|
# @return [StringExpr]
|