polars-df 0.5.0-x86_64-darwin → 0.7.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4572 -5214
- data/README.md +11 -9
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
data/lib/polars/expr.rb
CHANGED
@@ -131,6 +131,13 @@ module Polars
|
|
131
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
132
132
|
end
|
133
133
|
|
134
|
+
# Performs boolean not.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
137
|
+
def !
|
138
|
+
is_not
|
139
|
+
end
|
140
|
+
|
134
141
|
# Performs negation.
|
135
142
|
#
|
136
143
|
# @return [Expr]
|
@@ -191,8 +198,8 @@ module Polars
|
|
191
198
|
# # ╞══════╪═══════╡
|
192
199
|
# # │ true ┆ false │
|
193
200
|
# # └──────┴───────┘
|
194
|
-
def any
|
195
|
-
wrap_expr(_rbexpr.any)
|
201
|
+
def any(drop_nulls: true)
|
202
|
+
wrap_expr(_rbexpr.any(drop_nulls))
|
196
203
|
end
|
197
204
|
|
198
205
|
# Check if all boolean values in a Boolean column are `true`.
|
@@ -216,8 +223,8 @@ module Polars
|
|
216
223
|
# # ╞══════╪═══════╪═══════╡
|
217
224
|
# # │ true ┆ false ┆ false │
|
218
225
|
# # └──────┴───────┴───────┘
|
219
|
-
def all
|
220
|
-
wrap_expr(_rbexpr.all)
|
226
|
+
def all(drop_nulls: true)
|
227
|
+
wrap_expr(_rbexpr.all(drop_nulls))
|
221
228
|
end
|
222
229
|
|
223
230
|
# Compute the square root of the elements.
|
@@ -362,7 +369,7 @@ module Polars
|
|
362
369
|
if columns.is_a?(String)
|
363
370
|
columns = [columns]
|
364
371
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
|
-
elsif !columns.is_a?(Array)
|
372
|
+
elsif !columns.is_a?(::Array)
|
366
373
|
columns = [columns]
|
367
374
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
375
|
end
|
@@ -401,21 +408,21 @@ module Polars
|
|
401
408
|
# # │ 18 ┆ 4 │
|
402
409
|
# # └─────┴─────┘
|
403
410
|
def keep_name
|
404
|
-
|
411
|
+
name.keep
|
405
412
|
end
|
406
413
|
|
407
414
|
# Add a prefix to the root column name of the expression.
|
408
415
|
#
|
409
416
|
# @return [Expr]
|
410
417
|
def prefix(prefix)
|
411
|
-
|
418
|
+
name.prefix(prefix)
|
412
419
|
end
|
413
420
|
|
414
421
|
# Add a suffix to the root column name of the expression.
|
415
422
|
#
|
416
423
|
# @return [Expr]
|
417
424
|
def suffix(suffix)
|
418
|
-
|
425
|
+
name.suffix(suffix)
|
419
426
|
end
|
420
427
|
|
421
428
|
# Rename the output of an expression by mapping a function over the root name.
|
@@ -443,7 +450,7 @@ module Polars
|
|
443
450
|
# # │ 1 ┆ 3 │
|
444
451
|
# # └───────────┴───────────┘
|
445
452
|
def map_alias(&f)
|
446
|
-
|
453
|
+
name.map(&f)
|
447
454
|
end
|
448
455
|
|
449
456
|
# Negate a boolean expression.
|
@@ -682,7 +689,7 @@ module Polars
|
|
682
689
|
# "value" => [94, 95, 96, 97, 97, 99]
|
683
690
|
# }
|
684
691
|
# )
|
685
|
-
# df.
|
692
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
686
693
|
# # =>
|
687
694
|
# # shape: (2, 2)
|
688
695
|
# # ┌───────┬───────────┐
|
@@ -820,18 +827,18 @@ module Polars
|
|
820
827
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
821
828
|
# # =>
|
822
829
|
# # shape: (6, 1)
|
823
|
-
# #
|
824
|
-
# # │
|
825
|
-
# # │ ---
|
826
|
-
# # │ i64
|
827
|
-
# #
|
828
|
-
# # │ null
|
829
|
-
# # │ null
|
830
|
-
# # │ null
|
831
|
-
# # │ 1
|
832
|
-
# # │ 1
|
833
|
-
# # │ 2
|
834
|
-
# #
|
830
|
+
# # ┌────────┐
|
831
|
+
# # │ repeat │
|
832
|
+
# # │ --- │
|
833
|
+
# # │ i64 │
|
834
|
+
# # ╞════════╡
|
835
|
+
# # │ null │
|
836
|
+
# # │ null │
|
837
|
+
# # │ null │
|
838
|
+
# # │ 1 │
|
839
|
+
# # │ 1 │
|
840
|
+
# # │ 2 │
|
841
|
+
# # └────────┘
|
835
842
|
def rechunk
|
836
843
|
wrap_expr(_rbexpr.rechunk)
|
837
844
|
end
|
@@ -905,8 +912,8 @@ module Polars
|
|
905
912
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
906
913
|
# df.select(
|
907
914
|
# [
|
908
|
-
# Polars.col("a").
|
909
|
-
# Polars.col("a").
|
915
|
+
# Polars.col("a").cum_sum,
|
916
|
+
# Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
|
910
917
|
# ]
|
911
918
|
# )
|
912
919
|
# # =>
|
@@ -921,9 +928,10 @@ module Polars
|
|
921
928
|
# # │ 6 ┆ 7 │
|
922
929
|
# # │ 10 ┆ 4 │
|
923
930
|
# # └─────┴───────────┘
|
924
|
-
def
|
925
|
-
wrap_expr(_rbexpr.
|
931
|
+
def cum_sum(reverse: false)
|
932
|
+
wrap_expr(_rbexpr.cum_sum(reverse))
|
926
933
|
end
|
934
|
+
alias_method :cumsum, :cum_sum
|
927
935
|
|
928
936
|
# Get an array with the cumulative product computed at every element.
|
929
937
|
#
|
@@ -940,8 +948,8 @@ module Polars
|
|
940
948
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
941
949
|
# df.select(
|
942
950
|
# [
|
943
|
-
# Polars.col("a").
|
944
|
-
# Polars.col("a").
|
951
|
+
# Polars.col("a").cum_prod,
|
952
|
+
# Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
|
945
953
|
# ]
|
946
954
|
# )
|
947
955
|
# # =>
|
@@ -956,9 +964,10 @@ module Polars
|
|
956
964
|
# # │ 6 ┆ 12 │
|
957
965
|
# # │ 24 ┆ 4 │
|
958
966
|
# # └─────┴───────────┘
|
959
|
-
def
|
960
|
-
wrap_expr(_rbexpr.
|
967
|
+
def cum_prod(reverse: false)
|
968
|
+
wrap_expr(_rbexpr.cum_prod(reverse))
|
961
969
|
end
|
970
|
+
alias_method :cumprod, :cum_prod
|
962
971
|
|
963
972
|
# Get an array with the cumulative min computed at every element.
|
964
973
|
#
|
@@ -971,8 +980,8 @@ module Polars
|
|
971
980
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
972
981
|
# df.select(
|
973
982
|
# [
|
974
|
-
# Polars.col("a").
|
975
|
-
# Polars.col("a").
|
983
|
+
# Polars.col("a").cum_min,
|
984
|
+
# Polars.col("a").cum_min(reverse: true).alias("a_reverse")
|
976
985
|
# ]
|
977
986
|
# )
|
978
987
|
# # =>
|
@@ -987,9 +996,10 @@ module Polars
|
|
987
996
|
# # │ 1 ┆ 3 │
|
988
997
|
# # │ 1 ┆ 4 │
|
989
998
|
# # └─────┴───────────┘
|
990
|
-
def
|
991
|
-
wrap_expr(_rbexpr.
|
999
|
+
def cum_min(reverse: false)
|
1000
|
+
wrap_expr(_rbexpr.cum_min(reverse))
|
992
1001
|
end
|
1002
|
+
alias_method :cummin, :cum_min
|
993
1003
|
|
994
1004
|
# Get an array with the cumulative max computed at every element.
|
995
1005
|
#
|
@@ -1002,8 +1012,8 @@ module Polars
|
|
1002
1012
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1003
1013
|
# df.select(
|
1004
1014
|
# [
|
1005
|
-
# Polars.col("a").
|
1006
|
-
# Polars.col("a").
|
1015
|
+
# Polars.col("a").cum_max,
|
1016
|
+
# Polars.col("a").cum_max(reverse: true).alias("a_reverse")
|
1007
1017
|
# ]
|
1008
1018
|
# )
|
1009
1019
|
# # =>
|
@@ -1018,9 +1028,10 @@ module Polars
|
|
1018
1028
|
# # │ 3 ┆ 4 │
|
1019
1029
|
# # │ 4 ┆ 4 │
|
1020
1030
|
# # └─────┴───────────┘
|
1021
|
-
def
|
1022
|
-
wrap_expr(_rbexpr.
|
1031
|
+
def cum_max(reverse: false)
|
1032
|
+
wrap_expr(_rbexpr.cum_max(reverse))
|
1023
1033
|
end
|
1034
|
+
alias_method :cummax, :cum_max
|
1024
1035
|
|
1025
1036
|
# Get an array with the cumulative count computed at every element.
|
1026
1037
|
#
|
@@ -1035,8 +1046,8 @@ module Polars
|
|
1035
1046
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1036
1047
|
# df.select(
|
1037
1048
|
# [
|
1038
|
-
# Polars.col("a").
|
1039
|
-
# Polars.col("a").
|
1049
|
+
# Polars.col("a").cum_count,
|
1050
|
+
# Polars.col("a").cum_count(reverse: true).alias("a_reverse")
|
1040
1051
|
# ]
|
1041
1052
|
# )
|
1042
1053
|
# # =>
|
@@ -1051,9 +1062,10 @@ module Polars
|
|
1051
1062
|
# # │ 2 ┆ 1 │
|
1052
1063
|
# # │ 3 ┆ 0 │
|
1053
1064
|
# # └─────┴───────────┘
|
1054
|
-
def
|
1055
|
-
wrap_expr(_rbexpr.
|
1065
|
+
def cum_count(reverse: false)
|
1066
|
+
wrap_expr(_rbexpr.cum_count(reverse))
|
1056
1067
|
end
|
1068
|
+
alias_method :cumcount, :cum_count
|
1057
1069
|
|
1058
1070
|
# Rounds down to the nearest integer value.
|
1059
1071
|
#
|
@@ -1229,7 +1241,7 @@ module Polars
|
|
1229
1241
|
|
1230
1242
|
# Sort this column. In projection/ selection context the whole column is sorted.
|
1231
1243
|
#
|
1232
|
-
# If used in a
|
1244
|
+
# If used in a group by context, the groups are sorted.
|
1233
1245
|
#
|
1234
1246
|
# @param reverse [Boolean]
|
1235
1247
|
# false -> order from small to large.
|
@@ -1287,7 +1299,7 @@ module Polars
|
|
1287
1299
|
# # └───────┘
|
1288
1300
|
#
|
1289
1301
|
# @example
|
1290
|
-
# df.
|
1302
|
+
# df.group_by("group").agg(Polars.col("value").sort)
|
1291
1303
|
# # =>
|
1292
1304
|
# # shape: (2, 2)
|
1293
1305
|
# # ┌───────┬────────────┐
|
@@ -1337,6 +1349,7 @@ module Polars
|
|
1337
1349
|
# # │ 2 ┆ 98 │
|
1338
1350
|
# # └───────┴──────────┘
|
1339
1351
|
def top_k(k: 5)
|
1352
|
+
k = Utils.parse_as_expression(k)
|
1340
1353
|
wrap_expr(_rbexpr.top_k(k))
|
1341
1354
|
end
|
1342
1355
|
|
@@ -1375,6 +1388,7 @@ module Polars
|
|
1375
1388
|
# # │ 2 ┆ 98 │
|
1376
1389
|
# # └───────┴──────────┘
|
1377
1390
|
def bottom_k(k: 5)
|
1391
|
+
k = Utils.parse_as_expression(k)
|
1378
1392
|
wrap_expr(_rbexpr.bottom_k(k))
|
1379
1393
|
end
|
1380
1394
|
|
@@ -1494,7 +1508,7 @@ module Polars
|
|
1494
1508
|
# Sort this column by the ordering of another column, or multiple other columns.
|
1495
1509
|
#
|
1496
1510
|
# In projection/ selection context the whole column is sorted.
|
1497
|
-
# If used in a
|
1511
|
+
# If used in a group by context, the groups are sorted.
|
1498
1512
|
#
|
1499
1513
|
# @param by [Object]
|
1500
1514
|
# The column(s) used for sorting.
|
@@ -1534,10 +1548,10 @@ module Polars
|
|
1534
1548
|
# # │ two │
|
1535
1549
|
# # └───────┘
|
1536
1550
|
def sort_by(by, reverse: false)
|
1537
|
-
if !by.is_a?(Array)
|
1551
|
+
if !by.is_a?(::Array)
|
1538
1552
|
by = [by]
|
1539
1553
|
end
|
1540
|
-
if !reverse.is_a?(Array)
|
1554
|
+
if !reverse.is_a?(::Array)
|
1541
1555
|
reverse = [reverse]
|
1542
1556
|
end
|
1543
1557
|
by = Utils.selection_to_rbexpr_list(by)
|
@@ -1566,30 +1580,33 @@ module Polars
|
|
1566
1580
|
# "value" => [1, 98, 2, 3, 99, 4]
|
1567
1581
|
# }
|
1568
1582
|
# )
|
1569
|
-
# df.
|
1583
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
|
1570
1584
|
# # =>
|
1571
1585
|
# # shape: (2, 2)
|
1572
|
-
# #
|
1573
|
-
# # │ group ┆ value
|
1574
|
-
# # │ --- ┆ ---
|
1575
|
-
# # │ str ┆ i64
|
1576
|
-
# #
|
1577
|
-
# # │ one ┆ 98
|
1578
|
-
# # │ two ┆ 99
|
1579
|
-
# #
|
1580
|
-
def
|
1581
|
-
if indices.is_a?(Array)
|
1586
|
+
# # ┌───────┬───────────┐
|
1587
|
+
# # │ group ┆ value │
|
1588
|
+
# # │ --- ┆ --- │
|
1589
|
+
# # │ str ┆ list[i64] │
|
1590
|
+
# # ╞═══════╪═══════════╡
|
1591
|
+
# # │ one ┆ [2, 98] │
|
1592
|
+
# # │ two ┆ [4, 99] │
|
1593
|
+
# # └───────┴───────────┘
|
1594
|
+
def gather(indices)
|
1595
|
+
if indices.is_a?(::Array)
|
1582
1596
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1597
|
else
|
1584
1598
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
1585
1599
|
end
|
1586
|
-
wrap_expr(_rbexpr.
|
1600
|
+
wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
|
1587
1601
|
end
|
1602
|
+
alias_method :take, :gather
|
1588
1603
|
|
1589
1604
|
# Shift the values by a given period.
|
1590
1605
|
#
|
1591
|
-
# @param
|
1606
|
+
# @param n [Integer]
|
1592
1607
|
# Number of places to shift (may be negative).
|
1608
|
+
# @param fill_value [Object]
|
1609
|
+
# Fill the resulting null values with this value.
|
1593
1610
|
#
|
1594
1611
|
# @return [Expr]
|
1595
1612
|
#
|
@@ -1608,8 +1625,12 @@ module Polars
|
|
1608
1625
|
# # │ 2 │
|
1609
1626
|
# # │ 3 │
|
1610
1627
|
# # └──────┘
|
1611
|
-
def shift(
|
1612
|
-
|
1628
|
+
def shift(n = 1, fill_value: nil)
|
1629
|
+
if !fill_value.nil?
|
1630
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1631
|
+
end
|
1632
|
+
n = Utils.parse_as_expression(n)
|
1633
|
+
wrap_expr(_rbexpr.shift(n, fill_value))
|
1613
1634
|
end
|
1614
1635
|
|
1615
1636
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1637,8 +1658,7 @@ module Polars
|
|
1637
1658
|
# # │ 3 │
|
1638
1659
|
# # └─────┘
|
1639
1660
|
def shift_and_fill(periods, fill_value)
|
1640
|
-
|
1641
|
-
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
1661
|
+
shift(periods, fill_value: fill_value)
|
1642
1662
|
end
|
1643
1663
|
|
1644
1664
|
# Fill null values using the specified value or strategy.
|
@@ -2063,7 +2083,7 @@ module Polars
|
|
2063
2083
|
# # │ 2 │
|
2064
2084
|
# # └─────┘
|
2065
2085
|
def approx_unique
|
2066
|
-
wrap_expr(_rbexpr.
|
2086
|
+
wrap_expr(_rbexpr.approx_n_unique)
|
2067
2087
|
end
|
2068
2088
|
|
2069
2089
|
# Count null values.
|
@@ -2201,7 +2221,7 @@ module Polars
|
|
2201
2221
|
|
2202
2222
|
# Apply window function over a subgroup.
|
2203
2223
|
#
|
2204
|
-
# This is similar to a
|
2224
|
+
# This is similar to a group by + aggregation + self join.
|
2205
2225
|
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2206
2226
|
#
|
2207
2227
|
# @param expr [Object]
|
@@ -2309,9 +2329,10 @@ module Polars
|
|
2309
2329
|
# # │ 1 ┆ false │
|
2310
2330
|
# # │ 5 ┆ true │
|
2311
2331
|
# # └─────┴──────────┘
|
2312
|
-
def
|
2313
|
-
wrap_expr(_rbexpr.
|
2332
|
+
def is_first_distinct
|
2333
|
+
wrap_expr(_rbexpr.is_first_distinct)
|
2314
2334
|
end
|
2335
|
+
alias_method :is_first, :is_first_distinct
|
2315
2336
|
|
2316
2337
|
# Get mask of duplicated values.
|
2317
2338
|
#
|
@@ -2335,6 +2356,54 @@ module Polars
|
|
2335
2356
|
wrap_expr(_rbexpr.is_duplicated)
|
2336
2357
|
end
|
2337
2358
|
|
2359
|
+
# Get a boolean mask of the local maximum peaks.
|
2360
|
+
#
|
2361
|
+
# @return [Expr]
|
2362
|
+
#
|
2363
|
+
# @example
|
2364
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
2365
|
+
# df.select(Polars.col("a").peak_max)
|
2366
|
+
# # =>
|
2367
|
+
# # shape: (5, 1)
|
2368
|
+
# # ┌───────┐
|
2369
|
+
# # │ a │
|
2370
|
+
# # │ --- │
|
2371
|
+
# # │ bool │
|
2372
|
+
# # ╞═══════╡
|
2373
|
+
# # │ false │
|
2374
|
+
# # │ false │
|
2375
|
+
# # │ false │
|
2376
|
+
# # │ false │
|
2377
|
+
# # │ true │
|
2378
|
+
# # └───────┘
|
2379
|
+
def peak_max
|
2380
|
+
wrap_expr(_rbexpr.peak_max)
|
2381
|
+
end
|
2382
|
+
|
2383
|
+
# Get a boolean mask of the local minimum peaks.
|
2384
|
+
#
|
2385
|
+
# @return [Expr]
|
2386
|
+
#
|
2387
|
+
# @example
|
2388
|
+
# df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
|
2389
|
+
# df.select(Polars.col("a").peak_min)
|
2390
|
+
# # =>
|
2391
|
+
# # shape: (5, 1)
|
2392
|
+
# # ┌───────┐
|
2393
|
+
# # │ a │
|
2394
|
+
# # │ --- │
|
2395
|
+
# # │ bool │
|
2396
|
+
# # ╞═══════╡
|
2397
|
+
# # │ false │
|
2398
|
+
# # │ true │
|
2399
|
+
# # │ false │
|
2400
|
+
# # │ true │
|
2401
|
+
# # │ false │
|
2402
|
+
# # └───────┘
|
2403
|
+
def peak_min
|
2404
|
+
wrap_expr(_rbexpr.peak_min)
|
2405
|
+
end
|
2406
|
+
|
2338
2407
|
# Get quantile value.
|
2339
2408
|
#
|
2340
2409
|
# @param quantile [Float]
|
@@ -2427,7 +2496,7 @@ module Polars
|
|
2427
2496
|
# }
|
2428
2497
|
# )
|
2429
2498
|
# (
|
2430
|
-
# df.
|
2499
|
+
# df.group_by("group_col").agg(
|
2431
2500
|
# [
|
2432
2501
|
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2433
2502
|
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2436,14 +2505,14 @@ module Polars
|
|
2436
2505
|
# ).sort("group_col")
|
2437
2506
|
# # =>
|
2438
2507
|
# # shape: (2, 3)
|
2439
|
-
# #
|
2440
|
-
# # │ group_col ┆ lt
|
2441
|
-
# # │ --- ┆ ---
|
2442
|
-
# # │ str ┆ i64
|
2443
|
-
# #
|
2444
|
-
# # │ g1 ┆ 1
|
2445
|
-
# # │ g2 ┆
|
2446
|
-
# #
|
2508
|
+
# # ┌───────────┬─────┬─────┐
|
2509
|
+
# # │ group_col ┆ lt ┆ gte │
|
2510
|
+
# # │ --- ┆ --- ┆ --- │
|
2511
|
+
# # │ str ┆ i64 ┆ i64 │
|
2512
|
+
# # ╞═══════════╪═════╪═════╡
|
2513
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2514
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2515
|
+
# # └───────────┴─────┴─────┘
|
2447
2516
|
def filter(predicate)
|
2448
2517
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2449
2518
|
end
|
@@ -2465,7 +2534,7 @@ module Polars
|
|
2465
2534
|
# }
|
2466
2535
|
# )
|
2467
2536
|
# (
|
2468
|
-
# df.
|
2537
|
+
# df.group_by("group_col").agg(
|
2469
2538
|
# [
|
2470
2539
|
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2471
2540
|
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
@@ -2474,14 +2543,14 @@ module Polars
|
|
2474
2543
|
# ).sort("group_col")
|
2475
2544
|
# # =>
|
2476
2545
|
# # shape: (2, 3)
|
2477
|
-
# #
|
2478
|
-
# # │ group_col ┆ lt
|
2479
|
-
# # │ --- ┆ ---
|
2480
|
-
# # │ str ┆ i64
|
2481
|
-
# #
|
2482
|
-
# # │ g1 ┆ 1
|
2483
|
-
# # │ g2 ┆
|
2484
|
-
# #
|
2546
|
+
# # ┌───────────┬─────┬─────┐
|
2547
|
+
# # │ group_col ┆ lt ┆ gte │
|
2548
|
+
# # │ --- ┆ --- ┆ --- │
|
2549
|
+
# # │ str ┆ i64 ┆ i64 │
|
2550
|
+
# # ╞═══════════╪═════╪═════╡
|
2551
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2552
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2553
|
+
# # └───────────┴─────┴─────┘
|
2485
2554
|
def where(predicate)
|
2486
2555
|
filter(predicate)
|
2487
2556
|
end
|
@@ -2583,7 +2652,7 @@ module Polars
|
|
2583
2652
|
#
|
2584
2653
|
# @example In a GroupBy context the function is applied by group:
|
2585
2654
|
# df.lazy
|
2586
|
-
# .
|
2655
|
+
# .group_by("b", maintain_order: true)
|
2587
2656
|
# .agg(
|
2588
2657
|
# [
|
2589
2658
|
# Polars.col("a").apply { |x| x.sum }
|
@@ -2616,25 +2685,23 @@ module Polars
|
|
2616
2685
|
# @return [Expr]
|
2617
2686
|
#
|
2618
2687
|
# @example
|
2619
|
-
#
|
2620
|
-
#
|
2621
|
-
#
|
2622
|
-
#
|
2623
|
-
#
|
2624
|
-
#
|
2625
|
-
#
|
2626
|
-
#
|
2627
|
-
#
|
2628
|
-
#
|
2629
|
-
#
|
2630
|
-
#
|
2631
|
-
#
|
2632
|
-
#
|
2633
|
-
#
|
2634
|
-
#
|
2635
|
-
#
|
2636
|
-
# # │ d │
|
2637
|
-
# # └─────┘
|
2688
|
+
# df = Polars::DataFrame.new(
|
2689
|
+
# {
|
2690
|
+
# "group" => ["a", "b", "b"],
|
2691
|
+
# "values" => [[1, 2], [2, 3], [4]]
|
2692
|
+
# }
|
2693
|
+
# )
|
2694
|
+
# df.group_by("group").agg(Polars.col("values").flatten)
|
2695
|
+
# # =>
|
2696
|
+
# # shape: (2, 2)
|
2697
|
+
# # ┌───────┬───────────┐
|
2698
|
+
# # │ group ┆ values │
|
2699
|
+
# # │ --- ┆ --- │
|
2700
|
+
# # │ str ┆ list[i64] │
|
2701
|
+
# # ╞═══════╪═══════════╡
|
2702
|
+
# # │ a ┆ [1, 2] │
|
2703
|
+
# # │ b ┆ [2, 3, 4] │
|
2704
|
+
# # └───────┴───────────┘
|
2638
2705
|
def flatten
|
2639
2706
|
wrap_expr(_rbexpr.explode)
|
2640
2707
|
end
|
@@ -2672,7 +2739,7 @@ module Polars
|
|
2672
2739
|
#
|
2673
2740
|
# @example
|
2674
2741
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2675
|
-
# df.select(Polars.col("foo").
|
2742
|
+
# df.select(Polars.col("foo").gather_every(3))
|
2676
2743
|
# # =>
|
2677
2744
|
# # shape: (3, 1)
|
2678
2745
|
# # ┌─────┐
|
@@ -2684,9 +2751,10 @@ module Polars
|
|
2684
2751
|
# # │ 4 │
|
2685
2752
|
# # │ 7 │
|
2686
2753
|
# # └─────┘
|
2687
|
-
def
|
2688
|
-
wrap_expr(_rbexpr.
|
2754
|
+
def gather_every(n)
|
2755
|
+
wrap_expr(_rbexpr.gather_every(n))
|
2689
2756
|
end
|
2757
|
+
alias_method :take_every, :gather_every
|
2690
2758
|
|
2691
2759
|
# Get the first `n` rows.
|
2692
2760
|
#
|
@@ -2798,7 +2866,7 @@ module Polars
|
|
2798
2866
|
# # │ false │
|
2799
2867
|
# # └──────────┘
|
2800
2868
|
def is_in(other)
|
2801
|
-
if other.is_a?(Array)
|
2869
|
+
if other.is_a?(::Array)
|
2802
2870
|
if other.length == 0
|
2803
2871
|
other = Polars.lit(nil)
|
2804
2872
|
else
|
@@ -3059,11 +3127,11 @@ module Polars
|
|
3059
3127
|
# # ┌─────┬─────┐
|
3060
3128
|
# # │ a ┆ b │
|
3061
3129
|
# # │ --- ┆ --- │
|
3062
|
-
# # │
|
3130
|
+
# # │ f64 ┆ f64 │
|
3063
3131
|
# # ╞═════╪═════╡
|
3064
|
-
# # │ 1
|
3065
|
-
# # │ 2
|
3066
|
-
# # │ 3
|
3132
|
+
# # │ 1.0 ┆ 1.0 │
|
3133
|
+
# # │ 2.0 ┆ NaN │
|
3134
|
+
# # │ 3.0 ┆ 3.0 │
|
3067
3135
|
# # └─────┴─────┘
|
3068
3136
|
def interpolate(method: "linear")
|
3069
3137
|
wrap_expr(_rbexpr.interpolate(method))
|
@@ -3114,7 +3182,7 @@ module Polars
|
|
3114
3182
|
#
|
3115
3183
|
# @note
|
3116
3184
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3117
|
-
# window, consider using `
|
3185
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3118
3186
|
# computation.
|
3119
3187
|
#
|
3120
3188
|
# @return [Expr]
|
@@ -3203,7 +3271,7 @@ module Polars
|
|
3203
3271
|
#
|
3204
3272
|
# @note
|
3205
3273
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3206
|
-
# window, consider using `
|
3274
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3207
3275
|
# computation.
|
3208
3276
|
#
|
3209
3277
|
# @return [Expr]
|
@@ -3292,7 +3360,7 @@ module Polars
|
|
3292
3360
|
#
|
3293
3361
|
# @note
|
3294
3362
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3295
|
-
# window, consider using `
|
3363
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3296
3364
|
# computation.
|
3297
3365
|
#
|
3298
3366
|
# @return [Expr]
|
@@ -3381,7 +3449,7 @@ module Polars
|
|
3381
3449
|
#
|
3382
3450
|
# @note
|
3383
3451
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3384
|
-
# window, consider using `
|
3452
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3385
3453
|
# computation.
|
3386
3454
|
#
|
3387
3455
|
# @return [Expr]
|
@@ -3470,7 +3538,7 @@ module Polars
|
|
3470
3538
|
#
|
3471
3539
|
# @note
|
3472
3540
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3473
|
-
# window, consider using `
|
3541
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3474
3542
|
# computation.
|
3475
3543
|
#
|
3476
3544
|
# @return [Expr]
|
@@ -3502,14 +3570,15 @@ module Polars
|
|
3502
3570
|
min_periods: nil,
|
3503
3571
|
center: false,
|
3504
3572
|
by: nil,
|
3505
|
-
closed: "left"
|
3573
|
+
closed: "left",
|
3574
|
+
ddof: 1
|
3506
3575
|
)
|
3507
3576
|
window_size, min_periods = _prepare_rolling_window_args(
|
3508
3577
|
window_size, min_periods
|
3509
3578
|
)
|
3510
3579
|
wrap_expr(
|
3511
3580
|
_rbexpr.rolling_std(
|
3512
|
-
window_size, weights, min_periods, center, by, closed
|
3581
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3513
3582
|
)
|
3514
3583
|
)
|
3515
3584
|
end
|
@@ -3559,7 +3628,7 @@ module Polars
|
|
3559
3628
|
#
|
3560
3629
|
# @note
|
3561
3630
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3562
|
-
# window, consider using `
|
3631
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3563
3632
|
# computation.
|
3564
3633
|
#
|
3565
3634
|
# @return [Expr]
|
@@ -3591,14 +3660,15 @@ module Polars
|
|
3591
3660
|
min_periods: nil,
|
3592
3661
|
center: false,
|
3593
3662
|
by: nil,
|
3594
|
-
closed: "left"
|
3663
|
+
closed: "left",
|
3664
|
+
ddof: 1
|
3595
3665
|
)
|
3596
3666
|
window_size, min_periods = _prepare_rolling_window_args(
|
3597
3667
|
window_size, min_periods
|
3598
3668
|
)
|
3599
3669
|
wrap_expr(
|
3600
3670
|
_rbexpr.rolling_var(
|
3601
|
-
window_size, weights, min_periods, center, by, closed
|
3671
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3602
3672
|
)
|
3603
3673
|
)
|
3604
3674
|
end
|
@@ -3644,7 +3714,7 @@ module Polars
|
|
3644
3714
|
#
|
3645
3715
|
# @note
|
3646
3716
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3647
|
-
# window, consider using `
|
3717
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3648
3718
|
# computation.
|
3649
3719
|
#
|
3650
3720
|
# @return [Expr]
|
@@ -3733,7 +3803,7 @@ module Polars
|
|
3733
3803
|
#
|
3734
3804
|
# @note
|
3735
3805
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
3736
|
-
# window, consider using `
|
3806
|
+
# window, consider using `group_by_rolling`; this method can cache the window size
|
3737
3807
|
# computation.
|
3738
3808
|
#
|
3739
3809
|
# @return [Expr]
|
@@ -3948,7 +4018,7 @@ module Polars
|
|
3948
4018
|
# # ┌─────┐
|
3949
4019
|
# # │ a │
|
3950
4020
|
# # │ --- │
|
3951
|
-
# # │
|
4021
|
+
# # │ f64 │
|
3952
4022
|
# # ╞═════╡
|
3953
4023
|
# # │ 3.0 │
|
3954
4024
|
# # │ 4.5 │
|
@@ -4041,6 +4111,7 @@ module Polars
|
|
4041
4111
|
# # │ 12 ┆ 0.0 │
|
4042
4112
|
# # └──────┴────────────┘
|
4043
4113
|
def pct_change(n: 1)
|
4114
|
+
n = Utils.parse_as_expression(n)
|
4044
4115
|
wrap_expr(_rbexpr.pct_change(n))
|
4045
4116
|
end
|
4046
4117
|
|
@@ -4105,16 +4176,14 @@ module Polars
|
|
4105
4176
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
4106
4177
|
end
|
4107
4178
|
|
4108
|
-
#
|
4109
|
-
#
|
4110
|
-
# Only works for numerical types.
|
4179
|
+
# Set values outside the given boundaries to the boundary value.
|
4111
4180
|
#
|
4112
|
-
# If you want to clip other
|
4113
|
-
#
|
4181
|
+
# Only works for numeric and temporal columns. If you want to clip other data
|
4182
|
+
# types, consider writing a `when-then-otherwise` expression.
|
4114
4183
|
#
|
4115
|
-
# @param
|
4184
|
+
# @param lower_bound [Numeric]
|
4116
4185
|
# Minimum value.
|
4117
|
-
# @param
|
4186
|
+
# @param upper_bound [Numeric]
|
4118
4187
|
# Maximum value.
|
4119
4188
|
#
|
4120
4189
|
# @return [Expr]
|
@@ -4134,8 +4203,14 @@ module Polars
|
|
4134
4203
|
# # │ null ┆ null │
|
4135
4204
|
# # │ 50 ┆ 10 │
|
4136
4205
|
# # └──────┴─────────────┘
|
4137
|
-
def clip(
|
4138
|
-
|
4206
|
+
def clip(lower_bound, upper_bound)
|
4207
|
+
if !lower_bound.nil?
|
4208
|
+
lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
|
4209
|
+
end
|
4210
|
+
if !upper_bound.nil?
|
4211
|
+
upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
|
4212
|
+
end
|
4213
|
+
wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
|
4139
4214
|
end
|
4140
4215
|
|
4141
4216
|
# Clip (limit) the values in an array to a `min` boundary.
|
@@ -4145,7 +4220,7 @@ module Polars
|
|
4145
4220
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4146
4221
|
# expression. See `when` for more information.
|
4147
4222
|
#
|
4148
|
-
# @param
|
4223
|
+
# @param lower_bound [Numeric]
|
4149
4224
|
# Minimum value.
|
4150
4225
|
#
|
4151
4226
|
# @return [Expr]
|
@@ -4165,8 +4240,8 @@ module Polars
|
|
4165
4240
|
# # │ null ┆ null │
|
4166
4241
|
# # │ 50 ┆ 50 │
|
4167
4242
|
# # └──────┴─────────────┘
|
4168
|
-
def clip_min(
|
4169
|
-
|
4243
|
+
def clip_min(lower_bound)
|
4244
|
+
clip(lower_bound, nil)
|
4170
4245
|
end
|
4171
4246
|
|
4172
4247
|
# Clip (limit) the values in an array to a `max` boundary.
|
@@ -4176,7 +4251,7 @@ module Polars
|
|
4176
4251
|
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4177
4252
|
# expression. See `when` for more information.
|
4178
4253
|
#
|
4179
|
-
# @param
|
4254
|
+
# @param upper_bound [Numeric]
|
4180
4255
|
# Maximum value.
|
4181
4256
|
#
|
4182
4257
|
# @return [Expr]
|
@@ -4196,8 +4271,8 @@ module Polars
|
|
4196
4271
|
# # │ null ┆ null │
|
4197
4272
|
# # │ 50 ┆ 0 │
|
4198
4273
|
# # └──────┴─────────────┘
|
4199
|
-
def clip_max(
|
4200
|
-
|
4274
|
+
def clip_max(upper_bound)
|
4275
|
+
clip(nil, upper_bound)
|
4201
4276
|
end
|
4202
4277
|
|
4203
4278
|
# Calculate the lower bound.
|
@@ -4607,12 +4682,14 @@ module Polars
|
|
4607
4682
|
end
|
4608
4683
|
|
4609
4684
|
if !n.nil? && frac.nil?
|
4685
|
+
n = Utils.parse_as_expression(n)
|
4610
4686
|
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4611
4687
|
end
|
4612
4688
|
|
4613
4689
|
if frac.nil?
|
4614
4690
|
frac = 1.0
|
4615
4691
|
end
|
4692
|
+
frac = Utils.parse_as_expression(frac)
|
4616
4693
|
wrap_expr(
|
4617
4694
|
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4618
4695
|
)
|
@@ -4884,7 +4961,7 @@ module Polars
|
|
4884
4961
|
# Number of valid values there should be in the window before the expression
|
4885
4962
|
# is evaluated. valid values = `length - null_count`
|
4886
4963
|
# @param parallel [Boolean]
|
4887
|
-
# Run in parallel. Don't do this in a
|
4964
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
4888
4965
|
# already has much parallelization.
|
4889
4966
|
#
|
4890
4967
|
# @return [Expr]
|
@@ -4929,8 +5006,8 @@ module Polars
|
|
4929
5006
|
#
|
4930
5007
|
# Enables downstream code to use fast paths for sorted arrays.
|
4931
5008
|
#
|
4932
|
-
# @param
|
4933
|
-
#
|
5009
|
+
# @param descending [Boolean]
|
5010
|
+
# Whether the `Series` order is descending.
|
4934
5011
|
#
|
4935
5012
|
# @return [Expr]
|
4936
5013
|
#
|
@@ -4950,9 +5027,9 @@ module Polars
|
|
4950
5027
|
# # ╞════════╡
|
4951
5028
|
# # │ 3 │
|
4952
5029
|
# # └────────┘
|
4953
|
-
|
4954
|
-
|
4955
|
-
|
5030
|
+
def set_sorted(descending: false)
|
5031
|
+
wrap_expr(_rbexpr.set_sorted_flag(descending))
|
5032
|
+
end
|
4956
5033
|
|
4957
5034
|
# Aggregate to list.
|
4958
5035
|
#
|
@@ -4965,7 +5042,7 @@ module Polars
|
|
4965
5042
|
# "b" => [4, 5, 6]
|
4966
5043
|
# }
|
4967
5044
|
# )
|
4968
|
-
# df.select(Polars.all.
|
5045
|
+
# df.select(Polars.all.implode)
|
4969
5046
|
# # =>
|
4970
5047
|
# # shape: (1, 2)
|
4971
5048
|
# # ┌───────────┬───────────┐
|
@@ -4978,7 +5055,6 @@ module Polars
|
|
4978
5055
|
def implode
|
4979
5056
|
wrap_expr(_rbexpr.implode)
|
4980
5057
|
end
|
4981
|
-
alias_method :list, :implode
|
4982
5058
|
|
4983
5059
|
# Shrink numeric columns to the minimal required datatype.
|
4984
5060
|
#
|
@@ -5018,10 +5094,17 @@ module Polars
|
|
5018
5094
|
# Create an object namespace of all list related methods.
|
5019
5095
|
#
|
5020
5096
|
# @return [ListExpr]
|
5021
|
-
def
|
5097
|
+
def list
|
5022
5098
|
ListExpr.new(self)
|
5023
5099
|
end
|
5024
5100
|
|
5101
|
+
# Create an object namespace of all array related methods.
|
5102
|
+
#
|
5103
|
+
# @return [ArrayExpr]
|
5104
|
+
def arr
|
5105
|
+
ArrayExpr.new(self)
|
5106
|
+
end
|
5107
|
+
|
5025
5108
|
# Create an object namespace of all binary related methods.
|
5026
5109
|
#
|
5027
5110
|
# @return [BinaryExpr]
|
@@ -5050,6 +5133,13 @@ module Polars
|
|
5050
5133
|
MetaExpr.new(self)
|
5051
5134
|
end
|
5052
5135
|
|
5136
|
+
# Create an object namespace of all expressions that modify expression names.
|
5137
|
+
#
|
5138
|
+
# @return [NameExpr]
|
5139
|
+
def name
|
5140
|
+
NameExpr.new(self)
|
5141
|
+
end
|
5142
|
+
|
5053
5143
|
# Create an object namespace of all string related methods.
|
5054
5144
|
#
|
5055
5145
|
# @return [StringExpr]
|