polars-df 0.5.0-x86_64-linux → 0.7.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/expr.rb CHANGED
@@ -131,6 +131,13 @@ module Polars
131
131
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
132
132
  end
133
133
 
134
+ # Performs boolean not.
135
+ #
136
+ # @return [Expr]
137
+ def !
138
+ is_not
139
+ end
140
+
134
141
  # Performs negation.
135
142
  #
136
143
  # @return [Expr]
@@ -191,8 +198,8 @@ module Polars
191
198
  # # ╞══════╪═══════╡
192
199
  # # │ true ┆ false │
193
200
  # # └──────┴───────┘
194
- def any
195
- wrap_expr(_rbexpr.any)
201
+ def any(drop_nulls: true)
202
+ wrap_expr(_rbexpr.any(drop_nulls))
196
203
  end
197
204
 
198
205
  # Check if all boolean values in a Boolean column are `true`.
@@ -216,8 +223,8 @@ module Polars
216
223
  # # ╞══════╪═══════╪═══════╡
217
224
  # # │ true ┆ false ┆ false │
218
225
  # # └──────┴───────┴───────┘
219
- def all
220
- wrap_expr(_rbexpr.all)
226
+ def all(drop_nulls: true)
227
+ wrap_expr(_rbexpr.all(drop_nulls))
221
228
  end
222
229
 
223
230
  # Compute the square root of the elements.
@@ -362,7 +369,7 @@ module Polars
362
369
  if columns.is_a?(String)
363
370
  columns = [columns]
364
371
  return wrap_expr(_rbexpr.exclude(columns))
365
- elsif !columns.is_a?(Array)
372
+ elsif !columns.is_a?(::Array)
366
373
  columns = [columns]
367
374
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
375
  end
@@ -401,21 +408,21 @@ module Polars
401
408
  # # │ 18 ┆ 4 │
402
409
  # # └─────┴─────┘
403
410
  def keep_name
404
- wrap_expr(_rbexpr.keep_name)
411
+ name.keep
405
412
  end
406
413
 
407
414
  # Add a prefix to the root column name of the expression.
408
415
  #
409
416
  # @return [Expr]
410
417
  def prefix(prefix)
411
- wrap_expr(_rbexpr.prefix(prefix))
418
+ name.prefix(prefix)
412
419
  end
413
420
 
414
421
  # Add a suffix to the root column name of the expression.
415
422
  #
416
423
  # @return [Expr]
417
424
  def suffix(suffix)
418
- wrap_expr(_rbexpr.suffix(suffix))
425
+ name.suffix(suffix)
419
426
  end
420
427
 
421
428
  # Rename the output of an expression by mapping a function over the root name.
@@ -443,7 +450,7 @@ module Polars
443
450
  # # │ 1 ┆ 3 │
444
451
  # # └───────────┴───────────┘
445
452
  def map_alias(&f)
446
- Utils.wrap_expr(_rbexpr.map_alias(f))
453
+ name.map(&f)
447
454
  end
448
455
 
449
456
  # Negate a boolean expression.
@@ -682,7 +689,7 @@ module Polars
682
689
  # "value" => [94, 95, 96, 97, 97, 99]
683
690
  # }
684
691
  # )
685
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
692
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
686
693
  # # =>
687
694
  # # shape: (2, 2)
688
695
  # # ┌───────┬───────────┐
@@ -820,18 +827,18 @@ module Polars
820
827
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
821
828
  # # =>
822
829
  # # shape: (6, 1)
823
- # # ┌─────────┐
824
- # # │ literal
825
- # # │ ---
826
- # # │ i64
827
- # # ╞═════════╡
828
- # # │ null
829
- # # │ null
830
- # # │ null
831
- # # │ 1
832
- # # │ 1
833
- # # │ 2
834
- # # └─────────┘
830
+ # # ┌────────┐
831
+ # # │ repeat
832
+ # # │ ---
833
+ # # │ i64
834
+ # # ╞════════╡
835
+ # # │ null
836
+ # # │ null
837
+ # # │ null
838
+ # # │ 1
839
+ # # │ 1
840
+ # # │ 2
841
+ # # └────────┘
835
842
  def rechunk
836
843
  wrap_expr(_rbexpr.rechunk)
837
844
  end
@@ -905,8 +912,8 @@ module Polars
905
912
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
906
913
  # df.select(
907
914
  # [
908
- # Polars.col("a").cumsum,
909
- # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
915
+ # Polars.col("a").cum_sum,
916
+ # Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
910
917
  # ]
911
918
  # )
912
919
  # # =>
@@ -921,9 +928,10 @@ module Polars
921
928
  # # │ 6 ┆ 7 │
922
929
  # # │ 10 ┆ 4 │
923
930
  # # └─────┴───────────┘
924
- def cumsum(reverse: false)
925
- wrap_expr(_rbexpr.cumsum(reverse))
931
+ def cum_sum(reverse: false)
932
+ wrap_expr(_rbexpr.cum_sum(reverse))
926
933
  end
934
+ alias_method :cumsum, :cum_sum
927
935
 
928
936
  # Get an array with the cumulative product computed at every element.
929
937
  #
@@ -940,8 +948,8 @@ module Polars
940
948
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
941
949
  # df.select(
942
950
  # [
943
- # Polars.col("a").cumprod,
944
- # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
951
+ # Polars.col("a").cum_prod,
952
+ # Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
945
953
  # ]
946
954
  # )
947
955
  # # =>
@@ -956,9 +964,10 @@ module Polars
956
964
  # # │ 6 ┆ 12 │
957
965
  # # │ 24 ┆ 4 │
958
966
  # # └─────┴───────────┘
959
- def cumprod(reverse: false)
960
- wrap_expr(_rbexpr.cumprod(reverse))
967
+ def cum_prod(reverse: false)
968
+ wrap_expr(_rbexpr.cum_prod(reverse))
961
969
  end
970
+ alias_method :cumprod, :cum_prod
962
971
 
963
972
  # Get an array with the cumulative min computed at every element.
964
973
  #
@@ -971,8 +980,8 @@ module Polars
971
980
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
972
981
  # df.select(
973
982
  # [
974
- # Polars.col("a").cummin,
975
- # Polars.col("a").cummin(reverse: true).alias("a_reverse")
983
+ # Polars.col("a").cum_min,
984
+ # Polars.col("a").cum_min(reverse: true).alias("a_reverse")
976
985
  # ]
977
986
  # )
978
987
  # # =>
@@ -987,9 +996,10 @@ module Polars
987
996
  # # │ 1 ┆ 3 │
988
997
  # # │ 1 ┆ 4 │
989
998
  # # └─────┴───────────┘
990
- def cummin(reverse: false)
991
- wrap_expr(_rbexpr.cummin(reverse))
999
+ def cum_min(reverse: false)
1000
+ wrap_expr(_rbexpr.cum_min(reverse))
992
1001
  end
1002
+ alias_method :cummin, :cum_min
993
1003
 
994
1004
  # Get an array with the cumulative max computed at every element.
995
1005
  #
@@ -1002,8 +1012,8 @@ module Polars
1002
1012
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1003
1013
  # df.select(
1004
1014
  # [
1005
- # Polars.col("a").cummax,
1006
- # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1015
+ # Polars.col("a").cum_max,
1016
+ # Polars.col("a").cum_max(reverse: true).alias("a_reverse")
1007
1017
  # ]
1008
1018
  # )
1009
1019
  # # =>
@@ -1018,9 +1028,10 @@ module Polars
1018
1028
  # # │ 3 ┆ 4 │
1019
1029
  # # │ 4 ┆ 4 │
1020
1030
  # # └─────┴───────────┘
1021
- def cummax(reverse: false)
1022
- wrap_expr(_rbexpr.cummax(reverse))
1031
+ def cum_max(reverse: false)
1032
+ wrap_expr(_rbexpr.cum_max(reverse))
1023
1033
  end
1034
+ alias_method :cummax, :cum_max
1024
1035
 
1025
1036
  # Get an array with the cumulative count computed at every element.
1026
1037
  #
@@ -1035,8 +1046,8 @@ module Polars
1035
1046
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1036
1047
  # df.select(
1037
1048
  # [
1038
- # Polars.col("a").cumcount,
1039
- # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1049
+ # Polars.col("a").cum_count,
1050
+ # Polars.col("a").cum_count(reverse: true).alias("a_reverse")
1040
1051
  # ]
1041
1052
  # )
1042
1053
  # # =>
@@ -1051,9 +1062,10 @@ module Polars
1051
1062
  # # │ 2 ┆ 1 │
1052
1063
  # # │ 3 ┆ 0 │
1053
1064
  # # └─────┴───────────┘
1054
- def cumcount(reverse: false)
1055
- wrap_expr(_rbexpr.cumcount(reverse))
1065
+ def cum_count(reverse: false)
1066
+ wrap_expr(_rbexpr.cum_count(reverse))
1056
1067
  end
1068
+ alias_method :cumcount, :cum_count
1057
1069
 
1058
1070
  # Rounds down to the nearest integer value.
1059
1071
  #
@@ -1229,7 +1241,7 @@ module Polars
1229
1241
 
1230
1242
  # Sort this column. In a projection/selection context, the whole column is sorted.
1231
1243
  #
1232
- # If used in a groupby context, the groups are sorted.
1244
+ # If used in a group by context, the groups are sorted.
1233
1245
  #
1234
1246
  # @param reverse [Boolean]
1235
1247
  # false -> order from small to large.
@@ -1287,7 +1299,7 @@ module Polars
1287
1299
  # # └───────┘
1288
1300
  #
1289
1301
  # @example
1290
- # df.groupby("group").agg(Polars.col("value").sort)
1302
+ # df.group_by("group").agg(Polars.col("value").sort)
1291
1303
  # # =>
1292
1304
  # # shape: (2, 2)
1293
1305
  # # ┌───────┬────────────┐
@@ -1337,6 +1349,7 @@ module Polars
1337
1349
  # # │ 2 ┆ 98 │
1338
1350
  # # └───────┴──────────┘
1339
1351
  def top_k(k: 5)
1352
+ k = Utils.parse_as_expression(k)
1340
1353
  wrap_expr(_rbexpr.top_k(k))
1341
1354
  end
1342
1355
 
@@ -1375,6 +1388,7 @@ module Polars
1375
1388
  # # │ 2 ┆ 98 │
1376
1389
  # # └───────┴──────────┘
1377
1390
  def bottom_k(k: 5)
1391
+ k = Utils.parse_as_expression(k)
1378
1392
  wrap_expr(_rbexpr.bottom_k(k))
1379
1393
  end
1380
1394
 
@@ -1494,7 +1508,7 @@ module Polars
1494
1508
  # Sort this column by the ordering of another column, or multiple other columns.
1495
1509
  #
1496
1510
  # In a projection/selection context, the whole column is sorted.
1497
- # If used in a groupby context, the groups are sorted.
1511
+ # If used in a group by context, the groups are sorted.
1498
1512
  #
1499
1513
  # @param by [Object]
1500
1514
  # The column(s) used for sorting.
@@ -1534,10 +1548,10 @@ module Polars
1534
1548
  # # │ two │
1535
1549
  # # └───────┘
1536
1550
  def sort_by(by, reverse: false)
1537
- if !by.is_a?(Array)
1551
+ if !by.is_a?(::Array)
1538
1552
  by = [by]
1539
1553
  end
1540
- if !reverse.is_a?(Array)
1554
+ if !reverse.is_a?(::Array)
1541
1555
  reverse = [reverse]
1542
1556
  end
1543
1557
  by = Utils.selection_to_rbexpr_list(by)
@@ -1566,30 +1580,33 @@ module Polars
1566
1580
  # "value" => [1, 98, 2, 3, 99, 4]
1567
1581
  # }
1568
1582
  # )
1569
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1583
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
1570
1584
  # # =>
1571
1585
  # # shape: (2, 2)
1572
- # # ┌───────┬───────┐
1573
- # # │ group ┆ value
1574
- # # │ --- ┆ ---
1575
- # # │ str ┆ i64
1576
- # # ╞═══════╪═══════╡
1577
- # # │ one ┆ 98
1578
- # # │ two ┆ 99
1579
- # # └───────┴───────┘
1580
- def take(indices)
1581
- if indices.is_a?(Array)
1586
+ # # ┌───────┬───────────┐
1587
+ # # │ group ┆ value
1588
+ # # │ --- ┆ ---
1589
+ # # │ str ┆ list[i64]
1590
+ # # ╞═══════╪═══════════╡
1591
+ # # │ one ┆ [2, 98]
1592
+ # # │ two ┆ [4, 99]
1593
+ # # └───────┴───────────┘
1594
+ def gather(indices)
1595
+ if indices.is_a?(::Array)
1582
1596
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1583
1597
  else
1584
1598
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1585
1599
  end
1586
- wrap_expr(_rbexpr.take(indices_lit._rbexpr))
1600
+ wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
1587
1601
  end
1602
+ alias_method :take, :gather
1588
1603
 
1589
1604
  # Shift the values by a given period.
1590
1605
  #
1591
- # @param periods [Integer]
1606
+ # @param n [Integer]
1592
1607
  # Number of places to shift (may be negative).
1608
+ # @param fill_value [Object]
1609
+ # Fill the resulting null values with this value.
1593
1610
  #
1594
1611
  # @return [Expr]
1595
1612
  #
@@ -1608,8 +1625,12 @@ module Polars
1608
1625
  # # │ 2 │
1609
1626
  # # │ 3 │
1610
1627
  # # └──────┘
1611
- def shift(periods = 1)
1612
- wrap_expr(_rbexpr.shift(periods))
1628
+ def shift(n = 1, fill_value: nil)
1629
+ if !fill_value.nil?
1630
+ fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1631
+ end
1632
+ n = Utils.parse_as_expression(n)
1633
+ wrap_expr(_rbexpr.shift(n, fill_value))
1613
1634
  end
1614
1635
 
1615
1636
  # Shift the values by a given period and fill the resulting null values.
@@ -1637,8 +1658,7 @@ module Polars
1637
1658
  # # │ 3 │
1638
1659
  # # └─────┘
1639
1660
  def shift_and_fill(periods, fill_value)
1640
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
1641
- wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
1661
+ shift(periods, fill_value: fill_value)
1642
1662
  end
1643
1663
 
1644
1664
  # Fill null values using the specified value or strategy.
@@ -2063,7 +2083,7 @@ module Polars
2063
2083
  # # │ 2 │
2064
2084
  # # └─────┘
2065
2085
  def approx_unique
2066
- wrap_expr(_rbexpr.approx_unique)
2086
+ wrap_expr(_rbexpr.approx_n_unique)
2067
2087
  end
2068
2088
 
2069
2089
  # Count null values.
@@ -2201,7 +2221,7 @@ module Polars
2201
2221
 
2202
2222
  # Apply window function over a subgroup.
2203
2223
  #
2204
- # This is similar to a groupby + aggregation + self join.
2224
+ # This is similar to a group by + aggregation + self join.
2205
2225
  # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2206
2226
  #
2207
2227
  # @param expr [Object]
@@ -2309,9 +2329,10 @@ module Polars
2309
2329
  # # │ 1 ┆ false │
2310
2330
  # # │ 5 ┆ true │
2311
2331
  # # └─────┴──────────┘
2312
- def is_first
2313
- wrap_expr(_rbexpr.is_first)
2332
+ def is_first_distinct
2333
+ wrap_expr(_rbexpr.is_first_distinct)
2314
2334
  end
2335
+ alias_method :is_first, :is_first_distinct
2315
2336
 
2316
2337
  # Get mask of duplicated values.
2317
2338
  #
@@ -2335,6 +2356,54 @@ module Polars
2335
2356
  wrap_expr(_rbexpr.is_duplicated)
2336
2357
  end
2337
2358
 
2359
+ # Get a boolean mask of the local maximum peaks.
2360
+ #
2361
+ # @return [Expr]
2362
+ #
2363
+ # @example
2364
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
2365
+ # df.select(Polars.col("a").peak_max)
2366
+ # # =>
2367
+ # # shape: (5, 1)
2368
+ # # ┌───────┐
2369
+ # # │ a │
2370
+ # # │ --- │
2371
+ # # │ bool │
2372
+ # # ╞═══════╡
2373
+ # # │ false │
2374
+ # # │ false │
2375
+ # # │ false │
2376
+ # # │ false │
2377
+ # # │ true │
2378
+ # # └───────┘
2379
+ def peak_max
2380
+ wrap_expr(_rbexpr.peak_max)
2381
+ end
2382
+
2383
+ # Get a boolean mask of the local minimum peaks.
2384
+ #
2385
+ # @return [Expr]
2386
+ #
2387
+ # @example
2388
+ # df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
2389
+ # df.select(Polars.col("a").peak_min)
2390
+ # # =>
2391
+ # # shape: (5, 1)
2392
+ # # ┌───────┐
2393
+ # # │ a │
2394
+ # # │ --- │
2395
+ # # │ bool │
2396
+ # # ╞═══════╡
2397
+ # # │ false │
2398
+ # # │ true │
2399
+ # # │ false │
2400
+ # # │ true │
2401
+ # # │ false │
2402
+ # # └───────┘
2403
+ def peak_min
2404
+ wrap_expr(_rbexpr.peak_min)
2405
+ end
2406
+
2338
2407
  # Get quantile value.
2339
2408
  #
2340
2409
  # @param quantile [Float]
@@ -2427,7 +2496,7 @@ module Polars
2427
2496
  # }
2428
2497
  # )
2429
2498
  # (
2430
- # df.groupby("group_col").agg(
2499
+ # df.group_by("group_col").agg(
2431
2500
  # [
2432
2501
  # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2433
2502
  # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
@@ -2436,14 +2505,14 @@ module Polars
2436
2505
  # ).sort("group_col")
2437
2506
  # # =>
2438
2507
  # # shape: (2, 3)
2439
- # # ┌───────────┬──────┬─────┐
2440
- # # │ group_col ┆ lt ┆ gte │
2441
- # # │ --- ┆ --- ┆ --- │
2442
- # # │ str ┆ i64 ┆ i64 │
2443
- # # ╞═══════════╪══════╪═════╡
2444
- # # │ g1 ┆ 1 ┆ 2 │
2445
- # # │ g2 ┆ null ┆ 3 │
2446
- # # └───────────┴──────┴─────┘
2508
+ # # ┌───────────┬─────┬─────┐
2509
+ # # │ group_col ┆ lt ┆ gte │
2510
+ # # │ --- ┆ --- ┆ --- │
2511
+ # # │ str ┆ i64 ┆ i64 │
2512
+ # # ╞═══════════╪═════╪═════╡
2513
+ # # │ g1 ┆ 1 ┆ 2 │
2514
+ # # │ g2 ┆ 0 ┆ 3 │
2515
+ # # └───────────┴─────┴─────┘
2447
2516
  def filter(predicate)
2448
2517
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
2449
2518
  end
@@ -2465,7 +2534,7 @@ module Polars
2465
2534
  # }
2466
2535
  # )
2467
2536
  # (
2468
- # df.groupby("group_col").agg(
2537
+ # df.group_by("group_col").agg(
2469
2538
  # [
2470
2539
  # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2471
2540
  # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
@@ -2474,14 +2543,14 @@ module Polars
2474
2543
  # ).sort("group_col")
2475
2544
  # # =>
2476
2545
  # # shape: (2, 3)
2477
- # # ┌───────────┬──────┬─────┐
2478
- # # │ group_col ┆ lt ┆ gte │
2479
- # # │ --- ┆ --- ┆ --- │
2480
- # # │ str ┆ i64 ┆ i64 │
2481
- # # ╞═══════════╪══════╪═════╡
2482
- # # │ g1 ┆ 1 ┆ 2 │
2483
- # # │ g2 ┆ null ┆ 3 │
2484
- # # └───────────┴──────┴─────┘
2546
+ # # ┌───────────┬─────┬─────┐
2547
+ # # │ group_col ┆ lt ┆ gte │
2548
+ # # │ --- ┆ --- ┆ --- │
2549
+ # # │ str ┆ i64 ┆ i64 │
2550
+ # # ╞═══════════╪═════╪═════╡
2551
+ # # │ g1 ┆ 1 ┆ 2 │
2552
+ # # │ g2 ┆ 0 ┆ 3 │
2553
+ # # └───────────┴─────┴─────┘
2485
2554
  def where(predicate)
2486
2555
  filter(predicate)
2487
2556
  end
@@ -2583,7 +2652,7 @@ module Polars
2583
2652
  #
2584
2653
  # @example In a GroupBy context the function is applied by group:
2585
2654
  # df.lazy
2586
- # .groupby("b", maintain_order: true)
2655
+ # .group_by("b", maintain_order: true)
2587
2656
  # .agg(
2588
2657
  # [
2589
2658
  # Polars.col("a").apply { |x| x.sum }
@@ -2616,25 +2685,23 @@ module Polars
2616
2685
  # @return [Expr]
2617
2686
  #
2618
2687
  # @example
2619
- # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2620
- # df.select(Polars.col("foo").flatten)
2621
- # # =>
2622
- # # shape: (10, 1)
2623
- # # ┌─────┐
2624
- # # │ foo │
2625
- # # │ --- │
2626
- # # │ str │
2627
- # # ╞═════╡
2628
- # # │ h │
2629
- # # │ e
2630
- # # │ l
2631
- # # │ l
2632
- # # │ … │
2633
- # # │ o
2634
- # # │ r
2635
- # # │ l │
2636
- # # │ d │
2637
- # # └─────┘
2688
+ # df = Polars::DataFrame.new(
2689
+ # {
2690
+ # "group" => ["a", "b", "b"],
2691
+ # "values" => [[1, 2], [2, 3], [4]]
2692
+ # }
2693
+ # )
2694
+ # df.group_by("group").agg(Polars.col("values").flatten)
2695
+ # # =>
2696
+ # # shape: (2, 2)
2697
+ # # ┌───────┬───────────┐
2698
+ # # │ group ┆ values
2699
+ # # │ --- ┆ ---
2700
+ # # │ str ┆ list[i64]
2701
+ # # ╞═══════╪═══════════╡
2702
+ # # │ a ┆ [1, 2]
2703
+ # # │ b ┆ [2, 3, 4]
2704
+ # # └───────┴───────────┘
2638
2705
  def flatten
2639
2706
  wrap_expr(_rbexpr.explode)
2640
2707
  end
@@ -2672,7 +2739,7 @@ module Polars
2672
2739
  #
2673
2740
  # @example
2674
2741
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2675
- # df.select(Polars.col("foo").take_every(3))
2742
+ # df.select(Polars.col("foo").gather_every(3))
2676
2743
  # # =>
2677
2744
  # # shape: (3, 1)
2678
2745
  # # ┌─────┐
@@ -2684,9 +2751,10 @@ module Polars
2684
2751
  # # │ 4 │
2685
2752
  # # │ 7 │
2686
2753
  # # └─────┘
2687
- def take_every(n)
2688
- wrap_expr(_rbexpr.take_every(n))
2754
+ def gather_every(n)
2755
+ wrap_expr(_rbexpr.gather_every(n))
2689
2756
  end
2757
+ alias_method :take_every, :gather_every
2690
2758
 
2691
2759
  # Get the first `n` rows.
2692
2760
  #
@@ -2798,7 +2866,7 @@ module Polars
2798
2866
  # # │ false │
2799
2867
  # # └──────────┘
2800
2868
  def is_in(other)
2801
- if other.is_a?(Array)
2869
+ if other.is_a?(::Array)
2802
2870
  if other.length == 0
2803
2871
  other = Polars.lit(nil)
2804
2872
  else
@@ -3059,11 +3127,11 @@ module Polars
3059
3127
  # # ┌─────┬─────┐
3060
3128
  # # │ a ┆ b │
3061
3129
  # # │ --- ┆ --- │
3062
- # # │ i64 ┆ f64 │
3130
+ # # │ f64 ┆ f64 │
3063
3131
  # # ╞═════╪═════╡
3064
- # # │ 1 ┆ 1.0 │
3065
- # # │ 2 ┆ NaN │
3066
- # # │ 3 ┆ 3.0 │
3132
+ # # │ 1.0 ┆ 1.0 │
3133
+ # # │ 2.0 ┆ NaN │
3134
+ # # │ 3.0 ┆ 3.0 │
3067
3135
  # # └─────┴─────┘
3068
3136
  def interpolate(method: "linear")
3069
3137
  wrap_expr(_rbexpr.interpolate(method))
@@ -3114,7 +3182,7 @@ module Polars
3114
3182
  #
3115
3183
  # @note
3116
3184
  # If you want to compute multiple aggregation statistics over the same dynamic
3117
- # window, consider using `groupby_rolling` this method can cache the window size
3185
+ # window, consider using `group_by_rolling`, which can cache the window size
3118
3186
  # computation.
3119
3187
  #
3120
3188
  # @return [Expr]
@@ -3203,7 +3271,7 @@ module Polars
3203
3271
  #
3204
3272
  # @note
3205
3273
  # If you want to compute multiple aggregation statistics over the same dynamic
3206
- # window, consider using `groupby_rolling` this method can cache the window size
3274
+ # window, consider using `group_by_rolling`, which can cache the window size
3207
3275
  # computation.
3208
3276
  #
3209
3277
  # @return [Expr]
@@ -3292,7 +3360,7 @@ module Polars
3292
3360
  #
3293
3361
  # @note
3294
3362
  # If you want to compute multiple aggregation statistics over the same dynamic
3295
- # window, consider using `groupby_rolling` this method can cache the window size
3363
+ # window, consider using `group_by_rolling`, which can cache the window size
3296
3364
  # computation.
3297
3365
  #
3298
3366
  # @return [Expr]
@@ -3381,7 +3449,7 @@ module Polars
3381
3449
  #
3382
3450
  # @note
3383
3451
  # If you want to compute multiple aggregation statistics over the same dynamic
3384
- # window, consider using `groupby_rolling` this method can cache the window size
3452
+ # window, consider using `group_by_rolling`, which can cache the window size
3385
3453
  # computation.
3386
3454
  #
3387
3455
  # @return [Expr]
@@ -3470,7 +3538,7 @@ module Polars
3470
3538
  #
3471
3539
  # @note
3472
3540
  # If you want to compute multiple aggregation statistics over the same dynamic
3473
- # window, consider using `groupby_rolling` this method can cache the window size
3541
+ # window, consider using `group_by_rolling`, which can cache the window size
3474
3542
  # computation.
3475
3543
  #
3476
3544
  # @return [Expr]
@@ -3502,14 +3570,15 @@ module Polars
3502
3570
  min_periods: nil,
3503
3571
  center: false,
3504
3572
  by: nil,
3505
- closed: "left"
3573
+ closed: "left",
3574
+ ddof: 1
3506
3575
  )
3507
3576
  window_size, min_periods = _prepare_rolling_window_args(
3508
3577
  window_size, min_periods
3509
3578
  )
3510
3579
  wrap_expr(
3511
3580
  _rbexpr.rolling_std(
3512
- window_size, weights, min_periods, center, by, closed
3581
+ window_size, weights, min_periods, center, by, closed, ddof
3513
3582
  )
3514
3583
  )
3515
3584
  end
@@ -3559,7 +3628,7 @@ module Polars
3559
3628
  #
3560
3629
  # @note
3561
3630
  # If you want to compute multiple aggregation statistics over the same dynamic
3562
- # window, consider using `groupby_rolling` this method can cache the window size
3631
+ # window, consider using `group_by_rolling`, which can cache the window size
3563
3632
  # computation.
3564
3633
  #
3565
3634
  # @return [Expr]
@@ -3591,14 +3660,15 @@ module Polars
3591
3660
  min_periods: nil,
3592
3661
  center: false,
3593
3662
  by: nil,
3594
- closed: "left"
3663
+ closed: "left",
3664
+ ddof: 1
3595
3665
  )
3596
3666
  window_size, min_periods = _prepare_rolling_window_args(
3597
3667
  window_size, min_periods
3598
3668
  )
3599
3669
  wrap_expr(
3600
3670
  _rbexpr.rolling_var(
3601
- window_size, weights, min_periods, center, by, closed
3671
+ window_size, weights, min_periods, center, by, closed, ddof
3602
3672
  )
3603
3673
  )
3604
3674
  end
@@ -3644,7 +3714,7 @@ module Polars
3644
3714
  #
3645
3715
  # @note
3646
3716
  # If you want to compute multiple aggregation statistics over the same dynamic
3647
- # window, consider using `groupby_rolling` this method can cache the window size
3717
+ # window, consider using `group_by_rolling`, which can cache the window size
3648
3718
  # computation.
3649
3719
  #
3650
3720
  # @return [Expr]
@@ -3733,7 +3803,7 @@ module Polars
3733
3803
  #
3734
3804
  # @note
3735
3805
  # If you want to compute multiple aggregation statistics over the same dynamic
3736
- # window, consider using `groupby_rolling` this method can cache the window size
3806
+ # window, consider using `group_by_rolling`, which can cache the window size
3737
3807
  # computation.
3738
3808
  #
3739
3809
  # @return [Expr]
@@ -3948,7 +4018,7 @@ module Polars
3948
4018
  # # ┌─────┐
3949
4019
  # # │ a │
3950
4020
  # # │ --- │
3951
- # # │ f32
4021
+ # # │ f64
3952
4022
  # # ╞═════╡
3953
4023
  # # │ 3.0 │
3954
4024
  # # │ 4.5 │
@@ -4041,6 +4111,7 @@ module Polars
4041
4111
  # # │ 12 ┆ 0.0 │
4042
4112
  # # └──────┴────────────┘
4043
4113
  def pct_change(n: 1)
4114
+ n = Utils.parse_as_expression(n)
4044
4115
  wrap_expr(_rbexpr.pct_change(n))
4045
4116
  end
4046
4117
 
@@ -4105,16 +4176,14 @@ module Polars
4105
4176
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
4106
4177
  end
4107
4178
 
4108
- # Clip (limit) the values in an array to a `min` and `max` boundary.
4109
- #
4110
- # Only works for numerical types.
4179
+ # Set values outside the given boundaries to the boundary value.
4111
4180
  #
4112
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4113
- # expression. See `when` for more information.
4181
+ # Only works for numeric and temporal columns. If you want to clip other data
4182
+ # types, consider writing a `when-then-otherwise` expression.
4114
4183
  #
4115
- # @param min_val [Numeric]
4184
+ # @param lower_bound [Numeric]
4116
4185
  # Minimum value.
4117
- # @param max_val [Numeric]
4186
+ # @param upper_bound [Numeric]
4118
4187
  # Maximum value.
4119
4188
  #
4120
4189
  # @return [Expr]
@@ -4134,8 +4203,14 @@ module Polars
4134
4203
  # # │ null ┆ null │
4135
4204
  # # │ 50 ┆ 10 │
4136
4205
  # # └──────┴─────────────┘
4137
- def clip(min_val, max_val)
4138
- wrap_expr(_rbexpr.clip(min_val, max_val))
4206
+ def clip(lower_bound, upper_bound)
4207
+ if !lower_bound.nil?
4208
+ lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
4209
+ end
4210
+ if !upper_bound.nil?
4211
+ upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
4212
+ end
4213
+ wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
4139
4214
  end
4140
4215
 
4141
4216
  # Clip (limit) the values in an array to a `min` boundary.
@@ -4145,7 +4220,7 @@ module Polars
4145
4220
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4146
4221
  # expression. See `when` for more information.
4147
4222
  #
4148
- # @param min_val [Numeric]
4223
+ # @param lower_bound [Numeric]
4149
4224
  # Minimum value.
4150
4225
  #
4151
4226
  # @return [Expr]
@@ -4165,8 +4240,8 @@ module Polars
4165
4240
  # # │ null ┆ null │
4166
4241
  # # │ 50 ┆ 50 │
4167
4242
  # # └──────┴─────────────┘
4168
- def clip_min(min_val)
4169
- wrap_expr(_rbexpr.clip_min(min_val))
4243
+ def clip_min(lower_bound)
4244
+ clip(lower_bound, nil)
4170
4245
  end
4171
4246
 
4172
4247
  # Clip (limit) the values in an array to a `max` boundary.
@@ -4176,7 +4251,7 @@ module Polars
4176
4251
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4177
4252
  # expression. See `when` for more information.
4178
4253
  #
4179
- # @param max_val [Numeric]
4254
+ # @param upper_bound [Numeric]
4180
4255
  # Maximum value.
4181
4256
  #
4182
4257
  # @return [Expr]
@@ -4196,8 +4271,8 @@ module Polars
4196
4271
  # # │ null ┆ null │
4197
4272
  # # │ 50 ┆ 0 │
4198
4273
  # # └──────┴─────────────┘
4199
- def clip_max(max_val)
4200
- wrap_expr(_rbexpr.clip_max(max_val))
4274
+ def clip_max(upper_bound)
4275
+ clip(nil, upper_bound)
4201
4276
  end
4202
4277
 
4203
4278
  # Calculate the lower bound.
@@ -4607,12 +4682,14 @@ module Polars
4607
4682
  end
4608
4683
 
4609
4684
  if !n.nil? && frac.nil?
4685
+ n = Utils.parse_as_expression(n)
4610
4686
  return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4611
4687
  end
4612
4688
 
4613
4689
  if frac.nil?
4614
4690
  frac = 1.0
4615
4691
  end
4692
+ frac = Utils.parse_as_expression(frac)
4616
4693
  wrap_expr(
4617
4694
  _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4618
4695
  )
@@ -4884,7 +4961,7 @@ module Polars
4884
4961
  # Number of valid values there should be in the window before the expression
4885
4962
  # is evaluated. valid values = `length - null_count`
4886
4963
  # @param parallel [Boolean]
4887
- # Run in parallel. Don't do this in a groupby or another operation that
4964
+ # Run in parallel. Don't do this in a group by or another operation that
4888
4965
  # already has much parallelization.
4889
4966
  #
4890
4967
  # @return [Expr]
@@ -4929,8 +5006,8 @@ module Polars
4929
5006
  #
4930
5007
  # Enables downstream code to use fast paths for sorted arrays.
4931
5008
  #
4932
- # @param reverse [Boolean]
4933
- # If the `Series` order is reversed, e.g. descending.
5009
+ # @param descending [Boolean]
5010
+ # Whether the `Series` order is descending.
4934
5011
  #
4935
5012
  # @return [Expr]
4936
5013
  #
@@ -4950,9 +5027,9 @@ module Polars
4950
5027
  # # ╞════════╡
4951
5028
  # # │ 3 │
4952
5029
  # # └────────┘
4953
- # def set_sorted(reverse: false)
4954
- # map { |s| s.set_sorted(reverse) }
4955
- # end
5030
+ def set_sorted(descending: false)
5031
+ wrap_expr(_rbexpr.set_sorted_flag(descending))
5032
+ end
4956
5033
 
4957
5034
  # Aggregate to list.
4958
5035
  #
@@ -4965,7 +5042,7 @@ module Polars
4965
5042
  # "b" => [4, 5, 6]
4966
5043
  # }
4967
5044
  # )
4968
- # df.select(Polars.all.list)
5045
+ # df.select(Polars.all.implode)
4969
5046
  # # =>
4970
5047
  # # shape: (1, 2)
4971
5048
  # # ┌───────────┬───────────┐
@@ -4978,7 +5055,6 @@ module Polars
4978
5055
  def implode
4979
5056
  wrap_expr(_rbexpr.implode)
4980
5057
  end
4981
- alias_method :list, :implode
4982
5058
 
4983
5059
  # Shrink numeric columns to the minimal required datatype.
4984
5060
  #
@@ -5018,10 +5094,17 @@ module Polars
5018
5094
  # Create an object namespace of all list related methods.
5019
5095
  #
5020
5096
  # @return [ListExpr]
5021
- def arr
5097
+ def list
5022
5098
  ListExpr.new(self)
5023
5099
  end
5024
5100
 
5101
+ # Create an object namespace of all array related methods.
5102
+ #
5103
+ # @return [ArrayExpr]
5104
+ def arr
5105
+ ArrayExpr.new(self)
5106
+ end
5107
+
5025
5108
  # Create an object namespace of all binary related methods.
5026
5109
  #
5027
5110
  # @return [BinaryExpr]
@@ -5050,6 +5133,13 @@ module Polars
5050
5133
  MetaExpr.new(self)
5051
5134
  end
5052
5135
 
5136
+ # Create an object namespace of all expressions that modify expression names.
5137
+ #
5138
+ # @return [NameExpr]
5139
+ def name
5140
+ NameExpr.new(self)
5141
+ end
5142
+
5053
5143
  # Create an object namespace of all string related methods.
5054
5144
  #
5055
5145
  # @return [StringExpr]