polars-df 0.6.0-x86_64-linux → 0.8.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/expr.rb CHANGED
@@ -131,6 +131,13 @@ module Polars
131
131
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
132
132
  end
133
133
 
134
+ # Performs boolean not.
135
+ #
136
+ # @return [Expr]
137
+ def !
138
+ is_not
139
+ end
140
+
134
141
  # Performs negation.
135
142
  #
136
143
  # @return [Expr]
@@ -191,8 +198,8 @@ module Polars
191
198
  # # ╞══════╪═══════╡
192
199
  # # │ true ┆ false │
193
200
  # # └──────┴───────┘
194
- def any
195
- wrap_expr(_rbexpr.any)
201
+ def any(drop_nulls: true)
202
+ wrap_expr(_rbexpr.any(drop_nulls))
196
203
  end
197
204
 
198
205
  # Check if all boolean values in a Boolean column are `true`.
@@ -216,8 +223,8 @@ module Polars
216
223
  # # ╞══════╪═══════╪═══════╡
217
224
  # # │ true ┆ false ┆ false │
218
225
  # # └──────┴───────┴───────┘
219
- def all
220
- wrap_expr(_rbexpr.all)
226
+ def all(drop_nulls: true)
227
+ wrap_expr(_rbexpr.all(drop_nulls))
221
228
  end
222
229
 
223
230
  # Compute the square root of the elements.
@@ -359,7 +366,7 @@ module Polars
359
366
  # # │ 3 ┆ 1.5 │
360
367
  # # └─────┴──────┘
361
368
  def exclude(columns)
362
- if columns.is_a?(String)
369
+ if columns.is_a?(::String)
363
370
  columns = [columns]
364
371
  return wrap_expr(_rbexpr.exclude(columns))
365
372
  elsif !columns.is_a?(::Array)
@@ -367,11 +374,11 @@ module Polars
367
374
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
375
  end
369
376
 
370
- if !columns.all? { |a| a.is_a?(String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
377
+ if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
371
378
  raise ArgumentError, "input should be all string or all DataType"
372
379
  end
373
380
 
374
- if columns[0].is_a?(String)
381
+ if columns[0].is_a?(::String)
375
382
  wrap_expr(_rbexpr.exclude(columns))
376
383
  else
377
384
  wrap_expr(_rbexpr.exclude_dtype(columns))
@@ -401,21 +408,21 @@ module Polars
401
408
  # # │ 18 ┆ 4 │
402
409
  # # └─────┴─────┘
403
410
  def keep_name
404
- wrap_expr(_rbexpr.keep_name)
411
+ name.keep
405
412
  end
406
413
 
407
414
  # Add a prefix to the root column name of the expression.
408
415
  #
409
416
  # @return [Expr]
410
417
  def prefix(prefix)
411
- wrap_expr(_rbexpr.prefix(prefix))
418
+ name.prefix(prefix)
412
419
  end
413
420
 
414
421
  # Add a suffix to the root column name of the expression.
415
422
  #
416
423
  # @return [Expr]
417
424
  def suffix(suffix)
418
- wrap_expr(_rbexpr.suffix(suffix))
425
+ name.suffix(suffix)
419
426
  end
420
427
 
421
428
  # Rename the output of an expression by mapping a function over the root name.
@@ -443,7 +450,7 @@ module Polars
443
450
  # # │ 1 ┆ 3 │
444
451
  # # └───────────┴───────────┘
445
452
  def map_alias(&f)
446
- Utils.wrap_expr(_rbexpr.map_alias(f))
453
+ name.map(&f)
447
454
  end
448
455
 
449
456
  # Negate a boolean expression.
@@ -682,7 +689,7 @@ module Polars
682
689
  # "value" => [94, 95, 96, 97, 97, 99]
683
690
  # }
684
691
  # )
685
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
692
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
686
693
  # # =>
687
694
  # # shape: (2, 2)
688
695
  # # ┌───────┬───────────┐
@@ -714,13 +721,13 @@ module Polars
714
721
  # # │ 3 ┆ 3 │
715
722
  # # └─────┴─────┘
716
723
  def count
717
- wrap_expr(_rbexpr.count)
724
+ warn "`Expr#count` will exclude null values in 0.9.0. Use `Expr#length` instead."
725
+ # wrap_expr(_rbexpr.count)
726
+ wrap_expr(_rbexpr.len)
718
727
  end
719
728
 
720
729
  # Count the number of values in this expression.
721
730
  #
722
- # Alias for {#count}.
723
- #
724
731
  # @return [Expr]
725
732
  #
726
733
  # @example
@@ -736,8 +743,9 @@ module Polars
736
743
  # # │ 3 ┆ 3 │
737
744
  # # └─────┴─────┘
738
745
  def len
739
- count
746
+ wrap_expr(_rbexpr.len)
740
747
  end
748
+ alias_method :length, :len
741
749
 
742
750
  # Get a slice of this expression.
743
751
  #
@@ -905,8 +913,8 @@ module Polars
905
913
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
906
914
  # df.select(
907
915
  # [
908
- # Polars.col("a").cumsum,
909
- # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
916
+ # Polars.col("a").cum_sum,
917
+ # Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
910
918
  # ]
911
919
  # )
912
920
  # # =>
@@ -921,9 +929,10 @@ module Polars
921
929
  # # │ 6 ┆ 7 │
922
930
  # # │ 10 ┆ 4 │
923
931
  # # └─────┴───────────┘
924
- def cumsum(reverse: false)
925
- wrap_expr(_rbexpr.cumsum(reverse))
932
+ def cum_sum(reverse: false)
933
+ wrap_expr(_rbexpr.cum_sum(reverse))
926
934
  end
935
+ alias_method :cumsum, :cum_sum
927
936
 
928
937
  # Get an array with the cumulative product computed at every element.
929
938
  #
@@ -940,8 +949,8 @@ module Polars
940
949
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
941
950
  # df.select(
942
951
  # [
943
- # Polars.col("a").cumprod,
944
- # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
952
+ # Polars.col("a").cum_prod,
953
+ # Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
945
954
  # ]
946
955
  # )
947
956
  # # =>
@@ -956,9 +965,10 @@ module Polars
956
965
  # # │ 6 ┆ 12 │
957
966
  # # │ 24 ┆ 4 │
958
967
  # # └─────┴───────────┘
959
- def cumprod(reverse: false)
960
- wrap_expr(_rbexpr.cumprod(reverse))
968
+ def cum_prod(reverse: false)
969
+ wrap_expr(_rbexpr.cum_prod(reverse))
961
970
  end
971
+ alias_method :cumprod, :cum_prod
962
972
 
963
973
  # Get an array with the cumulative min computed at every element.
964
974
  #
@@ -971,8 +981,8 @@ module Polars
971
981
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
972
982
  # df.select(
973
983
  # [
974
- # Polars.col("a").cummin,
975
- # Polars.col("a").cummin(reverse: true).alias("a_reverse")
984
+ # Polars.col("a").cum_min,
985
+ # Polars.col("a").cum_min(reverse: true).alias("a_reverse")
976
986
  # ]
977
987
  # )
978
988
  # # =>
@@ -987,9 +997,10 @@ module Polars
987
997
  # # │ 1 ┆ 3 │
988
998
  # # │ 1 ┆ 4 │
989
999
  # # └─────┴───────────┘
990
- def cummin(reverse: false)
991
- wrap_expr(_rbexpr.cummin(reverse))
1000
+ def cum_min(reverse: false)
1001
+ wrap_expr(_rbexpr.cum_min(reverse))
992
1002
  end
1003
+ alias_method :cummin, :cum_min
993
1004
 
994
1005
  # Get an array with the cumulative max computed at every element.
995
1006
  #
@@ -1002,8 +1013,8 @@ module Polars
1002
1013
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1003
1014
  # df.select(
1004
1015
  # [
1005
- # Polars.col("a").cummax,
1006
- # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1016
+ # Polars.col("a").cum_max,
1017
+ # Polars.col("a").cum_max(reverse: true).alias("a_reverse")
1007
1018
  # ]
1008
1019
  # )
1009
1020
  # # =>
@@ -1018,9 +1029,10 @@ module Polars
1018
1029
  # # │ 3 ┆ 4 │
1019
1030
  # # │ 4 ┆ 4 │
1020
1031
  # # └─────┴───────────┘
1021
- def cummax(reverse: false)
1022
- wrap_expr(_rbexpr.cummax(reverse))
1032
+ def cum_max(reverse: false)
1033
+ wrap_expr(_rbexpr.cum_max(reverse))
1023
1034
  end
1035
+ alias_method :cummax, :cum_max
1024
1036
 
1025
1037
  # Get an array with the cumulative count computed at every element.
1026
1038
  #
@@ -1035,8 +1047,8 @@ module Polars
1035
1047
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1036
1048
  # df.select(
1037
1049
  # [
1038
- # Polars.col("a").cumcount,
1039
- # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1050
+ # Polars.col("a").cum_count,
1051
+ # Polars.col("a").cum_count(reverse: true).alias("a_reverse")
1040
1052
  # ]
1041
1053
  # )
1042
1054
  # # =>
@@ -1051,9 +1063,10 @@ module Polars
1051
1063
  # # │ 2 ┆ 1 │
1052
1064
  # # │ 3 ┆ 0 │
1053
1065
  # # └─────┴───────────┘
1054
- def cumcount(reverse: false)
1055
- wrap_expr(_rbexpr.cumcount(reverse))
1066
+ def cum_count(reverse: false)
1067
+ wrap_expr(_rbexpr.cum_count(reverse))
1056
1068
  end
1069
+ alias_method :cumcount, :cum_count
1057
1070
 
1058
1071
  # Rounds down to the nearest integer value.
1059
1072
  #
@@ -1229,7 +1242,7 @@ module Polars
1229
1242
 
1230
1243
  # Sort this column. In projection/ selection context the whole column is sorted.
1231
1244
  #
1232
- # If used in a groupby context, the groups are sorted.
1245
+ # If used in a group by context, the groups are sorted.
1233
1246
  #
1234
1247
  # @param reverse [Boolean]
1235
1248
  # false -> order from small to large.
@@ -1287,7 +1300,7 @@ module Polars
1287
1300
  # # └───────┘
1288
1301
  #
1289
1302
  # @example
1290
- # df.groupby("group").agg(Polars.col("value").sort)
1303
+ # df.group_by("group").agg(Polars.col("value").sort)
1291
1304
  # # =>
1292
1305
  # # shape: (2, 2)
1293
1306
  # # ┌───────┬────────────┐
@@ -1337,6 +1350,7 @@ module Polars
1337
1350
  # # │ 2 ┆ 98 │
1338
1351
  # # └───────┴──────────┘
1339
1352
  def top_k(k: 5)
1353
+ k = Utils.parse_as_expression(k)
1340
1354
  wrap_expr(_rbexpr.top_k(k))
1341
1355
  end
1342
1356
 
@@ -1375,6 +1389,7 @@ module Polars
1375
1389
  # # │ 2 ┆ 98 │
1376
1390
  # # └───────┴──────────┘
1377
1391
  def bottom_k(k: 5)
1392
+ k = Utils.parse_as_expression(k)
1378
1393
  wrap_expr(_rbexpr.bottom_k(k))
1379
1394
  end
1380
1395
 
@@ -1494,7 +1509,7 @@ module Polars
1494
1509
  # Sort this column by the ordering of another column, or multiple other columns.
1495
1510
  #
1496
1511
  # In projection/ selection context the whole column is sorted.
1497
- # If used in a groupby context, the groups are sorted.
1512
+ # If used in a group by context, the groups are sorted.
1498
1513
  #
1499
1514
  # @param by [Object]
1500
1515
  # The column(s) used for sorting.
@@ -1566,30 +1581,33 @@ module Polars
1566
1581
  # "value" => [1, 98, 2, 3, 99, 4]
1567
1582
  # }
1568
1583
  # )
1569
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1584
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
1570
1585
  # # =>
1571
1586
  # # shape: (2, 2)
1572
- # # ┌───────┬───────┐
1573
- # # │ group ┆ value │
1574
- # # │ --- ┆ --- │
1575
- # # │ str ┆ i64 │
1576
- # # ╞═══════╪═══════╡
1577
- # # │ one ┆ 98 │
1578
- # # │ two ┆ 99 │
1579
- # # └───────┴───────┘
1580
- def take(indices)
1587
+ # # ┌───────┬───────────┐
1588
+ # # │ group ┆ value │
1589
+ # # │ --- ┆ --- │
1590
+ # # │ str ┆ list[i64] │
1591
+ # # ╞═══════╪═══════════╡
1592
+ # # │ one ┆ [2, 98] │
1593
+ # # │ two ┆ [4, 99] │
1594
+ # # └───────┴───────────┘
1595
+ def gather(indices)
1581
1596
  if indices.is_a?(::Array)
1582
1597
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1583
1598
  else
1584
1599
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1585
1600
  end
1586
- wrap_expr(_rbexpr.take(indices_lit._rbexpr))
1601
+ wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
1587
1602
  end
1603
+ alias_method :take, :gather
1588
1604
 
1589
1605
  # Shift the values by a given period.
1590
1606
  #
1591
- # @param periods [Integer]
1607
+ # @param n [Integer]
1592
1608
  # Number of places to shift (may be negative).
1609
+ # @param fill_value [Object]
1610
+ # Fill the resulting null values with this value.
1593
1611
  #
1594
1612
  # @return [Expr]
1595
1613
  #
@@ -1608,8 +1626,12 @@ module Polars
1608
1626
  # # │ 2 │
1609
1627
  # # │ 3 │
1610
1628
  # # └──────┘
1611
- def shift(periods = 1)
1612
- wrap_expr(_rbexpr.shift(periods))
1629
+ def shift(n = 1, fill_value: nil)
1630
+ if !fill_value.nil?
1631
+ fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1632
+ end
1633
+ n = Utils.parse_as_expression(n)
1634
+ wrap_expr(_rbexpr.shift(n, fill_value))
1613
1635
  end
1614
1636
 
1615
1637
  # Shift the values by a given period and fill the resulting null values.
@@ -1637,8 +1659,7 @@ module Polars
1637
1659
  # # │ 3 │
1638
1660
  # # └─────┘
1639
1661
  def shift_and_fill(periods, fill_value)
1640
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
1641
- wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
1662
+ shift(periods, fill_value: fill_value)
1642
1663
  end
1643
1664
 
1644
1665
  # Fill null values using the specified value or strategy.
@@ -2063,7 +2084,7 @@ module Polars
2063
2084
  # # │ 2 │
2064
2085
  # # └─────┘
2065
2086
  def approx_unique
2066
- wrap_expr(_rbexpr.approx_unique)
2087
+ wrap_expr(_rbexpr.approx_n_unique)
2067
2088
  end
2068
2089
 
2069
2090
  # Count null values.
@@ -2201,7 +2222,7 @@ module Polars
2201
2222
 
2202
2223
  # Apply window function over a subgroup.
2203
2224
  #
2204
- # This is similar to a groupby + aggregation + self join.
2225
+ # This is similar to a group by + aggregation + self join.
2205
2226
  # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2206
2227
  #
2207
2228
  # @param expr [Object]
@@ -2309,9 +2330,10 @@ module Polars
2309
2330
  # # │ 1 ┆ false │
2310
2331
  # # │ 5 ┆ true │
2311
2332
  # # └─────┴──────────┘
2312
- def is_first
2313
- wrap_expr(_rbexpr.is_first)
2333
+ def is_first_distinct
2334
+ wrap_expr(_rbexpr.is_first_distinct)
2314
2335
  end
2336
+ alias_method :is_first, :is_first_distinct
2315
2337
 
2316
2338
  # Get mask of duplicated values.
2317
2339
  #
@@ -2335,6 +2357,54 @@ module Polars
2335
2357
  wrap_expr(_rbexpr.is_duplicated)
2336
2358
  end
2337
2359
 
2360
+ # Get a boolean mask of the local maximum peaks.
2361
+ #
2362
+ # @return [Expr]
2363
+ #
2364
+ # @example
2365
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
2366
+ # df.select(Polars.col("a").peak_max)
2367
+ # # =>
2368
+ # # shape: (5, 1)
2369
+ # # ┌───────┐
2370
+ # # │ a │
2371
+ # # │ --- │
2372
+ # # │ bool │
2373
+ # # ╞═══════╡
2374
+ # # │ false │
2375
+ # # │ false │
2376
+ # # │ false │
2377
+ # # │ false │
2378
+ # # │ true │
2379
+ # # └───────┘
2380
+ def peak_max
2381
+ wrap_expr(_rbexpr.peak_max)
2382
+ end
2383
+
2384
+ # Get a boolean mask of the local minimum peaks.
2385
+ #
2386
+ # @return [Expr]
2387
+ #
2388
+ # @example
2389
+ # df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
2390
+ # df.select(Polars.col("a").peak_min)
2391
+ # # =>
2392
+ # # shape: (5, 1)
2393
+ # # ┌───────┐
2394
+ # # │ a │
2395
+ # # │ --- │
2396
+ # # │ bool │
2397
+ # # ╞═══════╡
2398
+ # # │ false │
2399
+ # # │ true │
2400
+ # # │ false │
2401
+ # # │ true │
2402
+ # # │ false │
2403
+ # # └───────┘
2404
+ def peak_min
2405
+ wrap_expr(_rbexpr.peak_min)
2406
+ end
2407
+
2338
2408
  # Get quantile value.
2339
2409
  #
2340
2410
  # @param quantile [Float]
@@ -2354,7 +2424,7 @@ module Polars
2354
2424
  # # │ --- │
2355
2425
  # # │ f64 │
2356
2426
  # # ╞═════╡
2357
- # # │ 1.0 │
2427
+ # # │ 2.0 │
2358
2428
  # # └─────┘
2359
2429
  #
2360
2430
  # @example
@@ -2409,6 +2479,206 @@ module Polars
2409
2479
  wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
2410
2480
  end
2411
2481
 
2482
+ # Bin continuous values into discrete categories.
2483
+ #
2484
+ # @param breaks [Array]
2485
+ # List of unique cut points.
2486
+ # @param labels [Array]
2487
+ # Names of the categories. The number of labels must be equal to the number
2488
+ # of cut points plus one.
2489
+ # @param left_closed [Boolean]
2490
+ # Set the intervals to be left-closed instead of right-closed.
2491
+ # @param include_breaks [Boolean]
2492
+ # Include a column with the right endpoint of the bin each observation falls
2493
+ # in. This will change the data type of the output from a
2494
+ # `Categorical` to a `Struct`.
2495
+ #
2496
+ # @return [Expr]
2497
+ #
2498
+ # @example Divide a column into three categories.
2499
+ # df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
2500
+ # df.with_columns(
2501
+ # Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
2502
+ # )
2503
+ # # =>
2504
+ # # shape: (5, 2)
2505
+ # # ┌─────┬─────┐
2506
+ # # │ foo ┆ cut │
2507
+ # # │ --- ┆ --- │
2508
+ # # │ i64 ┆ cat │
2509
+ # # ╞═════╪═════╡
2510
+ # # │ -2 ┆ a │
2511
+ # # │ -1 ┆ a │
2512
+ # # │ 0 ┆ b │
2513
+ # # │ 1 ┆ b │
2514
+ # # │ 2 ┆ c │
2515
+ # # └─────┴─────┘
2516
+ #
2517
+ # @example Add both the category and the breakpoint.
2518
+ # df.with_columns(
2519
+ # Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
2520
+ # ).unnest("cut")
2521
+ # # =>
2522
+ # # shape: (5, 3)
2523
+ # # ┌─────┬──────┬────────────┐
2524
+ # # │ foo ┆ brk ┆ foo_bin │
2525
+ # # │ --- ┆ --- ┆ --- │
2526
+ # # │ i64 ┆ f64 ┆ cat │
2527
+ # # ╞═════╪══════╪════════════╡
2528
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2529
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2530
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2531
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2532
+ # # │ 2 ┆ inf ┆ (1, inf] │
2533
+ # # └─────┴──────┴────────────┘
2534
+ def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
2535
+ wrap_expr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
2536
+ end
2537
+
2538
+ # Bin continuous values into discrete categories based on their quantiles.
2539
+ #
2540
+ # @param quantiles [Array]
2541
+ # Either a list of quantile probabilities between 0 and 1 or a positive
2542
+ # integer determining the number of bins with uniform probability.
2543
+ # @param labels [Array]
2544
+ # Names of the categories. The number of labels must be equal to the number
2545
+ # of categories.
2546
+ # @param left_closed [Boolean]
2547
+ # Set the intervals to be left-closed instead of right-closed.
2548
+ # @param allow_duplicates [Boolean]
2549
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
2550
+ # rather than raising a `DuplicateError`. This can happen even with unique
2551
+ # probabilities, depending on the data.
2552
+ # @param include_breaks [Boolean]
2553
+ # Include a column with the right endpoint of the bin each observation falls
2554
+ # in. This will change the data type of the output from a
2555
+ # `Categorical` to a `Struct`.
2556
+ #
2557
+ # @return [Expr]
2558
+ #
2559
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
2560
+ # df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
2561
+ # df.with_columns(
2562
+ # Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
2563
+ # )
2564
+ # # =>
2565
+ # # shape: (5, 2)
2566
+ # # ┌─────┬──────┐
2567
+ # # │ foo ┆ qcut │
2568
+ # # │ --- ┆ --- │
2569
+ # # │ i64 ┆ cat │
2570
+ # # ╞═════╪══════╡
2571
+ # # │ -2 ┆ a │
2572
+ # # │ -1 ┆ a │
2573
+ # # │ 0 ┆ b │
2574
+ # # │ 1 ┆ b │
2575
+ # # │ 2 ┆ c │
2576
+ # # └─────┴──────┘
2577
+ #
2578
+ # @example Divide a column into two categories using uniform quantile probabilities.
2579
+ # df.with_columns(
2580
+ # Polars.col("foo")
2581
+ # .qcut(2, labels: ["low", "high"], left_closed: true)
2582
+ # .alias("qcut")
2583
+ # )
2584
+ # # =>
2585
+ # # shape: (5, 2)
2586
+ # # ┌─────┬──────┐
2587
+ # # │ foo ┆ qcut │
2588
+ # # │ --- ┆ --- │
2589
+ # # │ i64 ┆ cat │
2590
+ # # ╞═════╪══════╡
2591
+ # # │ -2 ┆ low │
2592
+ # # │ -1 ┆ low │
2593
+ # # │ 0 ┆ high │
2594
+ # # │ 1 ┆ high │
2595
+ # # │ 2 ┆ high │
2596
+ # # └─────┴──────┘
2597
+ #
2598
+ # @example Add both the category and the breakpoint.
2599
+ # df.with_columns(
2600
+ # Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
2601
+ # ).unnest("qcut")
2602
+ # # =>
2603
+ # # shape: (5, 3)
2604
+ # # ┌─────┬──────┬────────────┐
2605
+ # # │ foo ┆ brk ┆ foo_bin │
2606
+ # # │ --- ┆ --- ┆ --- │
2607
+ # # │ i64 ┆ f64 ┆ cat │
2608
+ # # ╞═════╪══════╪════════════╡
2609
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2610
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2611
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2612
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2613
+ # # │ 2 ┆ inf ┆ (1, inf] │
2614
+ # # └─────┴──────┴────────────┘
2615
+ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
2616
+ if quantiles.is_a?(Integer)
2617
+ rbexpr = _rbexpr.qcut_uniform(
2618
+ quantiles, labels, left_closed, allow_duplicates, include_breaks
2619
+ )
2620
+ else
2621
+ rbexpr = _rbexpr.qcut(
2622
+ quantiles, labels, left_closed, allow_duplicates, include_breaks
2623
+ )
2624
+ end
2625
+
2626
+ wrap_expr(rbexpr)
2627
+ end
2628
+
2629
+ # Get the lengths of runs of identical values.
2630
+ #
2631
+ # @return [Expr]
2632
+ #
2633
+ # @example
2634
+ # df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
2635
+ # df.select(Polars.col("s").rle).unnest("s")
2636
+ # # =>
2637
+ # # shape: (6, 2)
2638
+ # # ┌─────────┬────────┐
2639
+ # # │ lengths ┆ values │
2640
+ # # │ --- ┆ --- │
2641
+ # # │ i32 ┆ i64 │
2642
+ # # ╞═════════╪════════╡
2643
+ # # │ 2 ┆ 1 │
2644
+ # # │ 1 ┆ 2 │
2645
+ # # │ 1 ┆ 1 │
2646
+ # # │ 1 ┆ null │
2647
+ # # │ 1 ┆ 1 │
2648
+ # # │ 2 ┆ 3 │
2649
+ # # └─────────┴────────┘
2650
+ def rle
2651
+ wrap_expr(_rbexpr.rle)
2652
+ end
2653
+
2654
+ # Map values to run IDs.
2655
+ #
2656
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
2657
+ # which it falls. This is especially useful when you want to define groups by
2658
+ # runs of identical values rather than the values themselves.
2659
+ #
2660
+ # @return [Expr]
2661
+ #
2662
+ # @example
2663
+ # df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
2664
+ # df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
2665
+ # # =>
2666
+ # # shape: (5, 4)
2667
+ # # ┌─────┬──────┬─────┬──────┐
2668
+ # # │ a ┆ b ┆ a_r ┆ ab_r │
2669
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2670
+ # # │ i64 ┆ str ┆ u32 ┆ u32 │
2671
+ # # ╞═════╪══════╪═════╪══════╡
2672
+ # # │ 1 ┆ x ┆ 0 ┆ 0 │
2673
+ # # │ 2 ┆ x ┆ 1 ┆ 1 │
2674
+ # # │ 1 ┆ null ┆ 2 ┆ 2 │
2675
+ # # │ 1 ┆ y ┆ 2 ┆ 3 │
2676
+ # # │ 1 ┆ y ┆ 2 ┆ 3 │
2677
+ # # └─────┴──────┴─────┴──────┘
2678
+ def rle_id
2679
+ wrap_expr(_rbexpr.rle_id)
2680
+ end
2681
+
2412
2682
  # Filter a single column.
2413
2683
  #
2414
2684
  # Mostly useful in an aggregation context. If you want to filter on a DataFrame
@@ -2427,7 +2697,7 @@ module Polars
2427
2697
  # }
2428
2698
  # )
2429
2699
  # (
2430
- # df.groupby("group_col").agg(
2700
+ # df.group_by("group_col").agg(
2431
2701
  # [
2432
2702
  # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2433
2703
  # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
@@ -2465,7 +2735,7 @@ module Polars
2465
2735
  # }
2466
2736
  # )
2467
2737
  # (
2468
- # df.groupby("group_col").agg(
2738
+ # df.group_by("group_col").agg(
2469
2739
  # [
2470
2740
  # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2471
2741
  # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
@@ -2583,7 +2853,7 @@ module Polars
2583
2853
  #
2584
2854
  # @example In a GroupBy context the function is applied by group:
2585
2855
  # df.lazy
2586
- # .groupby("b", maintain_order: true)
2856
+ # .group_by("b", maintain_order: true)
2587
2857
  # .agg(
2588
2858
  # [
2589
2859
  # Polars.col("a").apply { |x| x.sum }
@@ -2622,7 +2892,7 @@ module Polars
2622
2892
  # "values" => [[1, 2], [2, 3], [4]]
2623
2893
  # }
2624
2894
  # )
2625
- # df.groupby("group").agg(Polars.col("values").flatten)
2895
+ # df.group_by("group").agg(Polars.col("values").flatten)
2626
2896
  # # =>
2627
2897
  # # shape: (2, 2)
2628
2898
  # # ┌───────┬───────────┐
@@ -2670,7 +2940,7 @@ module Polars
2670
2940
  #
2671
2941
  # @example
2672
2942
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2673
- # df.select(Polars.col("foo").take_every(3))
2943
+ # df.select(Polars.col("foo").gather_every(3))
2674
2944
  # # =>
2675
2945
  # # shape: (3, 1)
2676
2946
  # # ┌─────┐
@@ -2682,9 +2952,10 @@ module Polars
2682
2952
  # # │ 4 │
2683
2953
  # # │ 7 │
2684
2954
  # # └─────┘
2685
- def take_every(n)
2686
- wrap_expr(_rbexpr.take_every(n))
2955
+ def gather_every(n, offset = 0)
2956
+ wrap_expr(_rbexpr.gather_every(n, offset))
2687
2957
  end
2958
+ alias_method :take_every, :gather_every
2688
2959
 
2689
2960
  # Get the first `n` rows.
2690
2961
  #
@@ -3057,11 +3328,11 @@ module Polars
3057
3328
  # # ┌─────┬─────┐
3058
3329
  # # │ a ┆ b │
3059
3330
  # # │ --- ┆ --- │
3060
- # # │ i64 ┆ f64 │
3331
+ # # │ f64 ┆ f64 │
3061
3332
  # # ╞═════╪═════╡
3062
- # # │ 1 ┆ 1.0 │
3063
- # # │ 2 ┆ NaN │
3064
- # # │ 3 ┆ 3.0 │
3333
+ # # │ 1.0 ┆ 1.0 │
3334
+ # # │ 2.0 ┆ NaN │
3335
+ # # │ 3.0 ┆ 3.0 │
3065
3336
  # # └─────┴─────┘
3066
3337
  def interpolate(method: "linear")
3067
3338
  wrap_expr(_rbexpr.interpolate(method))
@@ -3112,7 +3383,7 @@ module Polars
3112
3383
  #
3113
3384
  # @note
3114
3385
  # If you want to compute multiple aggregation statistics over the same dynamic
3115
- # window, consider using `groupby_rolling` this method can cache the window size
3386
+ # window, consider using `group_by_rolling` this method can cache the window size
3116
3387
  # computation.
3117
3388
  #
3118
3389
  # @return [Expr]
@@ -3201,7 +3472,7 @@ module Polars
3201
3472
  #
3202
3473
  # @note
3203
3474
  # If you want to compute multiple aggregation statistics over the same dynamic
3204
- # window, consider using `groupby_rolling` this method can cache the window size
3475
+ # window, consider using `group_by_rolling` this method can cache the window size
3205
3476
  # computation.
3206
3477
  #
3207
3478
  # @return [Expr]
@@ -3290,7 +3561,7 @@ module Polars
3290
3561
  #
3291
3562
  # @note
3292
3563
  # If you want to compute multiple aggregation statistics over the same dynamic
3293
- # window, consider using `groupby_rolling` this method can cache the window size
3564
+ # window, consider using `group_by_rolling` this method can cache the window size
3294
3565
  # computation.
3295
3566
  #
3296
3567
  # @return [Expr]
@@ -3379,7 +3650,7 @@ module Polars
3379
3650
  #
3380
3651
  # @note
3381
3652
  # If you want to compute multiple aggregation statistics over the same dynamic
3382
- # window, consider using `groupby_rolling` this method can cache the window size
3653
+ # window, consider using `group_by_rolling` this method can cache the window size
3383
3654
  # computation.
3384
3655
  #
3385
3656
  # @return [Expr]
@@ -3468,7 +3739,7 @@ module Polars
3468
3739
  #
3469
3740
  # @note
3470
3741
  # If you want to compute multiple aggregation statistics over the same dynamic
3471
- # window, consider using `groupby_rolling` this method can cache the window size
3742
+ # window, consider using `group_by_rolling` this method can cache the window size
3472
3743
  # computation.
3473
3744
  #
3474
3745
  # @return [Expr]
@@ -3501,14 +3772,15 @@ module Polars
3501
3772
  center: false,
3502
3773
  by: nil,
3503
3774
  closed: "left",
3504
- ddof: 1
3775
+ ddof: 1,
3776
+ warn_if_unsorted: true
3505
3777
  )
3506
3778
  window_size, min_periods = _prepare_rolling_window_args(
3507
3779
  window_size, min_periods
3508
3780
  )
3509
3781
  wrap_expr(
3510
3782
  _rbexpr.rolling_std(
3511
- window_size, weights, min_periods, center, by, closed, ddof
3783
+ window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
3512
3784
  )
3513
3785
  )
3514
3786
  end
@@ -3558,7 +3830,7 @@ module Polars
3558
3830
  #
3559
3831
  # @note
3560
3832
  # If you want to compute multiple aggregation statistics over the same dynamic
3561
- # window, consider using `groupby_rolling` this method can cache the window size
3833
+ # window, consider using `group_by_rolling` this method can cache the window size
3562
3834
  # computation.
3563
3835
  #
3564
3836
  # @return [Expr]
@@ -3591,14 +3863,15 @@ module Polars
3591
3863
  center: false,
3592
3864
  by: nil,
3593
3865
  closed: "left",
3594
- ddof: 1
3866
+ ddof: 1,
3867
+ warn_if_unsorted: true
3595
3868
  )
3596
3869
  window_size, min_periods = _prepare_rolling_window_args(
3597
3870
  window_size, min_periods
3598
3871
  )
3599
3872
  wrap_expr(
3600
3873
  _rbexpr.rolling_var(
3601
- window_size, weights, min_periods, center, by, closed, ddof
3874
+ window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
3602
3875
  )
3603
3876
  )
3604
3877
  end
@@ -3644,7 +3917,7 @@ module Polars
3644
3917
  #
3645
3918
  # @note
3646
3919
  # If you want to compute multiple aggregation statistics over the same dynamic
3647
- # window, consider using `groupby_rolling` this method can cache the window size
3920
+ # window, consider using `group_by_rolling` this method can cache the window size
3648
3921
  # computation.
3649
3922
  #
3650
3923
  # @return [Expr]
@@ -3676,14 +3949,15 @@ module Polars
3676
3949
  min_periods: nil,
3677
3950
  center: false,
3678
3951
  by: nil,
3679
- closed: "left"
3952
+ closed: "left",
3953
+ warn_if_unsorted: true
3680
3954
  )
3681
3955
  window_size, min_periods = _prepare_rolling_window_args(
3682
3956
  window_size, min_periods
3683
3957
  )
3684
3958
  wrap_expr(
3685
3959
  _rbexpr.rolling_median(
3686
- window_size, weights, min_periods, center, by, closed
3960
+ window_size, weights, min_periods, center, by, closed, warn_if_unsorted
3687
3961
  )
3688
3962
  )
3689
3963
  end
@@ -3733,7 +4007,7 @@ module Polars
3733
4007
  #
3734
4008
  # @note
3735
4009
  # If you want to compute multiple aggregation statistics over the same dynamic
3736
- # window, consider using `groupby_rolling` this method can cache the window size
4010
+ # window, consider using `group_by_rolling` this method can cache the window size
3737
4011
  # computation.
3738
4012
  #
3739
4013
  # @return [Expr]
@@ -3767,14 +4041,15 @@ module Polars
3767
4041
  min_periods: nil,
3768
4042
  center: false,
3769
4043
  by: nil,
3770
- closed: "left"
4044
+ closed: "left",
4045
+ warn_if_unsorted: true
3771
4046
  )
3772
4047
  window_size, min_periods = _prepare_rolling_window_args(
3773
4048
  window_size, min_periods
3774
4049
  )
3775
4050
  wrap_expr(
3776
4051
  _rbexpr.rolling_quantile(
3777
- quantile, interpolation, window_size, weights, min_periods, center, by, closed
4052
+ quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
3778
4053
  )
3779
4054
  )
3780
4055
  end
@@ -3948,7 +4223,7 @@ module Polars
3948
4223
  # # ┌─────┐
3949
4224
  # # │ a │
3950
4225
  # # │ --- │
3951
- # # │ f32 │
4226
+ # # │ f64 │
3952
4227
  # # ╞═════╡
3953
4228
  # # │ 3.0 │
3954
4229
  # # │ 4.5 │
@@ -4041,6 +4316,7 @@ module Polars
4041
4316
  # # │ 12 ┆ 0.0 │
4042
4317
  # # └──────┴────────────┘
4043
4318
  def pct_change(n: 1)
4319
+ n = Utils.parse_as_expression(n)
4044
4320
  wrap_expr(_rbexpr.pct_change(n))
4045
4321
  end
4046
4322
 
@@ -4105,16 +4381,14 @@ module Polars
4105
4381
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
4106
4382
  end
4107
4383
 
4108
- # Clip (limit) the values in an array to a `min` and `max` boundary.
4109
- #
4110
- # Only works for numerical types.
4384
+ # Set values outside the given boundaries to the boundary value.
4111
4385
  #
4112
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4113
- # expression. See `when` for more information.
4386
+ # Only works for numeric and temporal columns. If you want to clip other data
4387
+ # types, consider writing a `when-then-otherwise` expression.
4114
4388
  #
4115
- # @param min_val [Numeric]
4389
+ # @param lower_bound [Numeric]
4116
4390
  # Minimum value.
4117
- # @param max_val [Numeric]
4391
+ # @param upper_bound [Numeric]
4118
4392
  # Maximum value.
4119
4393
  #
4120
4394
  # @return [Expr]
@@ -4134,8 +4408,14 @@ module Polars
4134
4408
  # # │ null ┆ null │
4135
4409
  # # │ 50 ┆ 10 │
4136
4410
  # # └──────┴─────────────┘
4137
- def clip(min_val, max_val)
4138
- wrap_expr(_rbexpr.clip(min_val, max_val))
4411
+ def clip(lower_bound, upper_bound)
4412
+ if !lower_bound.nil?
4413
+ lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
4414
+ end
4415
+ if !upper_bound.nil?
4416
+ upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
4417
+ end
4418
+ wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
4139
4419
  end
4140
4420
 
4141
4421
  # Clip (limit) the values in an array to a `min` boundary.
@@ -4145,7 +4425,7 @@ module Polars
4145
4425
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4146
4426
  # expression. See `when` for more information.
4147
4427
  #
4148
- # @param min_val [Numeric]
4428
+ # @param lower_bound [Numeric]
4149
4429
  # Minimum value.
4150
4430
  #
4151
4431
  # @return [Expr]
@@ -4165,8 +4445,8 @@ module Polars
4165
4445
  # # │ null ┆ null │
4166
4446
  # # │ 50 ┆ 50 │
4167
4447
  # # └──────┴─────────────┘
4168
- def clip_min(min_val)
4169
- wrap_expr(_rbexpr.clip_min(min_val))
4448
+ def clip_min(lower_bound)
4449
+ clip(lower_bound, nil)
4170
4450
  end
4171
4451
 
4172
4452
  # Clip (limit) the values in an array to a `max` boundary.
@@ -4176,7 +4456,7 @@ module Polars
4176
4456
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4177
4457
  # expression. See `when` for more information.
4178
4458
  #
4179
- # @param max_val [Numeric]
4459
+ # @param upper_bound [Numeric]
4180
4460
  # Maximum value.
4181
4461
  #
4182
4462
  # @return [Expr]
@@ -4196,8 +4476,8 @@ module Polars
4196
4476
  # # │ null ┆ null │
4197
4477
  # # │ 50 ┆ 0 │
4198
4478
  # # └──────┴─────────────┘
4199
- def clip_max(max_val)
4200
- wrap_expr(_rbexpr.clip_max(max_val))
4479
+ def clip_max(upper_bound)
4480
+ clip(nil, upper_bound)
4201
4481
  end
4202
4482
 
4203
4483
  # Calculate the lower bound.
@@ -4558,11 +4838,11 @@ module Polars
4558
4838
  # # │ 1 │
4559
4839
  # # │ 3 │
4560
4840
  # # └─────┘
4561
- def shuffle(seed: nil, fixed_seed: false)
4841
+ def shuffle(seed: nil)
4562
4842
  if seed.nil?
4563
4843
  seed = rand(10000)
4564
4844
  end
4565
- wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4845
+ wrap_expr(_rbexpr.shuffle(seed))
4566
4846
  end
4567
4847
 
4568
4848
  # Sample from this expression.
@@ -4600,22 +4880,23 @@ module Polars
4600
4880
  with_replacement: true,
4601
4881
  shuffle: false,
4602
4882
  seed: nil,
4603
- n: nil,
4604
- fixed_seed: false
4883
+ n: nil
4605
4884
  )
4606
4885
  if !n.nil? && !frac.nil?
4607
4886
  raise ArgumentError, "cannot specify both `n` and `frac`"
4608
4887
  end
4609
4888
 
4610
4889
  if !n.nil? && frac.nil?
4611
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
4890
+ n = Utils.parse_as_expression(n)
4891
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4612
4892
  end
4613
4893
 
4614
4894
  if frac.nil?
4615
4895
  frac = 1.0
4616
4896
  end
4897
+ frac = Utils.parse_as_expression(frac)
4617
4898
  wrap_expr(
4618
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
4899
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4619
4900
  )
4620
4901
  end
4621
4902
 
@@ -4885,7 +5166,7 @@ module Polars
4885
5166
  # Number of valid values there should be in the window before the expression
4886
5167
  # is evaluated. valid values = `length - null_count`
4887
5168
  # @param parallel [Boolean]
4888
- # Run in parallel. Don't do this in a groupby or another operation that
5169
+ # Run in parallel. Don't do this in a group by or another operation that
4889
5170
  # already has much parallelization.
4890
5171
  #
4891
5172
  # @return [Expr]
@@ -5057,6 +5338,13 @@ module Polars
5057
5338
  MetaExpr.new(self)
5058
5339
  end
5059
5340
 
5341
+ # Create an object namespace of all expressions that modify expression names.
5342
+ #
5343
+ # @return [NameExpr]
5344
+ def name
5345
+ NameExpr.new(self)
5346
+ end
5347
+
5060
5348
  # Create an object namespace of all string related methods.
5061
5349
  #
5062
5350
  # @return [StringExpr]