polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/expr.rb CHANGED
@@ -131,6 +131,13 @@ module Polars
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
  end
 
+ # Performs boolean not.
+ #
+ # @return [Expr]
+ def !
+ is_not
+ end
+
  # Performs negation.
  #
  # @return [Expr]
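For reference, a minimal sketch of the new `!` operator in use; the frame contents and aliases below are illustrative, not taken from the package:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [true, false, nil]})

# `!expr` now dispatches to Expr#is_not, so both spellings give the same mask.
df.select(
  [
    Polars.col("a").is_not.alias("via_is_not"),
    (!Polars.col("a")).alias("via_bang")
  ]
)
```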
@@ -191,8 +198,8 @@ module Polars
  # # ╞══════╪═══════╡
  # # │ true ┆ false │
  # # └──────┴───────┘
- def any
- wrap_expr(_rbexpr.any)
+ def any(drop_nulls: true)
+ wrap_expr(_rbexpr.any(drop_nulls))
  end
 
  # Check if all boolean values in a Boolean column are `true`.
@@ -216,8 +223,8 @@ module Polars
  # # ╞══════╪═══════╪═══════╡
  # # │ true ┆ false ┆ false │
  # # └──────┴───────┴───────┘
- def all
- wrap_expr(_rbexpr.all)
+ def all(drop_nulls: true)
+ wrap_expr(_rbexpr.all(drop_nulls))
  end
 
  # Compute the square root of the elements.
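A hedged sketch of the new `drop_nulls` flag on `any`/`all`; the sample data is illustrative. With the default (`drop_nulls: true`) nulls are ignored, while `drop_nulls: false` lets nulls participate in the boolean reduction:

```ruby
df = Polars::DataFrame.new({"a" => [true, true, nil]})

df.select(
  [
    Polars.col("a").all.alias("all_drop"),                      # nulls ignored (default)
    Polars.col("a").all(drop_nulls: false).alias("all_keep"),   # nulls taken into account
    Polars.col("a").any(drop_nulls: false).alias("any_keep")
  ]
)
```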
@@ -359,7 +366,7 @@ module Polars
359
366
  # # │ 3 ┆ 1.5 │
360
367
  # # └─────┴──────┘
361
368
  def exclude(columns)
362
- if columns.is_a?(String)
369
+ if columns.is_a?(::String)
363
370
  columns = [columns]
364
371
  return wrap_expr(_rbexpr.exclude(columns))
365
372
  elsif !columns.is_a?(::Array)
@@ -367,11 +374,11 @@ module Polars
367
374
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
375
  end
369
376
 
370
- if !columns.all? { |a| a.is_a?(String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
377
+ if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
371
378
  raise ArgumentError, "input should be all string or all DataType"
372
379
  end
373
380
 
374
- if columns[0].is_a?(String)
381
+ if columns[0].is_a?(::String)
375
382
  wrap_expr(_rbexpr.exclude(columns))
376
383
  else
377
384
  wrap_expr(_rbexpr.exclude_dtype(columns))
@@ -401,21 +408,21 @@ module Polars
  # # │ 18 ┆ 4 │
  # # └─────┴─────┘
  def keep_name
- wrap_expr(_rbexpr.keep_name)
+ name.keep
  end
 
  # Add a prefix to the root column name of the expression.
  #
  # @return [Expr]
  def prefix(prefix)
- wrap_expr(_rbexpr.prefix(prefix))
+ name.prefix(prefix)
  end
 
  # Add a suffix to the root column name of the expression.
  #
  # @return [Expr]
  def suffix(suffix)
- wrap_expr(_rbexpr.suffix(suffix))
+ name.suffix(suffix)
  end
 
  # Rename the output of an expression by mapping a function over the root name.
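The renaming helpers above now delegate to the new `name` namespace (added near the end of this file), so both spellings below should be equivalent; column names are illustrative:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})

df.select(
  [
    Polars.col("a").prefix("p_"),        # still available, now calls name.prefix
    Polars.col("b").name.suffix("_s")    # the new namespace spelling
  ]
)
```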
@@ -443,7 +450,7 @@ module Polars
443
450
  # # │ 1 ┆ 3 │
444
451
  # # └───────────┴───────────┘
445
452
  def map_alias(&f)
446
- Utils.wrap_expr(_rbexpr.map_alias(f))
453
+ name.map(&f)
447
454
  end
448
455
 
449
456
  # Negate a boolean expression.
@@ -682,7 +689,7 @@ module Polars
682
689
  # "value" => [94, 95, 96, 97, 97, 99]
683
690
  # }
684
691
  # )
685
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
692
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
686
693
  # # =>
687
694
  # # shape: (2, 2)
688
695
  # # ┌───────┬───────────┐
@@ -714,13 +721,13 @@ module Polars
  # # │ 3 ┆ 3 │
  # # └─────┴─────┘
  def count
- wrap_expr(_rbexpr.count)
+ warn "`Expr#count` will exclude null values in 0.9.0. Use `Expr#length` instead."
+ # wrap_expr(_rbexpr.count)
+ wrap_expr(_rbexpr.len)
  end
 
  # Count the number of values in this expression.
  #
- # Alias for {#count}.
- #
  # @return [Expr]
  #
  # @example
@@ -736,8 +743,9 @@ module Polars
  # # │ 3 ┆ 3 │
  # # └─────┴─────┘
  def len
- count
+ wrap_expr(_rbexpr.len)
  end
+ alias_method :length, :len
 
  # Get a slice of this expression.
  #
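A short sketch of the deprecation path above; data is illustrative. `count` still returns the full length in 0.8 but emits the warning shown in the diff, while `len`/`length` is the forward-compatible spelling:

```ruby
df = Polars::DataFrame.new({"a" => [1, nil, 3]})

df.select(
  [
    Polars.col("a").len.alias("len"),        # counts nulls, no warning
    Polars.col("a").length.alias("length")   # new alias added in this release
  ]
)
```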
@@ -905,8 +913,8 @@ module Polars
905
913
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
906
914
  # df.select(
907
915
  # [
908
- # Polars.col("a").cumsum,
909
- # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
916
+ # Polars.col("a").cum_sum,
917
+ # Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
910
918
  # ]
911
919
  # )
912
920
  # # =>
@@ -921,9 +929,10 @@ module Polars
921
929
  # # │ 6 ┆ 7 │
922
930
  # # │ 10 ┆ 4 │
923
931
  # # └─────┴───────────┘
924
- def cumsum(reverse: false)
925
- wrap_expr(_rbexpr.cumsum(reverse))
932
+ def cum_sum(reverse: false)
933
+ wrap_expr(_rbexpr.cum_sum(reverse))
926
934
  end
935
+ alias_method :cumsum, :cum_sum
927
936
 
928
937
  # Get an array with the cumulative product computed at every element.
929
938
  #
@@ -940,8 +949,8 @@ module Polars
940
949
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
941
950
  # df.select(
942
951
  # [
943
- # Polars.col("a").cumprod,
944
- # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
952
+ # Polars.col("a").cum_prod,
953
+ # Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
945
954
  # ]
946
955
  # )
947
956
  # # =>
@@ -956,9 +965,10 @@ module Polars
956
965
  # # │ 6 ┆ 12 │
957
966
  # # │ 24 ┆ 4 │
958
967
  # # └─────┴───────────┘
959
- def cumprod(reverse: false)
960
- wrap_expr(_rbexpr.cumprod(reverse))
968
+ def cum_prod(reverse: false)
969
+ wrap_expr(_rbexpr.cum_prod(reverse))
961
970
  end
971
+ alias_method :cumprod, :cum_prod
962
972
 
963
973
  # Get an array with the cumulative min computed at every element.
964
974
  #
@@ -971,8 +981,8 @@ module Polars
971
981
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
972
982
  # df.select(
973
983
  # [
974
- # Polars.col("a").cummin,
975
- # Polars.col("a").cummin(reverse: true).alias("a_reverse")
984
+ # Polars.col("a").cum_min,
985
+ # Polars.col("a").cum_min(reverse: true).alias("a_reverse")
976
986
  # ]
977
987
  # )
978
988
  # # =>
@@ -987,9 +997,10 @@ module Polars
987
997
  # # │ 1 ┆ 3 │
988
998
  # # │ 1 ┆ 4 │
989
999
  # # └─────┴───────────┘
990
- def cummin(reverse: false)
991
- wrap_expr(_rbexpr.cummin(reverse))
1000
+ def cum_min(reverse: false)
1001
+ wrap_expr(_rbexpr.cum_min(reverse))
992
1002
  end
1003
+ alias_method :cummin, :cum_min
993
1004
 
994
1005
  # Get an array with the cumulative max computed at every element.
995
1006
  #
@@ -1002,8 +1013,8 @@ module Polars
1002
1013
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1003
1014
  # df.select(
1004
1015
  # [
1005
- # Polars.col("a").cummax,
1006
- # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1016
+ # Polars.col("a").cum_max,
1017
+ # Polars.col("a").cum_max(reverse: true).alias("a_reverse")
1007
1018
  # ]
1008
1019
  # )
1009
1020
  # # =>
@@ -1018,9 +1029,10 @@ module Polars
1018
1029
  # # │ 3 ┆ 4 │
1019
1030
  # # │ 4 ┆ 4 │
1020
1031
  # # └─────┴───────────┘
1021
- def cummax(reverse: false)
1022
- wrap_expr(_rbexpr.cummax(reverse))
1032
+ def cum_max(reverse: false)
1033
+ wrap_expr(_rbexpr.cum_max(reverse))
1023
1034
  end
1035
+ alias_method :cummax, :cum_max
1024
1036
 
1025
1037
  # Get an array with the cumulative count computed at every element.
1026
1038
  #
@@ -1035,8 +1047,8 @@ module Polars
1035
1047
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1036
1048
  # df.select(
1037
1049
  # [
1038
- # Polars.col("a").cumcount,
1039
- # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1050
+ # Polars.col("a").cum_count,
1051
+ # Polars.col("a").cum_count(reverse: true).alias("a_reverse")
1040
1052
  # ]
1041
1053
  # )
1042
1054
  # # =>
@@ -1051,9 +1063,10 @@ module Polars
  # # │ 2 ┆ 1 │
  # # │ 3 ┆ 0 │
  # # └─────┴───────────┘
- def cumcount(reverse: false)
- wrap_expr(_rbexpr.cumcount(reverse))
+ def cum_count(reverse: false)
+ wrap_expr(_rbexpr.cum_count(reverse))
  end
+ alias_method :cumcount, :cum_count
 
  # Rounds down to the nearest integer value.
  #
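The cumulative methods are renamed to snake_case with the old names kept as aliases; a minimal sketch (values illustrative):

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})

df.select(
  [
    Polars.col("a").cum_sum.alias("cum_sum"),
    Polars.col("a").cumsum.alias("cumsum_alias"),        # old spelling still works
    Polars.col("a").cum_max(reverse: true).alias("cum_max_rev")
  ]
)
```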
@@ -1229,7 +1242,7 @@ module Polars
1229
1242
 
1230
1243
  # Sort this column. In projection/ selection context the whole column is sorted.
1231
1244
  #
1232
- # If used in a groupby context, the groups are sorted.
1245
+ # If used in a group by context, the groups are sorted.
1233
1246
  #
1234
1247
  # @param reverse [Boolean]
1235
1248
  # false -> order from small to large.
@@ -1287,7 +1300,7 @@ module Polars
1287
1300
  # # └───────┘
1288
1301
  #
1289
1302
  # @example
1290
- # df.groupby("group").agg(Polars.col("value").sort)
1303
+ # df.group_by("group").agg(Polars.col("value").sort)
1291
1304
  # # =>
1292
1305
  # # shape: (2, 2)
1293
1306
  # # ┌───────┬────────────┐
@@ -1337,6 +1350,7 @@ module Polars
  # # │ 2 ┆ 98 │
  # # └───────┴──────────┘
  def top_k(k: 5)
+ k = Utils.parse_as_expression(k)
  wrap_expr(_rbexpr.top_k(k))
  end
 
@@ -1375,6 +1389,7 @@ module Polars
  # # │ 2 ┆ 98 │
  # # └───────┴──────────┘
  def bottom_k(k: 5)
+ k = Utils.parse_as_expression(k)
  wrap_expr(_rbexpr.bottom_k(k))
  end
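Since `k` is now routed through `Utils.parse_as_expression`, integer literals keep working and expression inputs should also be accepted; a minimal sketch with illustrative data:

```ruby
df = Polars::DataFrame.new({"value" => [1, 98, 2, 3, 99, 4]})

df.select(
  [
    Polars.col("value").top_k(k: 3).alias("top"),
    Polars.col("value").bottom_k(k: 3).alias("bottom")
  ]
)
```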
 
@@ -1494,7 +1509,7 @@ module Polars
1494
1509
  # Sort this column by the ordering of another column, or multiple other columns.
1495
1510
  #
1496
1511
  # In projection/ selection context the whole column is sorted.
1497
- # If used in a groupby context, the groups are sorted.
1512
+ # If used in a group by context, the groups are sorted.
1498
1513
  #
1499
1514
  # @param by [Object]
1500
1515
  # The column(s) used for sorting.
@@ -1566,30 +1581,33 @@ module Polars
  # "value" => [1, 98, 2, 3, 99, 4]
  # }
  # )
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
  # # =>
  # # shape: (2, 2)
- # # ┌───────┬───────┐
- # # │ group ┆ value │
- # # │ --- ┆ --- │
- # # │ str ┆ i64 │
- # # ╞═══════╪═══════╡
- # # │ one ┆ 98 │
- # # │ two ┆ 99 │
- # # └───────┴───────┘
- def take(indices)
+ # # ┌───────┬───────────┐
+ # # │ group ┆ value │
+ # # │ --- ┆ --- │
+ # # │ str ┆ list[i64] │
+ # # ╞═══════╪═══════════╡
+ # # │ one ┆ [2, 98] │
+ # # │ two ┆ [4, 99] │
+ # # └───────┴───────────┘
+ def gather(indices)
  if indices.is_a?(::Array)
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
  else
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
  end
- wrap_expr(_rbexpr.take(indices_lit._rbexpr))
+ wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
  end
+ alias_method :take, :gather
 
  # Shift the values by a given period.
  #
- # @param periods [Integer]
+ # @param n [Integer]
  # Number of places to shift (may be negative).
+ # @param fill_value [Object]
+ # Fill the resulting null values with this value.
  #
  # @return [Expr]
  #
@@ -1608,8 +1626,12 @@ module Polars
  # # │ 2 │
  # # │ 3 │
  # # └──────┘
- def shift(periods = 1)
- wrap_expr(_rbexpr.shift(periods))
+ def shift(n = 1, fill_value: nil)
+ if !fill_value.nil?
+ fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
+ end
+ n = Utils.parse_as_expression(n)
+ wrap_expr(_rbexpr.shift(n, fill_value))
  end
 
  # Shift the values by a given period and fill the resulting null values.
@@ -1637,8 +1659,7 @@ module Polars
  # # │ 3 │
  # # └─────┘
  def shift_and_fill(periods, fill_value)
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
- wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
+ shift(periods, fill_value: fill_value)
  end
 
  # Fill null values using the specified value or strategy.
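A combined usage sketch of the renamed `gather` and the extended `shift` shown above; data and aliases are illustrative:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})

df.select(
  [
    Polars.col("a").gather([3, 2, 1, 0]).alias("reversed"),    # `take` remains as an alias
    Polars.col("a").shift(1, fill_value: 0).alias("shifted")   # replaces shift_and_fill internally
  ]
)
```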
@@ -2063,7 +2084,7 @@ module Polars
2063
2084
  # # │ 2 │
2064
2085
  # # └─────┘
2065
2086
  def approx_unique
2066
- wrap_expr(_rbexpr.approx_unique)
2087
+ wrap_expr(_rbexpr.approx_n_unique)
2067
2088
  end
2068
2089
 
2069
2090
  # Count null values.
@@ -2201,7 +2222,7 @@ module Polars
2201
2222
 
2202
2223
  # Apply window function over a subgroup.
2203
2224
  #
2204
- # This is similar to a groupby + aggregation + self join.
2225
+ # This is similar to a group by + aggregation + self join.
2205
2226
  # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2206
2227
  #
2207
2228
  # @param expr [Object]
@@ -2309,9 +2330,10 @@ module Polars
  # # │ 1 ┆ false │
  # # │ 5 ┆ true │
  # # └─────┴──────────┘
- def is_first
- wrap_expr(_rbexpr.is_first)
+ def is_first_distinct
+ wrap_expr(_rbexpr.is_first_distinct)
  end
+ alias_method :is_first, :is_first_distinct
 
  # Get mask of duplicated values.
  #
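A minimal sketch of the rename, with the old name still available as an alias; data is illustrative:

```ruby
df = Polars::DataFrame.new({"a" => [1, 1, 2, 3, 2]})

df.with_columns(
  [
    Polars.col("a").is_first_distinct.alias("first_distinct"),
    Polars.col("a").is_first.alias("first_alias")   # kept via alias_method
  ]
)
```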
@@ -2335,6 +2357,54 @@ module Polars
2335
2357
  wrap_expr(_rbexpr.is_duplicated)
2336
2358
  end
2337
2359
 
2360
+ # Get a boolean mask of the local maximum peaks.
2361
+ #
2362
+ # @return [Expr]
2363
+ #
2364
+ # @example
2365
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
2366
+ # df.select(Polars.col("a").peak_max)
2367
+ # # =>
2368
+ # # shape: (5, 1)
2369
+ # # ┌───────┐
2370
+ # # │ a │
2371
+ # # │ --- │
2372
+ # # │ bool │
2373
+ # # ╞═══════╡
2374
+ # # │ false │
2375
+ # # │ false │
2376
+ # # │ false │
2377
+ # # │ false │
2378
+ # # │ true │
2379
+ # # └───────┘
2380
+ def peak_max
2381
+ wrap_expr(_rbexpr.peak_max)
2382
+ end
2383
+
2384
+ # Get a boolean mask of the local minimum peaks.
2385
+ #
2386
+ # @return [Expr]
2387
+ #
2388
+ # @example
2389
+ # df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
2390
+ # df.select(Polars.col("a").peak_min)
2391
+ # # =>
2392
+ # # shape: (5, 1)
2393
+ # # ┌───────┐
2394
+ # # │ a │
2395
+ # # │ --- │
2396
+ # # │ bool │
2397
+ # # ╞═══════╡
2398
+ # # │ false │
2399
+ # # │ true │
2400
+ # # │ false │
2401
+ # # │ true │
2402
+ # # │ false │
2403
+ # # └───────┘
2404
+ def peak_min
2405
+ wrap_expr(_rbexpr.peak_min)
2406
+ end
2407
+
2338
2408
  # Get quantile value.
2339
2409
  #
2340
2410
  # @param quantile [Float]
@@ -2354,7 +2424,7 @@ module Polars
2354
2424
  # # │ --- │
2355
2425
  # # │ f64 │
2356
2426
  # # ╞═════╡
2357
- # # │ 1.0 │
2427
+ # # │ 2.0 │
2358
2428
  # # └─────┘
2359
2429
  #
2360
2430
  # @example
@@ -2409,6 +2479,206 @@ module Polars
2409
2479
  wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
2410
2480
  end
2411
2481
 
2482
+ # Bin continuous values into discrete categories.
2483
+ #
2484
+ # @param breaks [Array]
2485
+ # List of unique cut points.
2486
+ # @param labels [Array]
2487
+ # Names of the categories. The number of labels must be equal to the number
2488
+ # of cut points plus one.
2489
+ # @param left_closed [Boolean]
2490
+ # Set the intervals to be left-closed instead of right-closed.
2491
+ # @param include_breaks [Boolean]
2492
+ # Include a column with the right endpoint of the bin each observation falls
2493
+ # in. This will change the data type of the output from a
2494
+ # `Categorical` to a `Struct`.
2495
+ #
2496
+ # @return [Expr]
2497
+ #
2498
+ # @example Divide a column into three categories.
2499
+ # df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
2500
+ # df.with_columns(
2501
+ # Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
2502
+ # )
2503
+ # # =>
2504
+ # # shape: (5, 2)
2505
+ # # ┌─────┬─────┐
2506
+ # # │ foo ┆ cut │
2507
+ # # │ --- ┆ --- │
2508
+ # # │ i64 ┆ cat │
2509
+ # # ╞═════╪═════╡
2510
+ # # │ -2 ┆ a │
2511
+ # # │ -1 ┆ a │
2512
+ # # │ 0 ┆ b │
2513
+ # # │ 1 ┆ b │
2514
+ # # │ 2 ┆ c │
2515
+ # # └─────┴─────┘
2516
+ #
2517
+ # @example Add both the category and the breakpoint.
2518
+ # df.with_columns(
2519
+ # Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
2520
+ # ).unnest("cut")
2521
+ # # =>
2522
+ # # shape: (5, 3)
2523
+ # # ┌─────┬──────┬────────────┐
2524
+ # # │ foo ┆ brk ┆ foo_bin │
2525
+ # # │ --- ┆ --- ┆ --- │
2526
+ # # │ i64 ┆ f64 ┆ cat │
2527
+ # # ╞═════╪══════╪════════════╡
2528
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2529
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2530
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2531
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2532
+ # # │ 2 ┆ inf ┆ (1, inf] │
2533
+ # # └─────┴──────┴────────────┘
2534
+ def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
2535
+ wrap_expr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
2536
+ end
2537
+
2538
+ # Bin continuous values into discrete categories based on their quantiles.
2539
+ #
2540
+ # @param quantiles [Array]
2541
+ # Either a list of quantile probabilities between 0 and 1 or a positive
2542
+ # integer determining the number of bins with uniform probability.
2543
+ # @param labels [Array]
2544
+ # Names of the categories. The number of labels must be equal to the number
2545
+ # of categories.
2546
+ # @param left_closed [Boolean]
2547
+ # Set the intervals to be left-closed instead of right-closed.
2548
+ # @param allow_duplicates [Boolean]
2549
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
2550
+ # rather than raising a `DuplicateError`. This can happen even with unique
2551
+ # probabilities, depending on the data.
2552
+ # @param include_breaks [Boolean]
2553
+ # Include a column with the right endpoint of the bin each observation falls
2554
+ # in. This will change the data type of the output from a
2555
+ # `Categorical` to a `Struct`.
2556
+ #
2557
+ # @return [Expr]
2558
+ #
2559
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
2560
+ # df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
2561
+ # df.with_columns(
2562
+ # Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
2563
+ # )
2564
+ # # =>
2565
+ # # shape: (5, 2)
2566
+ # # ┌─────┬──────┐
2567
+ # # │ foo ┆ qcut │
2568
+ # # │ --- ┆ --- │
2569
+ # # │ i64 ┆ cat │
2570
+ # # ╞═════╪══════╡
2571
+ # # │ -2 ┆ a │
2572
+ # # │ -1 ┆ a │
2573
+ # # │ 0 ┆ b │
2574
+ # # │ 1 ┆ b │
2575
+ # # │ 2 ┆ c │
2576
+ # # └─────┴──────┘
2577
+ #
2578
+ # @example Divide a column into two categories using uniform quantile probabilities.
2579
+ # df.with_columns(
2580
+ # Polars.col("foo")
2581
+ # .qcut(2, labels: ["low", "high"], left_closed: true)
2582
+ # .alias("qcut")
2583
+ # )
2584
+ # # =>
2585
+ # # shape: (5, 2)
2586
+ # # ┌─────┬──────┐
2587
+ # # │ foo ┆ qcut │
2588
+ # # │ --- ┆ --- │
2589
+ # # │ i64 ┆ cat │
2590
+ # # ╞═════╪══════╡
2591
+ # # │ -2 ┆ low │
2592
+ # # │ -1 ┆ low │
2593
+ # # │ 0 ┆ high │
2594
+ # # │ 1 ┆ high │
2595
+ # # │ 2 ┆ high │
2596
+ # # └─────┴──────┘
2597
+ #
2598
+ # @example Add both the category and the breakpoint.
2599
+ # df.with_columns(
2600
+ # Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
2601
+ # ).unnest("qcut")
2602
+ # # =>
2603
+ # # shape: (5, 3)
2604
+ # # ┌─────┬──────┬────────────┐
2605
+ # # │ foo ┆ brk ┆ foo_bin │
2606
+ # # │ --- ┆ --- ┆ --- │
2607
+ # # │ i64 ┆ f64 ┆ cat │
2608
+ # # ╞═════╪══════╪════════════╡
2609
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
2610
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
2611
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
2612
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
2613
+ # # │ 2 ┆ inf ┆ (1, inf] │
2614
+ # # └─────┴──────┴────────────┘
2615
+ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
2616
+ if quantiles.is_a?(Integer)
2617
+ rbexpr = _rbexpr.qcut_uniform(
2618
+ quantiles, labels, left_closed, allow_duplicates, include_breaks
2619
+ )
2620
+ else
2621
+ rbexpr = _rbexpr.qcut(
2622
+ quantiles, labels, left_closed, allow_duplicates, include_breaks
2623
+ )
2624
+ end
2625
+
2626
+ wrap_expr(rbexpr)
2627
+ end
2628
+
2629
+ # Get the lengths of runs of identical values.
2630
+ #
2631
+ # @return [Expr]
2632
+ #
2633
+ # @example
2634
+ # df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
2635
+ # df.select(Polars.col("s").rle).unnest("s")
2636
+ # # =>
2637
+ # # shape: (6, 2)
2638
+ # # ┌─────────┬────────┐
2639
+ # # │ lengths ┆ values │
2640
+ # # │ --- ┆ --- │
2641
+ # # │ i32 ┆ i64 │
2642
+ # # ╞═════════╪════════╡
2643
+ # # │ 2 ┆ 1 │
2644
+ # # │ 1 ┆ 2 │
2645
+ # # │ 1 ┆ 1 │
2646
+ # # │ 1 ┆ null │
2647
+ # # │ 1 ┆ 1 │
2648
+ # # │ 2 ┆ 3 │
2649
+ # # └─────────┴────────┘
2650
+ def rle
2651
+ wrap_expr(_rbexpr.rle)
2652
+ end
2653
+
2654
+ # Map values to run IDs.
2655
+ #
2656
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
2657
+ # which it falls. This is especially useful when you want to define groups by
2658
+ # runs of identical values rather than the values themselves.
2659
+ #
2660
+ # @return [Expr]
2661
+ #
2662
+ # @example
2663
+ # df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
2664
+ # df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
2665
+ # # =>
2666
+ # # shape: (5, 4)
2667
+ # # ┌─────┬──────┬─────┬──────┐
2668
+ # # │ a ┆ b ┆ a_r ┆ ab_r │
2669
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2670
+ # # │ i64 ┆ str ┆ u32 ┆ u32 │
2671
+ # # ╞═════╪══════╪═════╪══════╡
2672
+ # # │ 1 ┆ x ┆ 0 ┆ 0 │
2673
+ # # │ 2 ┆ x ┆ 1 ┆ 1 │
2674
+ # # │ 1 ┆ null ┆ 2 ┆ 2 │
2675
+ # # │ 1 ┆ y ┆ 2 ┆ 3 │
2676
+ # # │ 1 ┆ y ┆ 2 ┆ 3 │
2677
+ # # └─────┴──────┴─────┴──────┘
2678
+ def rle_id
2679
+ wrap_expr(_rbexpr.rle_id)
2680
+ end
2681
+
2412
2682
  # Filter a single column.
2413
2683
  #
2414
2684
  # Mostly useful in an aggregation context. If you want to filter on a DataFrame
@@ -2427,7 +2697,7 @@ module Polars
2427
2697
  # }
2428
2698
  # )
2429
2699
  # (
2430
- # df.groupby("group_col").agg(
2700
+ # df.group_by("group_col").agg(
2431
2701
  # [
2432
2702
  # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2433
2703
  # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
@@ -2465,7 +2735,7 @@ module Polars
2465
2735
  # }
2466
2736
  # )
2467
2737
  # (
2468
- # df.groupby("group_col").agg(
2738
+ # df.group_by("group_col").agg(
2469
2739
  # [
2470
2740
  # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2471
2741
  # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
@@ -2583,7 +2853,7 @@ module Polars
2583
2853
  #
2584
2854
  # @example In a GroupBy context the function is applied by group:
2585
2855
  # df.lazy
2586
- # .groupby("b", maintain_order: true)
2856
+ # .group_by("b", maintain_order: true)
2587
2857
  # .agg(
2588
2858
  # [
2589
2859
  # Polars.col("a").apply { |x| x.sum }
@@ -2622,7 +2892,7 @@ module Polars
2622
2892
  # "values" => [[1, 2], [2, 3], [4]]
2623
2893
  # }
2624
2894
  # )
2625
- # df.groupby("group").agg(Polars.col("values").flatten)
2895
+ # df.group_by("group").agg(Polars.col("values").flatten)
2626
2896
  # # =>
2627
2897
  # # shape: (2, 2)
2628
2898
  # # ┌───────┬───────────┐
@@ -2670,7 +2940,7 @@ module Polars
  #
  # @example
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
- # df.select(Polars.col("foo").take_every(3))
+ # df.select(Polars.col("foo").gather_every(3))
  # # =>
  # # shape: (3, 1)
  # # ┌─────┐
@@ -2682,9 +2952,10 @@ module Polars
  # # │ 4 │
  # # │ 7 │
  # # └─────┘
- def take_every(n)
- wrap_expr(_rbexpr.take_every(n))
+ def gather_every(n, offset = 0)
+ wrap_expr(_rbexpr.gather_every(n, offset))
  end
+ alias_method :take_every, :gather_every
 
  # Get the first `n` rows.
  #
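Besides the rename, `gather_every` gains an optional `offset` positional argument; a sketch with illustrative data:

```ruby
df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})

# Start at index 1, then take every 3rd row (2, 5, 8).
df.select(Polars.col("foo").gather_every(3, 1))
```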
@@ -3057,11 +3328,11 @@ module Polars
3057
3328
  # # ┌─────┬─────┐
3058
3329
  # # │ a ┆ b │
3059
3330
  # # │ --- ┆ --- │
3060
- # # │ i64 ┆ f64 │
3331
+ # # │ f64 ┆ f64 │
3061
3332
  # # ╞═════╪═════╡
3062
- # # │ 1 ┆ 1.0 │
3063
- # # │ 2 ┆ NaN │
3064
- # # │ 3 ┆ 3.0 │
3333
+ # # │ 1.0 ┆ 1.0 │
3334
+ # # │ 2.0 ┆ NaN │
3335
+ # # │ 3.0 ┆ 3.0 │
3065
3336
  # # └─────┴─────┘
3066
3337
  def interpolate(method: "linear")
3067
3338
  wrap_expr(_rbexpr.interpolate(method))
@@ -3112,7 +3383,7 @@ module Polars
3112
3383
  #
3113
3384
  # @note
3114
3385
  # If you want to compute multiple aggregation statistics over the same dynamic
3115
- # window, consider using `groupby_rolling` this method can cache the window size
3386
+ # window, consider using `group_by_rolling` this method can cache the window size
3116
3387
  # computation.
3117
3388
  #
3118
3389
  # @return [Expr]
@@ -3201,7 +3472,7 @@ module Polars
3201
3472
  #
3202
3473
  # @note
3203
3474
  # If you want to compute multiple aggregation statistics over the same dynamic
3204
- # window, consider using `groupby_rolling` this method can cache the window size
3475
+ # window, consider using `group_by_rolling` this method can cache the window size
3205
3476
  # computation.
3206
3477
  #
3207
3478
  # @return [Expr]
@@ -3290,7 +3561,7 @@ module Polars
3290
3561
  #
3291
3562
  # @note
3292
3563
  # If you want to compute multiple aggregation statistics over the same dynamic
3293
- # window, consider using `groupby_rolling` this method can cache the window size
3564
+ # window, consider using `group_by_rolling` this method can cache the window size
3294
3565
  # computation.
3295
3566
  #
3296
3567
  # @return [Expr]
@@ -3379,7 +3650,7 @@ module Polars
3379
3650
  #
3380
3651
  # @note
3381
3652
  # If you want to compute multiple aggregation statistics over the same dynamic
3382
- # window, consider using `groupby_rolling` this method can cache the window size
3653
+ # window, consider using `group_by_rolling` this method can cache the window size
3383
3654
  # computation.
3384
3655
  #
3385
3656
  # @return [Expr]
@@ -3468,7 +3739,7 @@ module Polars
3468
3739
  #
3469
3740
  # @note
3470
3741
  # If you want to compute multiple aggregation statistics over the same dynamic
3471
- # window, consider using `groupby_rolling` this method can cache the window size
3742
+ # window, consider using `group_by_rolling` this method can cache the window size
3472
3743
  # computation.
3473
3744
  #
3474
3745
  # @return [Expr]
@@ -3501,14 +3772,15 @@ module Polars
  center: false,
  by: nil,
  closed: "left",
- ddof: 1
+ ddof: 1,
+ warn_if_unsorted: true
  )
  window_size, min_periods = _prepare_rolling_window_args(
  window_size, min_periods
  )
  wrap_expr(
  _rbexpr.rolling_std(
- window_size, weights, min_periods, center, by, closed, ddof
+ window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
  )
  )
  end
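A hedged sketch of the new `warn_if_unsorted` keyword on the rolling aggregations; it assumes `window_size` is the leading positional argument (not visible in this hunk) and uses illustrative data:

```ruby
df = Polars::DataFrame.new({"a" => [1.0, 2.0, 4.0, 8.0]})

df.select(
  Polars.col("a").rolling_std(2, warn_if_unsorted: false).alias("rolling_std")
)
```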
@@ -3558,7 +3830,7 @@ module Polars
3558
3830
  #
3559
3831
  # @note
3560
3832
  # If you want to compute multiple aggregation statistics over the same dynamic
3561
- # window, consider using `groupby_rolling` this method can cache the window size
3833
+ # window, consider using `group_by_rolling` this method can cache the window size
3562
3834
  # computation.
3563
3835
  #
3564
3836
  # @return [Expr]
@@ -3591,14 +3863,15 @@ module Polars
3591
3863
  center: false,
3592
3864
  by: nil,
3593
3865
  closed: "left",
3594
- ddof: 1
3866
+ ddof: 1,
3867
+ warn_if_unsorted: true
3595
3868
  )
3596
3869
  window_size, min_periods = _prepare_rolling_window_args(
3597
3870
  window_size, min_periods
3598
3871
  )
3599
3872
  wrap_expr(
3600
3873
  _rbexpr.rolling_var(
3601
- window_size, weights, min_periods, center, by, closed, ddof
3874
+ window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
3602
3875
  )
3603
3876
  )
3604
3877
  end
@@ -3644,7 +3917,7 @@ module Polars
3644
3917
  #
3645
3918
  # @note
3646
3919
  # If you want to compute multiple aggregation statistics over the same dynamic
3647
- # window, consider using `groupby_rolling` this method can cache the window size
3920
+ # window, consider using `group_by_rolling` this method can cache the window size
3648
3921
  # computation.
3649
3922
  #
3650
3923
  # @return [Expr]
@@ -3676,14 +3949,15 @@ module Polars
3676
3949
  min_periods: nil,
3677
3950
  center: false,
3678
3951
  by: nil,
3679
- closed: "left"
3952
+ closed: "left",
3953
+ warn_if_unsorted: true
3680
3954
  )
3681
3955
  window_size, min_periods = _prepare_rolling_window_args(
3682
3956
  window_size, min_periods
3683
3957
  )
3684
3958
  wrap_expr(
3685
3959
  _rbexpr.rolling_median(
3686
- window_size, weights, min_periods, center, by, closed
3960
+ window_size, weights, min_periods, center, by, closed, warn_if_unsorted
3687
3961
  )
3688
3962
  )
3689
3963
  end
@@ -3733,7 +4007,7 @@ module Polars
3733
4007
  #
3734
4008
  # @note
3735
4009
  # If you want to compute multiple aggregation statistics over the same dynamic
3736
- # window, consider using `groupby_rolling` this method can cache the window size
4010
+ # window, consider using `group_by_rolling` this method can cache the window size
3737
4011
  # computation.
3738
4012
  #
3739
4013
  # @return [Expr]
@@ -3767,14 +4041,15 @@ module Polars
3767
4041
  min_periods: nil,
3768
4042
  center: false,
3769
4043
  by: nil,
3770
- closed: "left"
4044
+ closed: "left",
4045
+ warn_if_unsorted: true
3771
4046
  )
3772
4047
  window_size, min_periods = _prepare_rolling_window_args(
3773
4048
  window_size, min_periods
3774
4049
  )
3775
4050
  wrap_expr(
3776
4051
  _rbexpr.rolling_quantile(
3777
- quantile, interpolation, window_size, weights, min_periods, center, by, closed
4052
+ quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
3778
4053
  )
3779
4054
  )
3780
4055
  end
@@ -3948,7 +4223,7 @@ module Polars
  # # ┌─────┐
  # # │ a │
  # # │ --- │
- # # │ f32 │
+ # # │ f64 │
  # # ╞═════╡
  # # │ 3.0 │
  # # │ 4.5 │
@@ -4041,6 +4316,7 @@ module Polars
4041
4316
  # # │ 12 ┆ 0.0 │
4042
4317
  # # └──────┴────────────┘
4043
4318
  def pct_change(n: 1)
4319
+ n = Utils.parse_as_expression(n)
4044
4320
  wrap_expr(_rbexpr.pct_change(n))
4045
4321
  end
4046
4322
 
@@ -4105,16 +4381,14 @@ module Polars
4105
4381
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
4106
4382
  end
4107
4383
 
4108
- # Clip (limit) the values in an array to a `min` and `max` boundary.
4109
- #
4110
- # Only works for numerical types.
4384
+ # Set values outside the given boundaries to the boundary value.
4111
4385
  #
4112
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4113
- # expression. See `when` for more information.
4386
+ # Only works for numeric and temporal columns. If you want to clip other data
4387
+ # types, consider writing a `when-then-otherwise` expression.
4114
4388
  #
4115
- # @param min_val [Numeric]
4389
+ # @param lower_bound [Numeric]
4116
4390
  # Minimum value.
4117
- # @param max_val [Numeric]
4391
+ # @param upper_bound [Numeric]
4118
4392
  # Maximum value.
4119
4393
  #
4120
4394
  # @return [Expr]
@@ -4134,8 +4408,14 @@ module Polars
  # # │ null ┆ null │
  # # │ 50 ┆ 10 │
  # # └──────┴─────────────┘
- def clip(min_val, max_val)
- wrap_expr(_rbexpr.clip(min_val, max_val))
+ def clip(lower_bound, upper_bound)
+ if !lower_bound.nil?
+ lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
+ end
+ if !upper_bound.nil?
+ upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
+ end
+ wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
  end
 
  # Clip (limit) the values in an array to a `min` boundary.
@@ -4145,7 +4425,7 @@ module Polars
4145
4425
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4146
4426
  # expression. See `when` for more information.
4147
4427
  #
4148
- # @param min_val [Numeric]
4428
+ # @param lower_bound [Numeric]
4149
4429
  # Minimum value.
4150
4430
  #
4151
4431
  # @return [Expr]
@@ -4165,8 +4445,8 @@ module Polars
4165
4445
  # # │ null ┆ null │
4166
4446
  # # │ 50 ┆ 50 │
4167
4447
  # # └──────┴─────────────┘
4168
- def clip_min(min_val)
4169
- wrap_expr(_rbexpr.clip_min(min_val))
4448
+ def clip_min(lower_bound)
4449
+ clip(lower_bound, nil)
4170
4450
  end
4171
4451
 
4172
4452
  # Clip (limit) the values in an array to a `max` boundary.
@@ -4176,7 +4456,7 @@ module Polars
4176
4456
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4177
4457
  # expression. See `when` for more information.
4178
4458
  #
4179
- # @param max_val [Numeric]
4459
+ # @param upper_bound [Numeric]
4180
4460
  # Maximum value.
4181
4461
  #
4182
4462
  # @return [Expr]
@@ -4196,8 +4476,8 @@ module Polars
  # # │ null ┆ null │
  # # │ 50 ┆ 0 │
  # # └──────┴─────────────┘
- def clip_max(max_val)
- wrap_expr(_rbexpr.clip_max(max_val))
+ def clip_max(upper_bound)
+ clip(nil, upper_bound)
  end
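With `clip_min`/`clip_max` now forwarding to `clip`, the three methods form one bounded-clipping API; a minimal sketch mirroring the doc examples above:

```ruby
df = Polars::DataFrame.new({"a" => [-50, 5, nil, 50]})

df.with_columns(
  [
    Polars.col("a").clip(1, 10).alias("both_bounds"),
    Polars.col("a").clip_min(0).alias("lower_only"),   # forwards to clip(0, nil)
    Polars.col("a").clip_max(0).alias("upper_only")    # forwards to clip(nil, 0)
  ]
)
```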
4202
4482
 
4203
4483
  # Calculate the lower bound.
@@ -4558,11 +4838,11 @@ module Polars
4558
4838
  # # │ 1 │
4559
4839
  # # │ 3 │
4560
4840
  # # └─────┘
4561
- def shuffle(seed: nil, fixed_seed: false)
4841
+ def shuffle(seed: nil)
4562
4842
  if seed.nil?
4563
4843
  seed = rand(10000)
4564
4844
  end
4565
- wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4845
+ wrap_expr(_rbexpr.shuffle(seed))
4566
4846
  end
4567
4847
 
4568
4848
  # Sample from this expression.
@@ -4600,22 +4880,23 @@ module Polars
  with_replacement: true,
  shuffle: false,
  seed: nil,
- n: nil,
- fixed_seed: false
+ n: nil
  )
  if !n.nil? && !frac.nil?
  raise ArgumentError, "cannot specify both `n` and `frac`"
  end
 
  if !n.nil? && frac.nil?
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
+ n = Utils.parse_as_expression(n)
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
  end
 
  if frac.nil?
  frac = 1.0
  end
+ frac = Utils.parse_as_expression(frac)
  wrap_expr(
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
  )
  end
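The `fixed_seed` keyword is gone from both `shuffle` and `sample`; reproducibility is controlled via `seed` alone. A minimal sketch with illustrative data:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3]})

df.select(
  [
    Polars.col("a").shuffle(seed: 1).alias("shuffled"),
    Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1).alias("sampled")
  ]
)
```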
4621
4902
 
@@ -4885,7 +5166,7 @@ module Polars
4885
5166
  # Number of valid values there should be in the window before the expression
4886
5167
  # is evaluated. valid values = `length - null_count`
4887
5168
  # @param parallel [Boolean]
4888
- # Run in parallel. Don't do this in a groupby or another operation that
5169
+ # Run in parallel. Don't do this in a group by or another operation that
4889
5170
  # already has much parallelization.
4890
5171
  #
4891
5172
  # @return [Expr]
@@ -5057,6 +5338,13 @@ module Polars
  MetaExpr.new(self)
  end
 
+ # Create an object namespace of all expressions that modify expression names.
+ #
+ # @return [NameExpr]
+ def name
+ NameExpr.new(self)
+ end
+
  # Create an object namespace of all string related methods.
  #
  # @return [StringExpr]
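The new `name` namespace backs the delegations seen earlier in this diff (`keep_name`, `prefix`, `suffix`, `map_alias`); a minimal sketch with illustrative column names:

```ruby
df = Polars::DataFrame.new({"a" => [1], "b" => [2]})

df.select(
  [
    Polars.col("a").alias("x").name.keep,         # restores the root name "a"
    Polars.col("b").name.map { |n| n.upcase }     # renames "b" to "B"
  ]
)
```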