polars-df 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +468 -538
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +17 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +121 -93
  9. data/ext/polars/src/dataframe.rs +116 -71
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/datetime.rs +10 -12
  13. data/ext/polars/src/expr/general.rs +68 -284
  14. data/ext/polars/src/expr/list.rs +17 -9
  15. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  16. data/ext/polars/src/expr/name.rs +44 -0
  17. data/ext/polars/src/expr/rolling.rs +196 -0
  18. data/ext/polars/src/expr/string.rs +85 -58
  19. data/ext/polars/src/file.rs +3 -3
  20. data/ext/polars/src/functions/aggregation.rs +35 -0
  21. data/ext/polars/src/functions/eager.rs +7 -31
  22. data/ext/polars/src/functions/io.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +66 -41
  24. data/ext/polars/src/functions/meta.rs +30 -0
  25. data/ext/polars/src/functions/misc.rs +8 -0
  26. data/ext/polars/src/functions/mod.rs +5 -0
  27. data/ext/polars/src/functions/random.rs +6 -0
  28. data/ext/polars/src/functions/range.rs +46 -0
  29. data/ext/polars/src/functions/string_cache.rs +11 -0
  30. data/ext/polars/src/functions/whenthen.rs +7 -7
  31. data/ext/polars/src/lazyframe.rs +47 -42
  32. data/ext/polars/src/lib.rs +156 -72
  33. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  34. data/ext/polars/src/{apply → map}/mod.rs +3 -3
  35. data/ext/polars/src/{apply → map}/series.rs +12 -16
  36. data/ext/polars/src/object.rs +1 -1
  37. data/ext/polars/src/rb_modules.rs +22 -7
  38. data/ext/polars/src/series/construction.rs +4 -4
  39. data/ext/polars/src/series/export.rs +2 -2
  40. data/ext/polars/src/series/set_at_idx.rs +33 -17
  41. data/ext/polars/src/series.rs +7 -27
  42. data/ext/polars/src/sql.rs +46 -0
  43. data/lib/polars/config.rb +530 -0
  44. data/lib/polars/data_frame.rb +115 -82
  45. data/lib/polars/date_time_expr.rb +13 -18
  46. data/lib/polars/date_time_name_space.rb +5 -25
  47. data/lib/polars/dynamic_group_by.rb +2 -2
  48. data/lib/polars/expr.rb +177 -94
  49. data/lib/polars/functions.rb +29 -37
  50. data/lib/polars/group_by.rb +38 -55
  51. data/lib/polars/io.rb +37 -2
  52. data/lib/polars/lazy_frame.rb +93 -66
  53. data/lib/polars/lazy_functions.rb +36 -48
  54. data/lib/polars/lazy_group_by.rb +7 -8
  55. data/lib/polars/list_expr.rb +12 -8
  56. data/lib/polars/list_name_space.rb +2 -2
  57. data/lib/polars/name_expr.rb +198 -0
  58. data/lib/polars/rolling_group_by.rb +2 -2
  59. data/lib/polars/series.rb +26 -13
  60. data/lib/polars/sql_context.rb +194 -0
  61. data/lib/polars/string_expr.rb +114 -60
  62. data/lib/polars/string_name_space.rb +19 -4
  63. data/lib/polars/utils.rb +12 -0
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +3 -0
  66. metadata +18 -7
  67. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/lib/polars/expr.rb CHANGED
@@ -131,6 +131,13 @@ module Polars
131
131
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
132
132
  end
133
133
 
134
+ # Performs boolean not.
135
+ #
136
+ # @return [Expr]
137
+ def !
138
+ is_not
139
+ end
140
+
134
141
  # Performs negation.
135
142
  #
136
143
  # @return [Expr]
@@ -191,8 +198,8 @@ module Polars
191
198
  # # ╞══════╪═══════╡
192
199
  # # │ true ┆ false │
193
200
  # # └──────┴───────┘
194
- def any
195
- wrap_expr(_rbexpr.any)
201
+ def any(drop_nulls: true)
202
+ wrap_expr(_rbexpr.any(drop_nulls))
196
203
  end
197
204
 
198
205
  # Check if all boolean values in a Boolean column are `true`.
@@ -216,8 +223,8 @@ module Polars
216
223
  # # ╞══════╪═══════╪═══════╡
217
224
  # # │ true ┆ false ┆ false │
218
225
  # # └──────┴───────┴───────┘
219
- def all
220
- wrap_expr(_rbexpr.all)
226
+ def all(drop_nulls: true)
227
+ wrap_expr(_rbexpr.all(drop_nulls))
221
228
  end
222
229
 
223
230
  # Compute the square root of the elements.
@@ -401,21 +408,21 @@ module Polars
401
408
  # # │ 18 ┆ 4 │
402
409
  # # └─────┴─────┘
403
410
  def keep_name
404
- wrap_expr(_rbexpr.keep_name)
411
+ name.keep
405
412
  end
406
413
 
407
414
  # Add a prefix to the root column name of the expression.
408
415
  #
409
416
  # @return [Expr]
410
417
  def prefix(prefix)
411
- wrap_expr(_rbexpr.prefix(prefix))
418
+ name.prefix(prefix)
412
419
  end
413
420
 
414
421
  # Add a suffix to the root column name of the expression.
415
422
  #
416
423
  # @return [Expr]
417
424
  def suffix(suffix)
418
- wrap_expr(_rbexpr.suffix(suffix))
425
+ name.suffix(suffix)
419
426
  end
420
427
 
421
428
  # Rename the output of an expression by mapping a function over the root name.
@@ -443,7 +450,7 @@ module Polars
443
450
  # # │ 1 ┆ 3 │
444
451
  # # └───────────┴───────────┘
445
452
  def map_alias(&f)
446
- Utils.wrap_expr(_rbexpr.map_alias(f))
453
+ name.map(&f)
447
454
  end
448
455
 
449
456
  # Negate a boolean expression.
@@ -682,7 +689,7 @@ module Polars
682
689
  # "value" => [94, 95, 96, 97, 97, 99]
683
690
  # }
684
691
  # )
685
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
692
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
686
693
  # # =>
687
694
  # # shape: (2, 2)
688
695
  # # ┌───────┬───────────┐
@@ -905,8 +912,8 @@ module Polars
905
912
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
906
913
  # df.select(
907
914
  # [
908
- # Polars.col("a").cumsum,
909
- # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
915
+ # Polars.col("a").cum_sum,
916
+ # Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
910
917
  # ]
911
918
  # )
912
919
  # # =>
@@ -921,9 +928,10 @@ module Polars
921
928
  # # │ 6 ┆ 7 │
922
929
  # # │ 10 ┆ 4 │
923
930
  # # └─────┴───────────┘
924
- def cumsum(reverse: false)
925
- wrap_expr(_rbexpr.cumsum(reverse))
931
+ def cum_sum(reverse: false)
932
+ wrap_expr(_rbexpr.cum_sum(reverse))
926
933
  end
934
+ alias_method :cumsum, :cum_sum
927
935
 
928
936
  # Get an array with the cumulative product computed at every element.
929
937
  #
@@ -940,8 +948,8 @@ module Polars
940
948
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
941
949
  # df.select(
942
950
  # [
943
- # Polars.col("a").cumprod,
944
- # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
951
+ # Polars.col("a").cum_prod,
952
+ # Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
945
953
  # ]
946
954
  # )
947
955
  # # =>
@@ -956,9 +964,10 @@ module Polars
956
964
  # # │ 6 ┆ 12 │
957
965
  # # │ 24 ┆ 4 │
958
966
  # # └─────┴───────────┘
959
- def cumprod(reverse: false)
960
- wrap_expr(_rbexpr.cumprod(reverse))
967
+ def cum_prod(reverse: false)
968
+ wrap_expr(_rbexpr.cum_prod(reverse))
961
969
  end
970
+ alias_method :cumprod, :cum_prod
962
971
 
963
972
  # Get an array with the cumulative min computed at every element.
964
973
  #
@@ -971,8 +980,8 @@ module Polars
971
980
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
972
981
  # df.select(
973
982
  # [
974
- # Polars.col("a").cummin,
975
- # Polars.col("a").cummin(reverse: true).alias("a_reverse")
983
+ # Polars.col("a").cum_min,
984
+ # Polars.col("a").cum_min(reverse: true).alias("a_reverse")
976
985
  # ]
977
986
  # )
978
987
  # # =>
@@ -987,9 +996,10 @@ module Polars
987
996
  # # │ 1 ┆ 3 │
988
997
  # # │ 1 ┆ 4 │
989
998
  # # └─────┴───────────┘
990
- def cummin(reverse: false)
991
- wrap_expr(_rbexpr.cummin(reverse))
999
+ def cum_min(reverse: false)
1000
+ wrap_expr(_rbexpr.cum_min(reverse))
992
1001
  end
1002
+ alias_method :cummin, :cum_min
993
1003
 
994
1004
  # Get an array with the cumulative max computed at every element.
995
1005
  #
@@ -1002,8 +1012,8 @@ module Polars
1002
1012
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1003
1013
  # df.select(
1004
1014
  # [
1005
- # Polars.col("a").cummax,
1006
- # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1015
+ # Polars.col("a").cum_max,
1016
+ # Polars.col("a").cum_max(reverse: true).alias("a_reverse")
1007
1017
  # ]
1008
1018
  # )
1009
1019
  # # =>
@@ -1018,9 +1028,10 @@ module Polars
1018
1028
  # # │ 3 ┆ 4 │
1019
1029
  # # │ 4 ┆ 4 │
1020
1030
  # # └─────┴───────────┘
1021
- def cummax(reverse: false)
1022
- wrap_expr(_rbexpr.cummax(reverse))
1031
+ def cum_max(reverse: false)
1032
+ wrap_expr(_rbexpr.cum_max(reverse))
1023
1033
  end
1034
+ alias_method :cummax, :cum_max
1024
1035
 
1025
1036
  # Get an array with the cumulative count computed at every element.
1026
1037
  #
@@ -1035,8 +1046,8 @@ module Polars
1035
1046
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1036
1047
  # df.select(
1037
1048
  # [
1038
- # Polars.col("a").cumcount,
1039
- # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1049
+ # Polars.col("a").cum_count,
1050
+ # Polars.col("a").cum_count(reverse: true).alias("a_reverse")
1040
1051
  # ]
1041
1052
  # )
1042
1053
  # # =>
@@ -1051,9 +1062,10 @@ module Polars
1051
1062
  # # │ 2 ┆ 1 │
1052
1063
  # # │ 3 ┆ 0 │
1053
1064
  # # └─────┴───────────┘
1054
- def cumcount(reverse: false)
1055
- wrap_expr(_rbexpr.cumcount(reverse))
1065
+ def cum_count(reverse: false)
1066
+ wrap_expr(_rbexpr.cum_count(reverse))
1056
1067
  end
1068
+ alias_method :cumcount, :cum_count
1057
1069
 
1058
1070
  # Rounds down to the nearest integer value.
1059
1071
  #
@@ -1229,7 +1241,7 @@ module Polars
1229
1241
 
1230
1242
  # Sort this column. In projection/ selection context the whole column is sorted.
1231
1243
  #
1232
- # If used in a groupby context, the groups are sorted.
1244
+ # If used in a group by context, the groups are sorted.
1233
1245
  #
1234
1246
  # @param reverse [Boolean]
1235
1247
  # false -> order from small to large.
@@ -1287,7 +1299,7 @@ module Polars
1287
1299
  # # └───────┘
1288
1300
  #
1289
1301
  # @example
1290
- # df.groupby("group").agg(Polars.col("value").sort)
1302
+ # df.group_by("group").agg(Polars.col("value").sort)
1291
1303
  # # =>
1292
1304
  # # shape: (2, 2)
1293
1305
  # # ┌───────┬────────────┐
@@ -1337,6 +1349,7 @@ module Polars
1337
1349
  # # │ 2 ┆ 98 │
1338
1350
  # # └───────┴──────────┘
1339
1351
  def top_k(k: 5)
1352
+ k = Utils.parse_as_expression(k)
1340
1353
  wrap_expr(_rbexpr.top_k(k))
1341
1354
  end
1342
1355
 
@@ -1375,6 +1388,7 @@ module Polars
1375
1388
  # # │ 2 ┆ 98 │
1376
1389
  # # └───────┴──────────┘
1377
1390
  def bottom_k(k: 5)
1391
+ k = Utils.parse_as_expression(k)
1378
1392
  wrap_expr(_rbexpr.bottom_k(k))
1379
1393
  end
1380
1394
 
@@ -1494,7 +1508,7 @@ module Polars
1494
1508
  # Sort this column by the ordering of another column, or multiple other columns.
1495
1509
  #
1496
1510
  # In projection/ selection context the whole column is sorted.
1497
- # If used in a groupby context, the groups are sorted.
1511
+ # If used in a group by context, the groups are sorted.
1498
1512
  #
1499
1513
  # @param by [Object]
1500
1514
  # The column(s) used for sorting.
@@ -1566,30 +1580,33 @@ module Polars
1566
1580
  # "value" => [1, 98, 2, 3, 99, 4]
1567
1581
  # }
1568
1582
  # )
1569
- # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1583
+ # df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
1570
1584
  # # =>
1571
1585
  # # shape: (2, 2)
1572
- # # ┌───────┬───────┐
1573
- # # │ group ┆ value │
1574
- # # │ --- ┆ --- │
1575
- # # │ str ┆ i64 │
1576
- # # ╞═══════╪═══════╡
1577
- # # │ one ┆ 98 │
1578
- # # │ two ┆ 99 │
1579
- # # └───────┴───────┘
1580
- def take(indices)
1586
+ # # ┌───────┬───────────┐
1587
+ # # │ group ┆ value │
1588
+ # # │ --- ┆ --- │
1589
+ # # │ str ┆ list[i64] │
1590
+ # # ╞═══════╪═══════════╡
1591
+ # # │ one ┆ [2, 98] │
1592
+ # # │ two ┆ [4, 99] │
1593
+ # # └───────┴───────────┘
1594
+ def gather(indices)
1581
1595
  if indices.is_a?(::Array)
1582
1596
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1583
1597
  else
1584
1598
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1585
1599
  end
1586
- wrap_expr(_rbexpr.take(indices_lit._rbexpr))
1600
+ wrap_expr(_rbexpr.gather(indices_lit._rbexpr))
1587
1601
  end
1602
+ alias_method :take, :gather
1588
1603
 
1589
1604
  # Shift the values by a given period.
1590
1605
  #
1591
- # @param periods [Integer]
1606
+ # @param n [Integer]
1592
1607
  # Number of places to shift (may be negative).
1608
+ # @param fill_value [Object]
1609
+ # Fill the resulting null values with this value.
1593
1610
  #
1594
1611
  # @return [Expr]
1595
1612
  #
@@ -1608,8 +1625,12 @@ module Polars
1608
1625
  # # │ 2 │
1609
1626
  # # │ 3 │
1610
1627
  # # └──────┘
1611
- def shift(periods = 1)
1612
- wrap_expr(_rbexpr.shift(periods))
1628
+ def shift(n = 1, fill_value: nil)
1629
+ if !fill_value.nil?
1630
+ fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1631
+ end
1632
+ n = Utils.parse_as_expression(n)
1633
+ wrap_expr(_rbexpr.shift(n, fill_value))
1613
1634
  end
1614
1635
 
1615
1636
  # Shift the values by a given period and fill the resulting null values.
@@ -1637,8 +1658,7 @@ module Polars
1637
1658
  # # │ 3 │
1638
1659
  # # └─────┘
1639
1660
  def shift_and_fill(periods, fill_value)
1640
- fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
1641
- wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
1661
+ shift(periods, fill_value: fill_value)
1642
1662
  end
1643
1663
 
1644
1664
  # Fill null values using the specified value or strategy.
@@ -2063,7 +2083,7 @@ module Polars
2063
2083
  # # │ 2 │
2064
2084
  # # └─────┘
2065
2085
  def approx_unique
2066
- wrap_expr(_rbexpr.approx_unique)
2086
+ wrap_expr(_rbexpr.approx_n_unique)
2067
2087
  end
2068
2088
 
2069
2089
  # Count null values.
@@ -2201,7 +2221,7 @@ module Polars
2201
2221
 
2202
2222
  # Apply window function over a subgroup.
2203
2223
  #
2204
- # This is similar to a groupby + aggregation + self join.
2224
+ # This is similar to a group by + aggregation + self join.
2205
2225
  # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2206
2226
  #
2207
2227
  # @param expr [Object]
@@ -2309,9 +2329,10 @@ module Polars
2309
2329
  # # │ 1 ┆ false │
2310
2330
  # # │ 5 ┆ true │
2311
2331
  # # └─────┴──────────┘
2312
- def is_first
2313
- wrap_expr(_rbexpr.is_first)
2332
+ def is_first_distinct
2333
+ wrap_expr(_rbexpr.is_first_distinct)
2314
2334
  end
2335
+ alias_method :is_first, :is_first_distinct
2315
2336
 
2316
2337
  # Get mask of duplicated values.
2317
2338
  #
@@ -2335,6 +2356,54 @@ module Polars
2335
2356
  wrap_expr(_rbexpr.is_duplicated)
2336
2357
  end
2337
2358
 
2359
+ # Get a boolean mask of the local maximum peaks.
2360
+ #
2361
+ # @return [Expr]
2362
+ #
2363
+ # @example
2364
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
2365
+ # df.select(Polars.col("a").peak_max)
2366
+ # # =>
2367
+ # # shape: (5, 1)
2368
+ # # ┌───────┐
2369
+ # # │ a │
2370
+ # # │ --- │
2371
+ # # │ bool │
2372
+ # # ╞═══════╡
2373
+ # # │ false │
2374
+ # # │ false │
2375
+ # # │ false │
2376
+ # # │ false │
2377
+ # # │ true │
2378
+ # # └───────┘
2379
+ def peak_max
2380
+ wrap_expr(_rbexpr.peak_max)
2381
+ end
2382
+
2383
+ # Get a boolean mask of the local minimum peaks.
2384
+ #
2385
+ # @return [Expr]
2386
+ #
2387
+ # @example
2388
+ # df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
2389
+ # df.select(Polars.col("a").peak_min)
2390
+ # # =>
2391
+ # # shape: (5, 1)
2392
+ # # ┌───────┐
2393
+ # # │ a │
2394
+ # # │ --- │
2395
+ # # │ bool │
2396
+ # # ╞═══════╡
2397
+ # # │ false │
2398
+ # # │ true │
2399
+ # # │ false │
2400
+ # # │ true │
2401
+ # # │ false │
2402
+ # # └───────┘
2403
+ def peak_min
2404
+ wrap_expr(_rbexpr.peak_min)
2405
+ end
2406
+
2338
2407
  # Get quantile value.
2339
2408
  #
2340
2409
  # @param quantile [Float]
@@ -2427,7 +2496,7 @@ module Polars
2427
2496
  # }
2428
2497
  # )
2429
2498
  # (
2430
- # df.groupby("group_col").agg(
2499
+ # df.group_by("group_col").agg(
2431
2500
  # [
2432
2501
  # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2433
2502
  # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
@@ -2465,7 +2534,7 @@ module Polars
2465
2534
  # }
2466
2535
  # )
2467
2536
  # (
2468
- # df.groupby("group_col").agg(
2537
+ # df.group_by("group_col").agg(
2469
2538
  # [
2470
2539
  # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2471
2540
  # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
@@ -2583,7 +2652,7 @@ module Polars
2583
2652
  #
2584
2653
  # @example In a GroupBy context the function is applied by group:
2585
2654
  # df.lazy
2586
- # .groupby("b", maintain_order: true)
2655
+ # .group_by("b", maintain_order: true)
2587
2656
  # .agg(
2588
2657
  # [
2589
2658
  # Polars.col("a").apply { |x| x.sum }
@@ -2622,7 +2691,7 @@ module Polars
2622
2691
  # "values" => [[1, 2], [2, 3], [4]]
2623
2692
  # }
2624
2693
  # )
2625
- # df.groupby("group").agg(Polars.col("values").flatten)
2694
+ # df.group_by("group").agg(Polars.col("values").flatten)
2626
2695
  # # =>
2627
2696
  # # shape: (2, 2)
2628
2697
  # # ┌───────┬───────────┐
@@ -2670,7 +2739,7 @@ module Polars
2670
2739
  #
2671
2740
  # @example
2672
2741
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2673
- # df.select(Polars.col("foo").take_every(3))
2742
+ # df.select(Polars.col("foo").gather_every(3))
2674
2743
  # # =>
2675
2744
  # # shape: (3, 1)
2676
2745
  # # ┌─────┐
@@ -2682,9 +2751,10 @@ module Polars
2682
2751
  # # │ 4 │
2683
2752
  # # │ 7 │
2684
2753
  # # └─────┘
2685
- def take_every(n)
2686
- wrap_expr(_rbexpr.take_every(n))
2754
+ def gather_every(n)
2755
+ wrap_expr(_rbexpr.gather_every(n))
2687
2756
  end
2757
+ alias_method :take_every, :gather_every
2688
2758
 
2689
2759
  # Get the first `n` rows.
2690
2760
  #
@@ -3057,11 +3127,11 @@ module Polars
3057
3127
  # # ┌─────┬─────┐
3058
3128
  # # │ a ┆ b │
3059
3129
  # # │ --- ┆ --- │
3060
- # # │ i64 ┆ f64 │
3130
+ # # │ f64 ┆ f64 │
3061
3131
  # # ╞═════╪═════╡
3062
- # # │ 1 ┆ 1.0 │
3063
- # # │ 2 ┆ NaN │
3064
- # # │ 3 ┆ 3.0 │
3132
+ # # │ 1.0 ┆ 1.0 │
3133
+ # # │ 2.0 ┆ NaN │
3134
+ # # │ 3.0 ┆ 3.0 │
3065
3135
  # # └─────┴─────┘
3066
3136
  def interpolate(method: "linear")
3067
3137
  wrap_expr(_rbexpr.interpolate(method))
@@ -3112,7 +3182,7 @@ module Polars
3112
3182
  #
3113
3183
  # @note
3114
3184
  # If you want to compute multiple aggregation statistics over the same dynamic
3115
- # window, consider using `groupby_rolling` this method can cache the window size
3185
+ # window, consider using `group_by_rolling` this method can cache the window size
3116
3186
  # computation.
3117
3187
  #
3118
3188
  # @return [Expr]
@@ -3201,7 +3271,7 @@ module Polars
3201
3271
  #
3202
3272
  # @note
3203
3273
  # If you want to compute multiple aggregation statistics over the same dynamic
3204
- # window, consider using `groupby_rolling` this method can cache the window size
3274
+ # window, consider using `group_by_rolling` this method can cache the window size
3205
3275
  # computation.
3206
3276
  #
3207
3277
  # @return [Expr]
@@ -3290,7 +3360,7 @@ module Polars
3290
3360
  #
3291
3361
  # @note
3292
3362
  # If you want to compute multiple aggregation statistics over the same dynamic
3293
- # window, consider using `groupby_rolling` this method can cache the window size
3363
+ # window, consider using `group_by_rolling` this method can cache the window size
3294
3364
  # computation.
3295
3365
  #
3296
3366
  # @return [Expr]
@@ -3379,7 +3449,7 @@ module Polars
3379
3449
  #
3380
3450
  # @note
3381
3451
  # If you want to compute multiple aggregation statistics over the same dynamic
3382
- # window, consider using `groupby_rolling` this method can cache the window size
3452
+ # window, consider using `group_by_rolling` this method can cache the window size
3383
3453
  # computation.
3384
3454
  #
3385
3455
  # @return [Expr]
@@ -3468,7 +3538,7 @@ module Polars
3468
3538
  #
3469
3539
  # @note
3470
3540
  # If you want to compute multiple aggregation statistics over the same dynamic
3471
- # window, consider using `groupby_rolling` this method can cache the window size
3541
+ # window, consider using `group_by_rolling` this method can cache the window size
3472
3542
  # computation.
3473
3543
  #
3474
3544
  # @return [Expr]
@@ -3558,7 +3628,7 @@ module Polars
3558
3628
  #
3559
3629
  # @note
3560
3630
  # If you want to compute multiple aggregation statistics over the same dynamic
3561
- # window, consider using `groupby_rolling` this method can cache the window size
3631
+ # window, consider using `group_by_rolling` this method can cache the window size
3562
3632
  # computation.
3563
3633
  #
3564
3634
  # @return [Expr]
@@ -3644,7 +3714,7 @@ module Polars
3644
3714
  #
3645
3715
  # @note
3646
3716
  # If you want to compute multiple aggregation statistics over the same dynamic
3647
- # window, consider using `groupby_rolling` this method can cache the window size
3717
+ # window, consider using `group_by_rolling` this method can cache the window size
3648
3718
  # computation.
3649
3719
  #
3650
3720
  # @return [Expr]
@@ -3733,7 +3803,7 @@ module Polars
3733
3803
  #
3734
3804
  # @note
3735
3805
  # If you want to compute multiple aggregation statistics over the same dynamic
3736
- # window, consider using `groupby_rolling` this method can cache the window size
3806
+ # window, consider using `group_by_rolling` this method can cache the window size
3737
3807
  # computation.
3738
3808
  #
3739
3809
  # @return [Expr]
@@ -3948,7 +4018,7 @@ module Polars
3948
4018
  # # ┌─────┐
3949
4019
  # # │ a │
3950
4020
  # # │ --- │
3951
- # # │ f32 │
4021
+ # # │ f64 │
3952
4022
  # # ╞═════╡
3953
4023
  # # │ 3.0 │
3954
4024
  # # │ 4.5 │
@@ -4041,6 +4111,7 @@ module Polars
4041
4111
  # # │ 12 ┆ 0.0 │
4042
4112
  # # └──────┴────────────┘
4043
4113
  def pct_change(n: 1)
4114
+ n = Utils.parse_as_expression(n)
4044
4115
  wrap_expr(_rbexpr.pct_change(n))
4045
4116
  end
4046
4117
 
@@ -4105,16 +4176,14 @@ module Polars
4105
4176
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
4106
4177
  end
4107
4178
 
4108
- # Clip (limit) the values in an array to a `min` and `max` boundary.
4109
- #
4110
- # Only works for numerical types.
4179
+ # Set values outside the given boundaries to the boundary value.
4111
4180
  #
4112
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4113
- # expression. See `when` for more information.
4181
+ # Only works for numeric and temporal columns. If you want to clip other data
4182
+ # types, consider writing a `when-then-otherwise` expression.
4114
4183
  #
4115
- # @param min_val [Numeric]
4184
+ # @param lower_bound [Numeric]
4116
4185
  # Minimum value.
4117
- # @param max_val [Numeric]
4186
+ # @param upper_bound [Numeric]
4118
4187
  # Maximum value.
4119
4188
  #
4120
4189
  # @return [Expr]
@@ -4134,8 +4203,14 @@ module Polars
4134
4203
  # # │ null ┆ null │
4135
4204
  # # │ 50 ┆ 10 │
4136
4205
  # # └──────┴─────────────┘
4137
- def clip(min_val, max_val)
4138
- wrap_expr(_rbexpr.clip(min_val, max_val))
4206
+ def clip(lower_bound, upper_bound)
4207
+ if !lower_bound.nil?
4208
+ lower_bound = Utils.parse_as_expression(lower_bound, str_as_lit: true)
4209
+ end
4210
+ if !upper_bound.nil?
4211
+ upper_bound = Utils.parse_as_expression(upper_bound, str_as_lit: true)
4212
+ end
4213
+ wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
4139
4214
  end
4140
4215
 
4141
4216
  # Clip (limit) the values in an array to a `min` boundary.
@@ -4145,7 +4220,7 @@ module Polars
4145
4220
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4146
4221
  # expression. See `when` for more information.
4147
4222
  #
4148
- # @param min_val [Numeric]
4223
+ # @param lower_bound [Numeric]
4149
4224
  # Minimum value.
4150
4225
  #
4151
4226
  # @return [Expr]
@@ -4165,8 +4240,8 @@ module Polars
4165
4240
  # # │ null ┆ null │
4166
4241
  # # │ 50 ┆ 50 │
4167
4242
  # # └──────┴─────────────┘
4168
- def clip_min(min_val)
4169
- wrap_expr(_rbexpr.clip_min(min_val))
4243
+ def clip_min(lower_bound)
4244
+ clip(lower_bound, nil)
4170
4245
  end
4171
4246
 
4172
4247
  # Clip (limit) the values in an array to a `max` boundary.
@@ -4176,7 +4251,7 @@ module Polars
4176
4251
  # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4177
4252
  # expression. See `when` for more information.
4178
4253
  #
4179
- # @param max_val [Numeric]
4254
+ # @param upper_bound [Numeric]
4180
4255
  # Maximum value.
4181
4256
  #
4182
4257
  # @return [Expr]
@@ -4196,8 +4271,8 @@ module Polars
4196
4271
  # # │ null ┆ null │
4197
4272
  # # │ 50 ┆ 0 │
4198
4273
  # # └──────┴─────────────┘
4199
- def clip_max(max_val)
4200
- wrap_expr(_rbexpr.clip_max(max_val))
4274
+ def clip_max(upper_bound)
4275
+ clip(nil, upper_bound)
4201
4276
  end
4202
4277
 
4203
4278
  # Calculate the lower bound.
@@ -4558,11 +4633,11 @@ module Polars
4558
4633
  # # │ 1 │
4559
4634
  # # │ 3 │
4560
4635
  # # └─────┘
4561
- def shuffle(seed: nil, fixed_seed: false)
4636
+ def shuffle(seed: nil)
4562
4637
  if seed.nil?
4563
4638
  seed = rand(10000)
4564
4639
  end
4565
- wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4640
+ wrap_expr(_rbexpr.shuffle(seed))
4566
4641
  end
4567
4642
 
4568
4643
  # Sample from this expression.
@@ -4600,22 +4675,23 @@ module Polars
4600
4675
  with_replacement: true,
4601
4676
  shuffle: false,
4602
4677
  seed: nil,
4603
- n: nil,
4604
- fixed_seed: false
4678
+ n: nil
4605
4679
  )
4606
4680
  if !n.nil? && !frac.nil?
4607
4681
  raise ArgumentError, "cannot specify both `n` and `frac`"
4608
4682
  end
4609
4683
 
4610
4684
  if !n.nil? && frac.nil?
4611
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
4685
+ n = Utils.parse_as_expression(n)
4686
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4612
4687
  end
4613
4688
 
4614
4689
  if frac.nil?
4615
4690
  frac = 1.0
4616
4691
  end
4692
+ frac = Utils.parse_as_expression(frac)
4617
4693
  wrap_expr(
4618
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
4694
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4619
4695
  )
4620
4696
  end
4621
4697
 
@@ -4885,7 +4961,7 @@ module Polars
4885
4961
  # Number of valid values there should be in the window before the expression
4886
4962
  # is evaluated. valid values = `length - null_count`
4887
4963
  # @param parallel [Boolean]
4888
- # Run in parallel. Don't do this in a groupby or another operation that
4964
+ # Run in parallel. Don't do this in a group by or another operation that
4889
4965
  # already has much parallelization.
4890
4966
  #
4891
4967
  # @return [Expr]
@@ -5057,6 +5133,13 @@ module Polars
5057
5133
  MetaExpr.new(self)
5058
5134
  end
5059
5135
 
5136
+ # Create an object namespace of all expressions that modify expression names.
5137
+ #
5138
+ # @return [NameExpr]
5139
+ def name
5140
+ NameExpr.new(self)
5141
+ end
5142
+
5060
5143
  # Create an object namespace of all string related methods.
5061
5144
  #
5062
5145
  # @return [StringExpr]