polars-df 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/expr.rb CHANGED
@@ -138,8 +138,45 @@ module Polars
138
138
  Utils.lit(0) - self
139
139
  end
140
140
 
141
- # def to_physical
142
- # end
141
+ # Cast to physical representation of the logical dtype.
142
+ #
143
+ # - `:date` -> `:i32`
144
+ # - `:datetime` -> `:i64`
145
+ # - `:time` -> `:i64`
146
+ # - `:duration` -> `:i64`
147
+ # - `:cat` -> `:u32`
148
+ # - Other data types will be left unchanged.
149
+ #
150
+ # @return [Expr]
151
+ #
152
+ # @example
153
+ # Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
154
+ # [
155
+ # Polars.col("vals").cast(:cat),
156
+ # Polars.col("vals")
157
+ # .cast(:cat)
158
+ # .to_physical
159
+ # .alias("vals_physical")
160
+ # ]
161
+ # )
162
+ # # =>
163
+ # # shape: (4, 2)
164
+ # # ┌──────┬───────────────┐
165
+ # # │ vals ┆ vals_physical │
166
+ # # │ --- ┆ --- │
167
+ # # │ cat ┆ u32 │
168
+ # # ╞══════╪═══════════════╡
169
+ # # │ a ┆ 0 │
170
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
171
+ # # │ x ┆ 1 │
172
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
173
+ # # │ null ┆ null │
174
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
175
+ # # │ a ┆ 0 │
176
+ # # └──────┴───────────────┘
177
+ def to_physical
178
+ wrap_expr(_rbexpr.to_physical)
179
+ end
143
180
 
144
181
  # Check if any boolean value in a Boolean column is `true`.
145
182
  #
@@ -258,13 +295,82 @@ module Polars
258
295
  wrap_expr(_rbexpr.exp)
259
296
  end
260
297
 
298
+ # Rename the output of an expression.
299
+ #
300
+ # @param name [String]
301
+ # New name.
302
+ #
303
+ # @return [Expr]
304
+ #
305
+ # @example
306
+ # df = Polars::DataFrame.new(
307
+ # {
308
+ # "a" => [1, 2, 3],
309
+ # "b" => ["a", "b", nil]
310
+ # }
311
+ # )
312
+ # df.select(
313
+ # [
314
+ # Polars.col("a").alias("bar"),
315
+ # Polars.col("b").alias("foo")
316
+ # ]
317
+ # )
318
+ # # =>
319
+ # # shape: (3, 2)
320
+ # # ┌─────┬──────┐
321
+ # # │ bar ┆ foo │
322
+ # # │ --- ┆ --- │
323
+ # # │ i64 ┆ str │
324
+ # # ╞═════╪══════╡
325
+ # # │ 1 ┆ a │
326
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
327
+ # # │ 2 ┆ b │
328
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
329
+ # # │ 3 ┆ null │
330
+ # # └─────┴──────┘
261
331
  def alias(name)
262
332
  wrap_expr(_rbexpr._alias(name))
263
333
  end
264
334
 
265
335
  # TODO support symbols for exclude
266
336
 
337
+ # Exclude certain columns from a wildcard/regex selection.
338
+ #
339
+ # You may also use regexes in the exclude list. They must start with `^` and end
340
+ # with `$`.
341
+ #
342
+ # @param columns [Object]
343
+ # Column(s) to exclude from selection.
344
+ # This can be:
267
345
  #
346
+ # - a column name, or multiple column names
347
+ # - a regular expression starting with `^` and ending with `$`
348
+ # - a dtype or multiple dtypes
349
+ #
350
+ # @return [Expr]
351
+ #
352
+ # @example
353
+ # df = Polars::DataFrame.new(
354
+ # {
355
+ # "aa" => [1, 2, 3],
356
+ # "ba" => ["a", "b", nil],
357
+ # "cc" => [nil, 2.5, 1.5]
358
+ # }
359
+ # )
360
+ # df.select(Polars.all.exclude("ba"))
361
+ # # =>
362
+ # # shape: (3, 2)
363
+ # # ┌─────┬──────┐
364
+ # # │ aa ┆ cc │
365
+ # # │ --- ┆ --- │
366
+ # # │ i64 ┆ f64 │
367
+ # # ╞═════╪══════╡
368
+ # # │ 1 ┆ null │
369
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
370
+ # # │ 2 ┆ 2.5 │
371
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
372
+ # # │ 3 ┆ 1.5 │
373
+ # # └─────┴──────┘
268
374
  def exclude(columns)
269
375
  if columns.is_a?(String)
270
376
  columns = [columns]
@@ -285,14 +391,43 @@ module Polars
285
391
  end
286
392
  end
287
393
 
394
+ # Keep the original root name of the expression.
395
+ #
396
+ # @return [Expr]
397
+ #
398
+ # @example
399
+ # df = Polars::DataFrame.new(
400
+ # {
401
+ # "a" => [1, 2],
402
+ # "b" => [3, 4]
403
+ # }
404
+ # )
405
+ # df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
406
+ # # =>
407
+ # # shape: (2, 2)
408
+ # # ┌─────┬─────┐
409
+ # # │ a ┆ b │
410
+ # # │ --- ┆ --- │
411
+ # # │ i64 ┆ i64 │
412
+ # # ╞═════╪═════╡
413
+ # # │ 9 ┆ 3 │
414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
415
+ # # │ 18 ┆ 4 │
416
+ # # └─────┴─────┘
288
417
  def keep_name
289
418
  wrap_expr(_rbexpr.keep_name)
290
419
  end
291
420
 
421
+ # Add a prefix to the root column name of the expression.
422
+ #
423
+ # @return [Expr]
292
424
  def prefix(prefix)
293
425
  wrap_expr(_rbexpr.prefix(prefix))
294
426
  end
295
427
 
428
+ # Add a suffix to the root column name of the expression.
429
+ #
430
+ # @return [Expr]
296
431
  def suffix(suffix)
297
432
  wrap_expr(_rbexpr.suffix(suffix))
298
433
  end
@@ -464,14 +599,112 @@ module Polars
464
599
  wrap_expr(_rbexpr.is_infinite)
465
600
  end
466
601
 
602
+ # Returns a boolean Series indicating which values are NaN.
603
+ #
604
+ # @note
605
+ # Floating point `NaN` (Not A Number) should not be confused
606
+ # with missing data represented as `nil`.
607
+ #
608
+ # @return [Expr]
609
+ #
610
+ # @example
611
+ # df = Polars::DataFrame.new(
612
+ # {
613
+ # "a" => [1, 2, nil, 1, 5],
614
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
615
+ # }
616
+ # )
617
+ # df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
618
+ # # =>
619
+ # # shape: (5, 3)
620
+ # # ┌──────┬─────┬─────────┐
621
+ # # │ a ┆ b ┆ b_isnan │
622
+ # # │ --- ┆ --- ┆ --- │
623
+ # # │ i64 ┆ f64 ┆ bool │
624
+ # # ╞══════╪═════╪═════════╡
625
+ # # │ 1 ┆ 1.0 ┆ false │
626
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
627
+ # # │ 2 ┆ 2.0 ┆ false │
628
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
629
+ # # │ null ┆ NaN ┆ true │
630
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
631
+ # # │ 1 ┆ 1.0 ┆ false │
632
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
633
+ # # │ 5 ┆ 5.0 ┆ false │
634
+ # # └──────┴─────┴─────────┘
467
635
  def is_nan
468
636
  wrap_expr(_rbexpr.is_nan)
469
637
  end
470
638
 
639
+ # Returns a boolean Series indicating which values are not NaN.
640
+ #
641
+ # @note
642
+ # Floating point `NaN` (Not A Number) should not be confused
643
+ # with missing data represented as `nil`.
644
+ #
645
+ # @return [Expr]
646
+ #
647
+ # @example
648
+ # df = Polars::DataFrame.new(
649
+ # {
650
+ # "a" => [1, 2, nil, 1, 5],
651
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
652
+ # }
653
+ # )
654
+ # df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
655
+ # # =>
656
+ # # shape: (5, 3)
657
+ # # ┌──────┬─────┬──────────────┐
658
+ # # │ a ┆ b ┆ b_is_not_nan │
659
+ # # │ --- ┆ --- ┆ --- │
660
+ # # │ i64 ┆ f64 ┆ bool │
661
+ # # ╞══════╪═════╪══════════════╡
662
+ # # │ 1 ┆ 1.0 ┆ true │
663
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
664
+ # # │ 2 ┆ 2.0 ┆ true │
665
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
666
+ # # │ null ┆ NaN ┆ false │
667
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
668
+ # # │ 1 ┆ 1.0 ┆ true │
669
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
670
+ # # │ 5 ┆ 5.0 ┆ true │
671
+ # # └──────┴─────┴──────────────┘
471
672
  def is_not_nan
472
673
  wrap_expr(_rbexpr.is_not_nan)
473
674
  end
474
675
 
676
+ # Get the group indexes of the group by operation.
677
+ #
678
+ # Should be used in aggregation context only.
679
+ #
680
+ # @return [Expr]
681
+ #
682
+ # @example
683
+ # df = Polars::DataFrame.new(
684
+ # {
685
+ # "group" => [
686
+ # "one",
687
+ # "one",
688
+ # "one",
689
+ # "two",
690
+ # "two",
691
+ # "two"
692
+ # ],
693
+ # "value" => [94, 95, 96, 97, 97, 99]
694
+ # }
695
+ # )
696
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
697
+ # # =>
698
+ # # shape: (2, 2)
699
+ # # ┌───────┬───────────┐
700
+ # # │ group ┆ value │
701
+ # # │ --- ┆ --- │
702
+ # # │ str ┆ list[u32] │
703
+ # # ╞═══════╪═══════════╡
704
+ # # │ one ┆ [0, 1, 2] │
705
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
706
+ # # │ two ┆ [3, 4, 5] │
707
+ # # └───────┴───────────┘
475
708
  def agg_groups
476
709
  wrap_expr(_rbexpr.agg_groups)
477
710
  end
@@ -557,6 +790,36 @@ module Polars
557
790
  wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
558
791
  end
559
792
 
793
+ # Append expressions.
794
+ #
795
+ # This is done by adding the chunks of `other` to this `Series`.
796
+ #
797
+ # @param other [Expr]
798
+ # Expression to append.
799
+ # @param upcast [Boolean]
800
+ # Cast both `Series` to the same supertype.
801
+ #
802
+ # @return [Expr]
803
+ #
804
+ # @example
805
+ # df = Polars::DataFrame.new(
806
+ # {
807
+ # "a" => [8, 9, 10],
808
+ # "b" => [nil, 4, 4]
809
+ # }
810
+ # )
811
+ # df.select(Polars.all.head(1).append(Polars.all.tail(1)))
812
+ # # =>
813
+ # # shape: (2, 2)
814
+ # # ┌─────┬──────┐
815
+ # # │ a ┆ b │
816
+ # # │ --- ┆ --- │
817
+ # # │ i64 ┆ i64 │
818
+ # # ╞═════╪══════╡
819
+ # # │ 8 ┆ null │
820
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
821
+ # # │ 10 ┆ 4 │
822
+ # # └─────┴──────┘
560
823
  def append(other, upcast: true)
561
824
  other = Utils.expr_to_lit_or_expr(other)
562
825
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
@@ -567,7 +830,7 @@ module Polars
567
830
  # @return [Expr]
568
831
  #
569
832
  # @example Create a Series with 3 nulls, append column a then rechunk
570
- # df = Polars::DataFrame.new({"a": [1, 1, 2]})
833
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
571
834
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
572
835
  # # =>
573
836
  # # shape: (6, 1)
@@ -650,22 +913,182 @@ module Polars
650
913
  wrap_expr(_rbexpr.drop_nans)
651
914
  end
652
915
 
916
+ # Get an array with the cumulative sum computed at every element.
917
+ #
918
+ # @param reverse [Boolean]
919
+ # Reverse the operation.
920
+ #
921
+ # @return [Expr]
922
+ #
923
+ # @note
924
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
925
+ # `:i64` before summing to prevent overflow issues.
926
+ #
927
+ # @example
928
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
929
+ # df.select(
930
+ # [
931
+ # Polars.col("a").cumsum,
932
+ # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
933
+ # ]
934
+ # )
935
+ # # =>
936
+ # # shape: (4, 2)
937
+ # # ┌─────┬───────────┐
938
+ # # │ a ┆ a_reverse │
939
+ # # │ --- ┆ --- │
940
+ # # │ i64 ┆ i64 │
941
+ # # ╞═════╪═══════════╡
942
+ # # │ 1 ┆ 10 │
943
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
944
+ # # │ 3 ┆ 9 │
945
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
946
+ # # │ 6 ┆ 7 │
947
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
948
+ # # │ 10 ┆ 4 │
949
+ # # └─────┴───────────┘
653
950
  def cumsum(reverse: false)
654
951
  wrap_expr(_rbexpr.cumsum(reverse))
655
952
  end
656
953
 
954
+ # Get an array with the cumulative product computed at every element.
955
+ #
956
+ # @param reverse [Boolean]
957
+ # Reverse the operation.
958
+ #
959
+ # @return [Expr]
960
+ #
961
+ # @note
962
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
963
+ # `:i64` before multiplying to prevent overflow issues.
964
+ #
965
+ # @example
966
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
967
+ # df.select(
968
+ # [
969
+ # Polars.col("a").cumprod,
970
+ # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
971
+ # ]
972
+ # )
973
+ # # =>
974
+ # # shape: (4, 2)
975
+ # # ┌─────┬───────────┐
976
+ # # │ a ┆ a_reverse │
977
+ # # │ --- ┆ --- │
978
+ # # │ i64 ┆ i64 │
979
+ # # ╞═════╪═══════════╡
980
+ # # │ 1 ┆ 24 │
981
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
982
+ # # │ 2 ┆ 24 │
983
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
984
+ # # │ 6 ┆ 12 │
985
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
986
+ # # │ 24 ┆ 4 │
987
+ # # └─────┴───────────┘
657
988
  def cumprod(reverse: false)
658
989
  wrap_expr(_rbexpr.cumprod(reverse))
659
990
  end
660
991
 
992
+ # Get an array with the cumulative min computed at every element.
993
+ #
994
+ # @param reverse [Boolean]
995
+ # Reverse the operation.
996
+ #
997
+ # @return [Expr]
998
+ #
999
+ # @example
1000
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1001
+ # df.select(
1002
+ # [
1003
+ # Polars.col("a").cummin,
1004
+ # Polars.col("a").cummin(reverse: true).alias("a_reverse")
1005
+ # ]
1006
+ # )
1007
+ # # =>
1008
+ # # shape: (4, 2)
1009
+ # # ┌─────┬───────────┐
1010
+ # # │ a ┆ a_reverse │
1011
+ # # │ --- ┆ --- │
1012
+ # # │ i64 ┆ i64 │
1013
+ # # ╞═════╪═══════════╡
1014
+ # # │ 1 ┆ 1 │
1015
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1016
+ # # │ 1 ┆ 2 │
1017
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1018
+ # # │ 1 ┆ 3 │
1019
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1020
+ # # │ 1 ┆ 4 │
1021
+ # # └─────┴───────────┘
661
1022
  def cummin(reverse: false)
662
1023
  wrap_expr(_rbexpr.cummin(reverse))
663
1024
  end
664
1025
 
1026
+ # Get an array with the cumulative max computed at every element.
1027
+ #
1028
+ # @param reverse [Boolean]
1029
+ # Reverse the operation.
1030
+ #
1031
+ # @return [Expr]
1032
+ #
1033
+ # @example
1034
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1035
+ # df.select(
1036
+ # [
1037
+ # Polars.col("a").cummax,
1038
+ # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1039
+ # ]
1040
+ # )
1041
+ # # =>
1042
+ # # shape: (4, 2)
1043
+ # # ┌─────┬───────────┐
1044
+ # # │ a ┆ a_reverse │
1045
+ # # │ --- ┆ --- │
1046
+ # # │ i64 ┆ i64 │
1047
+ # # ╞═════╪═══════════╡
1048
+ # # │ 1 ┆ 4 │
1049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1050
+ # # │ 2 ┆ 4 │
1051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1052
+ # # │ 3 ┆ 4 │
1053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1054
+ # # │ 4 ┆ 4 │
1055
+ # # └─────┴───────────┘
665
1056
  def cummax(reverse: false)
666
1057
  wrap_expr(_rbexpr.cummax(reverse))
667
1058
  end
668
1059
 
1060
+ # Get an array with the cumulative count computed at every element.
1061
+ #
1062
+ # Counts from 0 to `len - 1`.
1063
+ #
1064
+ # @param reverse [Boolean]
1065
+ # Reverse the operation.
1066
+ #
1067
+ # @return [Expr]
1068
+ #
1069
+ # @example
1070
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1071
+ # df.select(
1072
+ # [
1073
+ # Polars.col("a").cumcount,
1074
+ # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1075
+ # ]
1076
+ # )
1077
+ # # =>
1078
+ # # shape: (4, 2)
1079
+ # # ┌─────┬───────────┐
1080
+ # # │ a ┆ a_reverse │
1081
+ # # │ --- ┆ --- │
1082
+ # # │ u32 ┆ u32 │
1083
+ # # ╞═════╪═══════════╡
1084
+ # # │ 0 ┆ 3 │
1085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1086
+ # # │ 1 ┆ 2 │
1087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1088
+ # # │ 2 ┆ 1 │
1089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1090
+ # # │ 3 ┆ 0 │
1091
+ # # └─────┴───────────┘
669
1092
  def cumcount(reverse: false)
670
1093
  wrap_expr(_rbexpr.cumcount(reverse))
671
1094
  end
@@ -755,6 +1178,30 @@ module Polars
755
1178
  wrap_expr(_rbexpr.round(decimals))
756
1179
  end
757
1180
 
1181
+ # Compute the dot/inner product between two Expressions.
1182
+ #
1183
+ # @param other [Expr]
1184
+ # Expression to compute dot product with.
1185
+ #
1186
+ # @return [Expr]
1187
+ #
1188
+ # @example
1189
+ # df = Polars::DataFrame.new(
1190
+ # {
1191
+ # "a" => [1, 3, 5],
1192
+ # "b" => [2, 4, 6]
1193
+ # }
1194
+ # )
1195
+ # df.select(Polars.col("a").dot(Polars.col("b")))
1196
+ # # =>
1197
+ # # shape: (1, 1)
1198
+ # # ┌─────┐
1199
+ # # │ a │
1200
+ # # │ --- │
1201
+ # # │ i64 │
1202
+ # # ╞═════╡
1203
+ # # │ 44 │
1204
+ # # └─────┘
758
1205
  def dot(other)
759
1206
  other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
760
1207
  wrap_expr(_rbexpr.dot(other._rbexpr))
@@ -789,19 +1236,206 @@ module Polars
789
1236
  wrap_expr(_rbexpr.mode)
790
1237
  end
791
1238
 
1239
+ # Cast between data types.
1240
+ #
1241
+ # @param dtype [Symbol]
1242
+ # DataType to cast to.
1243
+ # @param strict [Boolean]
1244
+ # Throw an error if a cast could not be done.
1245
+ # For instance, due to an overflow.
1246
+ #
1247
+ # @return [Expr]
1248
+ #
1249
+ # @example
1250
+ # df = Polars::DataFrame.new(
1251
+ # {
1252
+ # "a" => [1, 2, 3],
1253
+ # "b" => ["4", "5", "6"]
1254
+ # }
1255
+ # )
1256
+ # df.with_columns(
1257
+ # [
1258
+ # Polars.col("a").cast(:f64),
1259
+ # Polars.col("b").cast(:i32)
1260
+ # ]
1261
+ # )
1262
+ # # =>
1263
+ # # shape: (3, 2)
1264
+ # # ┌─────┬─────┐
1265
+ # # │ a ┆ b │
1266
+ # # │ --- ┆ --- │
1267
+ # # │ f64 ┆ i32 │
1268
+ # # ╞═════╪═════╡
1269
+ # # │ 1.0 ┆ 4 │
1270
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1271
+ # # │ 2.0 ┆ 5 │
1272
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1273
+ # # │ 3.0 ┆ 6 │
1274
+ # # └─────┴─────┘
792
1275
  def cast(dtype, strict: true)
793
1276
  dtype = Utils.rb_type_to_dtype(dtype)
794
1277
  wrap_expr(_rbexpr.cast(dtype, strict))
795
1278
  end
796
1279
 
1280
+ # Sort this column. In projection/selection context the whole column is sorted.
1281
+ #
1282
+ # If used in a groupby context, the groups are sorted.
1283
+ #
1284
+ # @param reverse [Boolean]
1285
+ # false -> order from small to large.
1286
+ # true -> order from large to small.
1287
+ # @param nulls_last [Boolean]
1288
+ # If true, nulls are considered to be larger than any valid value.
1289
+ #
1290
+ # @return [Expr]
1291
+ #
1292
+ # @example
1293
+ # df = Polars::DataFrame.new(
1294
+ # {
1295
+ # "group" => [
1296
+ # "one",
1297
+ # "one",
1298
+ # "one",
1299
+ # "two",
1300
+ # "two",
1301
+ # "two"
1302
+ # ],
1303
+ # "value" => [1, 98, 2, 3, 99, 4]
1304
+ # }
1305
+ # )
1306
+ # df.select(Polars.col("value").sort)
1307
+ # # =>
1308
+ # # shape: (6, 1)
1309
+ # # ┌───────┐
1310
+ # # │ value │
1311
+ # # │ --- │
1312
+ # # │ i64 │
1313
+ # # ╞═══════╡
1314
+ # # │ 1 │
1315
+ # # ├╌╌╌╌╌╌╌┤
1316
+ # # │ 2 │
1317
+ # # ├╌╌╌╌╌╌╌┤
1318
+ # # │ 3 │
1319
+ # # ├╌╌╌╌╌╌╌┤
1320
+ # # │ 4 │
1321
+ # # ├╌╌╌╌╌╌╌┤
1322
+ # # │ 98 │
1323
+ # # ├╌╌╌╌╌╌╌┤
1324
+ # # │ 99 │
1325
+ # # └───────┘
1326
+ #
1327
+ # @example
1328
+ # df.select(Polars.col("value").sort)
1329
+ # # =>
1330
+ # # shape: (6, 1)
1331
+ # # ┌───────┐
1332
+ # # │ value │
1333
+ # # │ --- │
1334
+ # # │ i64 │
1335
+ # # ╞═══════╡
1336
+ # # │ 1 │
1337
+ # # ├╌╌╌╌╌╌╌┤
1338
+ # # │ 2 │
1339
+ # # ├╌╌╌╌╌╌╌┤
1340
+ # # │ 3 │
1341
+ # # ├╌╌╌╌╌╌╌┤
1342
+ # # │ 4 │
1343
+ # # ├╌╌╌╌╌╌╌┤
1344
+ # # │ 98 │
1345
+ # # ├╌╌╌╌╌╌╌┤
1346
+ # # │ 99 │
1347
+ # # └───────┘
1348
+ #
1349
+ # @example
1350
+ # df.groupby("group").agg(Polars.col("value").sort)
1351
+ # # =>
1352
+ # # shape: (2, 2)
1353
+ # # ┌───────┬────────────┐
1354
+ # # │ group ┆ value │
1355
+ # # │ --- ┆ --- │
1356
+ # # │ str ┆ list[i64] │
1357
+ # # ╞═══════╪════════════╡
1358
+ # # │ two ┆ [3, 4, 99] │
1359
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1360
+ # # │ one ┆ [1, 2, 98] │
1361
+ # # └───────┴────────────┘
797
1362
  def sort(reverse: false, nulls_last: false)
798
1363
  wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
799
1364
  end
800
1365
 
1366
+ # Return the `k` largest elements.
1367
+ #
1368
+ # If `reverse: true`, the smallest elements will be given.
1369
+ #
1370
+ # @param k [Integer]
1371
+ # Number of elements to return.
1372
+ # @param reverse [Boolean]
1373
+ # Return the smallest elements.
1374
+ #
1375
+ # @return [Expr]
1376
+ #
1377
+ # @example
1378
+ # df = Polars::DataFrame.new(
1379
+ # {
1380
+ # "value" => [1, 98, 2, 3, 99, 4]
1381
+ # }
1382
+ # )
1383
+ # df.select(
1384
+ # [
1385
+ # Polars.col("value").top_k.alias("top_k"),
1386
+ # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1387
+ # ]
1388
+ # )
1389
+ # # =>
1390
+ # # shape: (5, 2)
1391
+ # # ┌───────┬──────────┐
1392
+ # # │ top_k ┆ bottom_k │
1393
+ # # │ --- ┆ --- │
1394
+ # # │ i64 ┆ i64 │
1395
+ # # ╞═══════╪══════════╡
1396
+ # # │ 99 ┆ 1 │
1397
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1398
+ # # │ 98 ┆ 2 │
1399
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1400
+ # # │ 4 ┆ 3 │
1401
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1402
+ # # │ 3 ┆ 4 │
1403
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1404
+ # # │ 2 ┆ 98 │
1405
+ # # └───────┴──────────┘
801
1406
  def top_k(k: 5, reverse: false)
802
1407
  wrap_expr(_rbexpr.top_k(k, reverse))
803
1408
  end
804
1409
 
1410
+ # Get the index values that would sort this column.
1411
+ #
1412
+ # @param reverse [Boolean]
1413
+ # Sort in reverse (descending) order.
1414
+ # @param nulls_last [Boolean]
1415
+ # Place null values last instead of first.
1416
+ #
1417
+ # @return [Expr]
1418
+ #
1419
+ # @example
1420
+ # df = Polars::DataFrame.new(
1421
+ # {
1422
+ # "a" => [20, 10, 30]
1423
+ # }
1424
+ # )
1425
+ # df.select(Polars.col("a").arg_sort)
1426
+ # # =>
1427
+ # # shape: (3, 1)
1428
+ # # ┌─────┐
1429
+ # # │ a │
1430
+ # # │ --- │
1431
+ # # │ u32 │
1432
+ # # ╞═════╡
1433
+ # # │ 1 │
1434
+ # # ├╌╌╌╌╌┤
1435
+ # # │ 0 │
1436
+ # # ├╌╌╌╌╌┤
1437
+ # # │ 2 │
1438
+ # # └─────┘
805
1439
  def arg_sort(reverse: false, nulls_last: false)
806
1440
  wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
807
1441
  end
@@ -854,15 +1488,91 @@ module Polars
854
1488
  wrap_expr(_rbexpr.arg_min)
855
1489
  end
856
1490
 
857
- def search_sorted(element)
858
- element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
859
- wrap_expr(_rbexpr.search_sorted(element._rbexpr))
860
- end
861
-
862
- def sort_by(by, reverse: false)
863
- if !by.is_a?(Array)
864
- by = [by]
865
- end
1491
+ # Find indices where elements should be inserted to maintain order.
1492
+ #
1493
+ # @param element [Object]
1494
+ # Expression or scalar value.
1495
+ #
1496
+ # @return [Expr]
1497
+ #
1498
+ # @example
1499
+ # df = Polars::DataFrame.new(
1500
+ # {
1501
+ # "values" => [1, 2, 3, 5]
1502
+ # }
1503
+ # )
1504
+ # df.select(
1505
+ # [
1506
+ # Polars.col("values").search_sorted(0).alias("zero"),
1507
+ # Polars.col("values").search_sorted(3).alias("three"),
1508
+ # Polars.col("values").search_sorted(6).alias("six")
1509
+ # ]
1510
+ # )
1511
+ # # =>
1512
+ # # shape: (1, 3)
1513
+ # # ┌──────┬───────┬─────┐
1514
+ # # │ zero ┆ three ┆ six │
1515
+ # # │ --- ┆ --- ┆ --- │
1516
+ # # │ u32 ┆ u32 ┆ u32 │
1517
+ # # ╞══════╪═══════╪═════╡
1518
+ # # │ 0 ┆ 2 ┆ 4 │
1519
+ # # └──────┴───────┴─────┘
1520
+ def search_sorted(element)
1521
+ element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
1522
+ wrap_expr(_rbexpr.search_sorted(element._rbexpr))
1523
+ end
1524
+
1525
+ # Sort this column by the ordering of another column, or multiple other columns.
1526
+ #
1527
+ # In projection/selection context the whole column is sorted.
1528
+ # If used in a groupby context, the groups are sorted.
1529
+ #
1530
+ # @param by [Object]
1531
+ # The column(s) used for sorting.
1532
+ # @param reverse [Boolean]
1533
+ # false -> order from small to large.
1534
+ # true -> order from large to small.
1535
+ #
1536
+ # @return [Expr]
1537
+ #
1538
+ # @example
1539
+ # df = Polars::DataFrame.new(
1540
+ # {
1541
+ # "group" => [
1542
+ # "one",
1543
+ # "one",
1544
+ # "one",
1545
+ # "two",
1546
+ # "two",
1547
+ # "two"
1548
+ # ],
1549
+ # "value" => [1, 98, 2, 3, 99, 4]
1550
+ # }
1551
+ # )
1552
+ # df.select(Polars.col("group").sort_by("value"))
1553
+ # # =>
1554
+ # # shape: (6, 1)
1555
+ # # ┌───────┐
1556
+ # # │ group │
1557
+ # # │ --- │
1558
+ # # │ str │
1559
+ # # ╞═══════╡
1560
+ # # │ one │
1561
+ # # ├╌╌╌╌╌╌╌┤
1562
+ # # │ one │
1563
+ # # ├╌╌╌╌╌╌╌┤
1564
+ # # │ two │
1565
+ # # ├╌╌╌╌╌╌╌┤
1566
+ # # │ two │
1567
+ # # ├╌╌╌╌╌╌╌┤
1568
+ # # │ one │
1569
+ # # ├╌╌╌╌╌╌╌┤
1570
+ # # │ two │
1571
+ # # └───────┘
1572
+ def sort_by(by, reverse: false)
1573
+ if !by.is_a?(Array)
1574
+ by = [by]
1575
+ end
866
1576
  if !reverse.is_a?(Array)
867
1577
  reverse = [reverse]
868
1578
  end
@@ -871,6 +1581,39 @@ module Polars
871
1581
  wrap_expr(_rbexpr.sort_by(by, reverse))
872
1582
  end
873
1583
 
1584
+ # Take values by index.
1585
+ #
1586
+ # @param indices [Expr]
1587
+ # An expression that leads to a `:u32` dtyped Series.
1588
+ #
1589
+ # @return [Expr]
1590
+ #
1591
+ # @example
1592
+ # df = Polars::DataFrame.new(
1593
+ # {
1594
+ # "group" => [
1595
+ # "one",
1596
+ # "one",
1597
+ # "one",
1598
+ # "two",
1599
+ # "two",
1600
+ # "two"
1601
+ # ],
1602
+ # "value" => [1, 98, 2, 3, 99, 4]
1603
+ # }
1604
+ # )
1605
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1606
+ # # =>
1607
+ # # shape: (2, 2)
1608
+ # # ┌───────┬───────┐
1609
+ # # │ group ┆ value │
1610
+ # # │ --- ┆ --- │
1611
+ # # │ str ┆ i64 │
1612
+ # # ╞═══════╪═══════╡
1613
+ # # │ one ┆ 98 │
1614
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1615
+ # # │ two ┆ 99 │
1616
+ # # └───────┴───────┘
874
1617
  def take(indices)
875
1618
  if indices.is_a?(Array)
876
1619
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
@@ -909,11 +1652,105 @@ module Polars
909
1652
  wrap_expr(_rbexpr.shift(periods))
910
1653
  end
911
1654
 
1655
+ # Shift the values by a given period and fill the resulting null values.
1656
+ #
1657
+ # @param periods [Integer]
1658
+ # Number of places to shift (may be negative).
1659
+ # @param fill_value [Object]
1660
+ # Fill nil values with the result of this expression.
1661
+ #
1662
+ # @return [Expr]
1663
+ #
1664
+ # @example
1665
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
1666
+ # df.select(Polars.col("foo").shift_and_fill(1, "a"))
1667
+ # # =>
1668
+ # # shape: (4, 1)
1669
+ # # ┌─────┐
1670
+ # # │ foo │
1671
+ # # │ --- │
1672
+ # # │ str │
1673
+ # # ╞═════╡
1674
+ # # │ a │
1675
+ # # ├╌╌╌╌╌┤
1676
+ # # │ 1 │
1677
+ # # ├╌╌╌╌╌┤
1678
+ # # │ 2 │
1679
+ # # ├╌╌╌╌╌┤
1680
+ # # │ 3 │
1681
+ # # └─────┘
912
1682
  def shift_and_fill(periods, fill_value)
913
1683
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
914
1684
  wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
915
1685
  end
916
1686
 
1687
+ # Fill null values using the specified value or strategy.
1688
+ #
1689
+ # To interpolate over null values, see `interpolate`.
1690
+ #
1691
+ # @param value [Object]
1692
+ # Value used to fill null values.
1693
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1694
+ # Strategy used to fill null values.
1695
+ # @param limit [Integer]
1696
+ # Number of consecutive null values to fill when using the 'forward' or
1697
+ # 'backward' strategy.
1698
+ #
1699
+ # @return [Expr]
1700
+ #
1701
+ # @example
1702
+ # df = Polars::DataFrame.new(
1703
+ # {
1704
+ # "a" => [1, 2, nil],
1705
+ # "b" => [4, nil, 6]
1706
+ # }
1707
+ # )
1708
+ # df.fill_null(strategy: "zero")
1709
+ # # =>
1710
+ # # shape: (3, 2)
1711
+ # # ┌─────┬─────┐
1712
+ # # │ a ┆ b │
1713
+ # # │ --- ┆ --- │
1714
+ # # │ i64 ┆ i64 │
1715
+ # # ╞═════╪═════╡
1716
+ # # │ 1 ┆ 4 │
1717
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1718
+ # # │ 2 ┆ 0 │
1719
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1720
+ # # │ 0 ┆ 6 │
1721
+ # # └─────┴─────┘
1722
+ #
1723
+ # @example
1724
+ # df.fill_null(99)
1725
+ # # =>
1726
+ # # shape: (3, 2)
1727
+ # # ┌─────┬─────┐
1728
+ # # │ a ┆ b │
1729
+ # # │ --- ┆ --- │
1730
+ # # │ i64 ┆ i64 │
1731
+ # # ╞═════╪═════╡
1732
+ # # │ 1 ┆ 4 │
1733
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1734
+ # # │ 2 ┆ 99 │
1735
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1736
+ # # │ 99 ┆ 6 │
1737
+ # # └─────┴─────┘
1738
+ #
1739
+ # @example
1740
+ # df.fill_null(strategy: "forward")
1741
+ # # =>
1742
+ # # shape: (3, 2)
1743
+ # # ┌─────┬─────┐
1744
+ # # │ a ┆ b │
1745
+ # # │ --- ┆ --- │
1746
+ # # │ i64 ┆ i64 │
1747
+ # # ╞═════╪═════╡
1748
+ # # │ 1 ┆ 4 │
1749
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1750
+ # # │ 2 ┆ 4 │
1751
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1752
+ # # │ 2 ┆ 6 │
1753
+ # # └─────┴─────┘
917
1754
  def fill_null(value = nil, strategy: nil, limit: nil)
918
1755
  if !value.nil? && !strategy.nil?
919
1756
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -931,75 +1768,426 @@ module Polars
931
1768
  end
932
1769
  end
933
1770
 
1771
+ # Fill floating point NaN value with a fill value.
1772
+ #
1773
+ # @return [Expr]
1774
+ #
1775
+ # @example
1776
+ # df = Polars::DataFrame.new(
1777
+ # {
1778
+ # "a" => [1.0, nil, Float::NAN],
1779
+ # "b" => [4.0, Float::NAN, 6]
1780
+ # }
1781
+ # )
1782
+ # df.fill_nan("zero")
1783
+ # # =>
1784
+ # # shape: (3, 2)
1785
+ # # ┌──────┬──────┐
1786
+ # # │ a ┆ b │
1787
+ # # │ --- ┆ --- │
1788
+ # # │ str ┆ str │
1789
+ # # ╞══════╪══════╡
1790
+ # # │ 1.0 ┆ 4.0 │
1791
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1792
+ # # │ null ┆ zero │
1793
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1794
+ # # │ zero ┆ 6.0 │
1795
+ # # └──────┴──────┘
934
1796
  def fill_nan(fill_value)
935
1797
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
936
1798
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
937
1799
  end
938
1800
 
1801
+ # Fill missing values with the latest seen values.
1802
+ #
1803
+ # @param limit [Integer]
1804
+ # The number of consecutive null values to forward fill.
1805
+ #
1806
+ # @return [Expr]
1807
+ #
1808
+ # @example
1809
+ # df = Polars::DataFrame.new(
1810
+ # {
1811
+ # "a" => [1, 2, nil],
1812
+ # "b" => [4, nil, 6]
1813
+ # }
1814
+ # )
1815
+ # df.select(Polars.all.forward_fill)
1816
+ # # =>
1817
+ # # shape: (3, 2)
1818
+ # # ┌─────┬─────┐
1819
+ # # │ a ┆ b │
1820
+ # # │ --- ┆ --- │
1821
+ # # │ i64 ┆ i64 │
1822
+ # # ╞═════╪═════╡
1823
+ # # │ 1 ┆ 4 │
1824
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1825
+ # # │ 2 ┆ 4 │
1826
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1827
+ # # │ 2 ┆ 6 │
1828
+ # # └─────┴─────┘
939
1829
  def forward_fill(limit: nil)
940
1830
  wrap_expr(_rbexpr.forward_fill(limit))
941
1831
  end
942
1832
 
1833
+ # Fill missing values with the next to be seen values.
1834
+ #
1835
+ # @param limit [Integer]
1836
+ # The number of consecutive null values to backward fill.
1837
+ #
1838
+ # @return [Expr]
1839
+ #
1840
+ # @example
1841
+ # df = Polars::DataFrame.new(
1842
+ # {
1843
+ # "a" => [1, 2, nil],
1844
+ # "b" => [4, nil, 6]
1845
+ # }
1846
+ # )
1847
+ # df.select(Polars.all.backward_fill)
1848
+ # # =>
1849
+ # # shape: (3, 2)
1850
+ # # ┌──────┬─────┐
1851
+ # # │ a ┆ b │
1852
+ # # │ --- ┆ --- │
1853
+ # # │ i64 ┆ i64 │
1854
+ # # ╞══════╪═════╡
1855
+ # # │ 1 ┆ 4 │
1856
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1857
+ # # │ 2 ┆ 6 │
1858
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1859
+ # # │ null ┆ 6 │
1860
+ # # └──────┴─────┘
943
1861
  def backward_fill(limit: nil)
944
1862
  wrap_expr(_rbexpr.backward_fill(limit))
945
1863
  end
946
1864
 
1865
+ # Reverse the selection.
1866
+ #
1867
+ # @return [Expr]
947
1868
  def reverse
948
1869
  wrap_expr(_rbexpr.reverse)
949
1870
  end
950
1871
 
1872
+ # Get standard deviation.
1873
+ #
1874
+ # @param ddof [Integer]
1875
+ # Degrees of freedom.
1876
+ #
1877
+ # @return [Expr]
1878
+ #
1879
+ # @example
1880
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1881
+ # df.select(Polars.col("a").std)
1882
+ # # =>
1883
+ # # shape: (1, 1)
1884
+ # # ┌─────┐
1885
+ # # │ a │
1886
+ # # │ --- │
1887
+ # # │ f64 │
1888
+ # # ╞═════╡
1889
+ # # │ 1.0 │
1890
+ # # └─────┘
951
1891
  def std(ddof: 1)
952
1892
  wrap_expr(_rbexpr.std(ddof))
953
1893
  end
954
1894
 
1895
+ # Get variance.
1896
+ #
1897
+ # @param ddof [Integer]
1898
+ # Degrees of freedom.
1899
+ #
1900
+ # @return [Expr]
1901
+ #
1902
+ # @example
1903
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1904
+ # df.select(Polars.col("a").var)
1905
+ # # =>
1906
+ # # shape: (1, 1)
1907
+ # # ┌─────┐
1908
+ # # │ a │
1909
+ # # │ --- │
1910
+ # # │ f64 │
1911
+ # # ╞═════╡
1912
+ # # │ 1.0 │
1913
+ # # └─────┘
955
1914
  def var(ddof: 1)
956
1915
  wrap_expr(_rbexpr.var(ddof))
957
1916
  end
958
1917
 
1918
+ # Get maximum value.
1919
+ #
1920
+ # @return [Expr]
1921
+ #
1922
+ # @example
1923
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1924
+ # df.select(Polars.col("a").max)
1925
+ # # =>
1926
+ # # shape: (1, 1)
1927
+ # # ┌─────┐
1928
+ # # │ a │
1929
+ # # │ --- │
1930
+ # # │ f64 │
1931
+ # # ╞═════╡
1932
+ # # │ 1.0 │
1933
+ # # └─────┘
959
1934
  def max
960
1935
  wrap_expr(_rbexpr.max)
961
1936
  end
962
1937
 
1938
+ # Get minimum value.
1939
+ #
1940
+ # @return [Expr]
1941
+ #
1942
+ # @example
1943
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1944
+ # df.select(Polars.col("a").min)
1945
+ # # =>
1946
+ # # shape: (1, 1)
1947
+ # # ┌──────┐
1948
+ # # │ a │
1949
+ # # │ --- │
1950
+ # # │ f64 │
1951
+ # # ╞══════╡
1952
+ # # │ -1.0 │
1953
+ # # └──────┘
963
1954
  def min
964
1955
  wrap_expr(_rbexpr.min)
965
1956
  end
966
1957
 
1958
+ # Get maximum value, but propagate/poison encountered NaN values.
1959
+ #
1960
+ # @return [Expr]
1961
+ #
1962
+ # @example
1963
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1964
+ # df.select(Polars.col("a").nan_max)
1965
+ # # =>
1966
+ # # shape: (1, 1)
1967
+ # # ┌─────┐
1968
+ # # │ a │
1969
+ # # │ --- │
1970
+ # # │ f64 │
1971
+ # # ╞═════╡
1972
+ # # │ NaN │
1973
+ # # └─────┘
967
1974
  def nan_max
968
1975
  wrap_expr(_rbexpr.nan_max)
969
1976
  end
970
1977
 
1978
+ # Get minimum value, but propagate/poison encountered NaN values.
1979
+ #
1980
+ # @return [Expr]
1981
+ #
1982
+ # @example
1983
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1984
+ # df.select(Polars.col("a").nan_min)
1985
+ # # =>
1986
+ # # shape: (1, 1)
1987
+ # # ┌─────┐
1988
+ # # │ a │
1989
+ # # │ --- │
1990
+ # # │ f64 │
1991
+ # # ╞═════╡
1992
+ # # │ NaN │
1993
+ # # └─────┘
971
1994
  def nan_min
972
1995
  wrap_expr(_rbexpr.nan_min)
973
1996
  end
974
1997
 
1998
+ # Get sum value.
1999
+ #
2000
+ # @return [Expr]
2001
+ #
2002
+ # @note
2003
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
2004
+ # `:i64` before summing to prevent overflow issues.
2005
+ #
2006
+ # @example
2007
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2008
+ # df.select(Polars.col("a").sum)
2009
+ # # =>
2010
+ # # shape: (1, 1)
2011
+ # # ┌─────┐
2012
+ # # │ a │
2013
+ # # │ --- │
2014
+ # # │ i64 │
2015
+ # # ╞═════╡
2016
+ # # │ 0 │
2017
+ # # └─────┘
975
2018
  def sum
976
2019
  wrap_expr(_rbexpr.sum)
977
2020
  end
978
2021
 
979
- def mean
980
- wrap_expr(_rbexpr.mean)
2022
+ # Get mean value.
2023
+ #
2024
+ # @return [Expr]
2025
+ #
2026
+ # @example
2027
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2028
+ # df.select(Polars.col("a").mean)
2029
+ # # =>
2030
+ # # shape: (1, 1)
2031
+ # # ┌─────┐
2032
+ # # │ a │
2033
+ # # │ --- │
2034
+ # # │ f64 │
2035
+ # # ╞═════╡
2036
+ # # │ 0.0 │
2037
+ # # └─────┘
2038
+ def mean
2039
+ wrap_expr(_rbexpr.mean)
981
2040
  end
982
2041
 
2042
+ # Get median value using linear interpolation.
2043
+ #
2044
+ # @return [Expr]
2045
+ #
2046
+ # @example
2047
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2048
+ # df.select(Polars.col("a").median)
2049
+ # # =>
2050
+ # # shape: (1, 1)
2051
+ # # ┌─────┐
2052
+ # # │ a │
2053
+ # # │ --- │
2054
+ # # │ f64 │
2055
+ # # ╞═════╡
2056
+ # # │ 0.0 │
2057
+ # # └─────┘
983
2058
  def median
984
2059
  wrap_expr(_rbexpr.median)
985
2060
  end
986
2061
 
2062
+ # Compute the product of an expression.
2063
+ #
2064
+ # @return [Expr]
2065
+ #
2066
+ # @example
2067
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
2068
+ # df.select(Polars.col("a").product)
2069
+ # # =>
2070
+ # # shape: (1, 1)
2071
+ # # ┌─────┐
2072
+ # # │ a │
2073
+ # # │ --- │
2074
+ # # │ i64 │
2075
+ # # ╞═════╡
2076
+ # # │ 6 │
2077
+ # # └─────┘
987
2078
  def product
988
2079
  wrap_expr(_rbexpr.product)
989
2080
  end
990
2081
 
2082
+ # Count unique values.
2083
+ #
2084
+ # @return [Expr]
2085
+ #
2086
+ # @example
2087
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2088
+ # df.select(Polars.col("a").n_unique)
2089
+ # # =>
2090
+ # # shape: (1, 1)
2091
+ # # ┌─────┐
2092
+ # # │ a │
2093
+ # # │ --- │
2094
+ # # │ u32 │
2095
+ # # ╞═════╡
2096
+ # # │ 2 │
2097
+ # # └─────┘
991
2098
  def n_unique
992
2099
  wrap_expr(_rbexpr.n_unique)
993
2100
  end
994
2101
 
2102
+ # Count null values.
2103
+ #
2104
+ # @return [Expr]
2105
+ #
2106
+ # @example
2107
+ # df = Polars::DataFrame.new(
2108
+ # {
2109
+ # "a" => [nil, 1, nil],
2110
+ # "b" => [1, 2, 3]
2111
+ # }
2112
+ # )
2113
+ # df.select(Polars.all.null_count)
2114
+ # # =>
2115
+ # # shape: (1, 2)
2116
+ # # ┌─────┬─────┐
2117
+ # # │ a ┆ b │
2118
+ # # │ --- ┆ --- │
2119
+ # # │ u32 ┆ u32 │
2120
+ # # ╞═════╪═════╡
2121
+ # # │ 2 ┆ 0 │
2122
+ # # └─────┴─────┘
995
2123
  def null_count
996
2124
  wrap_expr(_rbexpr.null_count)
997
2125
  end
998
2126
 
2127
+ # Get index of first unique value.
2128
+ #
2129
+ # @return [Expr]
2130
+ #
2131
+ # @example
2132
+ # df = Polars::DataFrame.new(
2133
+ # {
2134
+ # "a" => [8, 9, 10],
2135
+ # "b" => [nil, 4, 4]
2136
+ # }
2137
+ # )
2138
+ # df.select(Polars.col("a").arg_unique)
2139
+ # # =>
2140
+ # # shape: (3, 1)
2141
+ # # ┌─────┐
2142
+ # # │ a │
2143
+ # # │ --- │
2144
+ # # │ u32 │
2145
+ # # ╞═════╡
2146
+ # # │ 0 │
2147
+ # # ├╌╌╌╌╌┤
2148
+ # # │ 1 │
2149
+ # # ├╌╌╌╌╌┤
2150
+ # # │ 2 │
2151
+ # # └─────┘
2152
+ #
2153
+ # @example
2154
+ # df.select(Polars.col("b").arg_unique)
2155
+ # # =>
2156
+ # # shape: (2, 1)
2157
+ # # ┌─────┐
2158
+ # # │ b │
2159
+ # # │ --- │
2160
+ # # │ u32 │
2161
+ # # ╞═════╡
2162
+ # # │ 0 │
2163
+ # # ├╌╌╌╌╌┤
2164
+ # # │ 1 │
2165
+ # # └─────┘
999
2166
  def arg_unique
1000
2167
  wrap_expr(_rbexpr.arg_unique)
1001
2168
  end
1002
2169
 
2170
+ # Get unique values of this expression.
2171
+ #
2172
+ # @param maintain_order [Boolean]
2173
+ # Maintain order of data. This requires more work.
2174
+ #
2175
+ # @return [Expr]
2176
+ #
2177
+ # @example
2178
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2179
+ # df.select(Polars.col("a").unique(maintain_order: true))
2180
+ # # =>
2181
+ # # shape: (2, 1)
2182
+ # # ┌─────┐
2183
+ # # │ a │
2184
+ # # │ --- │
2185
+ # # │ i64 │
2186
+ # # ╞═════╡
2187
+ # # │ 1 │
2188
+ # # ├╌╌╌╌╌┤
2189
+ # # │ 2 │
2190
+ # # └─────┘
1003
2191
  def unique(maintain_order: false)
1004
2192
  if maintain_order
1005
2193
  wrap_expr(_rbexpr.unique_stable)
@@ -1008,95 +2196,743 @@ module Polars
1008
2196
  end
1009
2197
  end
1010
2198
 
2199
+ # Get the first value.
2200
+ #
2201
+ # @return [Expr]
2202
+ #
2203
+ # @example
2204
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2205
+ # df.select(Polars.col("a").first)
2206
+ # # =>
2207
+ # # shape: (1, 1)
2208
+ # # ┌─────┐
2209
+ # # │ a │
2210
+ # # │ --- │
2211
+ # # │ i64 │
2212
+ # # ╞═════╡
2213
+ # # │ 1 │
2214
+ # # └─────┘
1011
2215
  def first
1012
2216
  wrap_expr(_rbexpr.first)
1013
2217
  end
1014
2218
 
2219
+ # Get the last value.
2220
+ #
2221
+ # @return [Expr]
2222
+ #
2223
+ # @example
2224
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2225
+ # df.select(Polars.col("a").last)
2226
+ # # =>
2227
+ # # shape: (1, 1)
2228
+ # # ┌─────┐
2229
+ # # │ a │
2230
+ # # │ --- │
2231
+ # # │ i64 │
2232
+ # # ╞═════╡
2233
+ # # │ 2 │
2234
+ # # └─────┘
1015
2235
  def last
1016
2236
  wrap_expr(_rbexpr.last)
1017
2237
  end
1018
2238
 
2239
+ # Apply window function over a subgroup.
2240
+ #
2241
+ # This is similar to a groupby + aggregation + self join.
2242
+ # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2243
+ #
2244
+ # @param expr [Object]
2245
+ # Column(s) to group by.
2246
+ #
2247
+ # @return [Expr]
2248
+ #
2249
+ # @example
2250
+ # df = Polars::DataFrame.new(
2251
+ # {
2252
+ # "groups" => ["g1", "g1", "g2"],
2253
+ # "values" => [1, 2, 3]
2254
+ # }
2255
+ # )
2256
+ # df.with_column(
2257
+ # Polars.col("values").max.over("groups").alias("max_by_group")
2258
+ # )
2259
+ # # =>
2260
+ # # shape: (3, 3)
2261
+ # # ┌────────┬────────┬──────────────┐
2262
+ # # │ groups ┆ values ┆ max_by_group │
2263
+ # # │ --- ┆ --- ┆ --- │
2264
+ # # │ str ┆ i64 ┆ i64 │
2265
+ # # ╞════════╪════════╪══════════════╡
2266
+ # # │ g1 ┆ 1 ┆ 2 │
2267
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2268
+ # # │ g1 ┆ 2 ┆ 2 │
2269
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2270
+ # # │ g2 ┆ 3 ┆ 3 │
2271
+ # # └────────┴────────┴──────────────┘
2272
+ #
2273
+ # @example
2274
+ # df = Polars::DataFrame.new(
2275
+ # {
2276
+ # "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
2277
+ # "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
2278
+ # }
2279
+ # )
2280
+ # df.lazy
2281
+ # .select([Polars.col("groups").sum.over("groups")])
2282
+ # .collect
2283
+ # # =>
2284
+ # # shape: (9, 1)
2285
+ # # ┌────────┐
2286
+ # # │ groups │
2287
+ # # │ --- │
2288
+ # # │ i64 │
2289
+ # # ╞════════╡
2290
+ # # │ 4 │
2291
+ # # ├╌╌╌╌╌╌╌╌┤
2292
+ # # │ 4 │
2293
+ # # ├╌╌╌╌╌╌╌╌┤
2294
+ # # │ 6 │
2295
+ # # ├╌╌╌╌╌╌╌╌┤
2296
+ # # │ 6 │
2297
+ # # ├╌╌╌╌╌╌╌╌┤
2298
+ # # │ ... │
2299
+ # # ├╌╌╌╌╌╌╌╌┤
2300
+ # # │ 6 │
2301
+ # # ├╌╌╌╌╌╌╌╌┤
2302
+ # # │ 6 │
2303
+ # # ├╌╌╌╌╌╌╌╌┤
2304
+ # # │ 6 │
2305
+ # # ├╌╌╌╌╌╌╌╌┤
2306
+ # # │ 4 │
2307
+ # # └────────┘
1019
2308
  def over(expr)
1020
2309
  rbexprs = Utils.selection_to_rbexpr_list(expr)
1021
2310
  wrap_expr(_rbexpr.over(rbexprs))
1022
2311
  end
1023
2312
 
2313
+ # Get mask of unique values.
2314
+ #
2315
+ # @return [Expr]
2316
+ #
2317
+ # @example
2318
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2319
+ # df.select(Polars.col("a").is_unique)
2320
+ # # =>
2321
+ # # shape: (3, 1)
2322
+ # # ┌───────┐
2323
+ # # │ a │
2324
+ # # │ --- │
2325
+ # # │ bool │
2326
+ # # ╞═══════╡
2327
+ # # │ false │
2328
+ # # ├╌╌╌╌╌╌╌┤
2329
+ # # │ false │
2330
+ # # ├╌╌╌╌╌╌╌┤
2331
+ # # │ true │
2332
+ # # └───────┘
1024
2333
  def is_unique
1025
2334
  wrap_expr(_rbexpr.is_unique)
1026
2335
  end
1027
2336
 
2337
+ # Get a mask of the first unique value.
2338
+ #
2339
+ # @return [Expr]
2340
+ #
2341
+ # @example
2342
+ # df = Polars::DataFrame.new(
2343
+ # {
2344
+ # "num" => [1, 2, 3, 1, 5]
2345
+ # }
2346
+ # )
2347
+ # df.with_column(Polars.col("num").is_first.alias("is_first"))
2348
+ # # =>
2349
+ # # shape: (5, 2)
2350
+ # # ┌─────┬──────────┐
2351
+ # # │ num ┆ is_first │
2352
+ # # │ --- ┆ --- │
2353
+ # # │ i64 ┆ bool │
2354
+ # # ╞═════╪══════════╡
2355
+ # # │ 1 ┆ true │
2356
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2357
+ # # │ 2 ┆ true │
2358
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2359
+ # # │ 3 ┆ true │
2360
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2361
+ # # │ 1 ┆ false │
2362
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2363
+ # # │ 5 ┆ true │
2364
+ # # └─────┴──────────┘
1028
2365
  def is_first
1029
2366
  wrap_expr(_rbexpr.is_first)
1030
2367
  end
1031
2368
 
2369
+ # Get mask of duplicated values.
2370
+ #
2371
+ # @return [Expr]
2372
+ #
2373
+ # @example
2374
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2375
+ # df.select(Polars.col("a").is_duplicated)
2376
+ # # =>
2377
+ # # shape: (3, 1)
2378
+ # # ┌───────┐
2379
+ # # │ a │
2380
+ # # │ --- │
2381
+ # # │ bool │
2382
+ # # ╞═══════╡
2383
+ # # │ true │
2384
+ # # ├╌╌╌╌╌╌╌┤
2385
+ # # │ true │
2386
+ # # ├╌╌╌╌╌╌╌┤
2387
+ # # │ false │
2388
+ # # └───────┘
1032
2389
  def is_duplicated
1033
2390
  wrap_expr(_rbexpr.is_duplicated)
1034
2391
  end
1035
2392
 
2393
+ # Get quantile value.
2394
+ #
2395
+ # @param quantile [Float]
2396
+ # Quantile between 0.0 and 1.0.
2397
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2398
+ # Interpolation method.
2399
+ #
2400
+ # @return [Expr]
2401
+ #
2402
+ # @example
2403
+ # df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
2404
+ # df.select(Polars.col("a").quantile(0.3))
2405
+ # # =>
2406
+ # # shape: (1, 1)
2407
+ # # ┌─────┐
2408
+ # # │ a │
2409
+ # # │ --- │
2410
+ # # │ f64 │
2411
+ # # ╞═════╡
2412
+ # # │ 1.0 │
2413
+ # # └─────┘
2414
+ #
2415
+ # @example
2416
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
2417
+ # # =>
2418
+ # # shape: (1, 1)
2419
+ # # ┌─────┐
2420
+ # # │ a │
2421
+ # # │ --- │
2422
+ # # │ f64 │
2423
+ # # ╞═════╡
2424
+ # # │ 2.0 │
2425
+ # # └─────┘
2426
+ #
2427
+ # @example
2428
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
2429
+ # # =>
2430
+ # # shape: (1, 1)
2431
+ # # ┌─────┐
2432
+ # # │ a │
2433
+ # # │ --- │
2434
+ # # │ f64 │
2435
+ # # ╞═════╡
2436
+ # # │ 1.0 │
2437
+ # # └─────┘
2438
+ #
2439
+ # @example
2440
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
2441
+ # # =>
2442
+ # # shape: (1, 1)
2443
+ # # ┌─────┐
2444
+ # # │ a │
2445
+ # # │ --- │
2446
+ # # │ f64 │
2447
+ # # ╞═════╡
2448
+ # # │ 1.5 │
2449
+ # # └─────┘
2450
+ #
2451
+ # @example
2452
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
2453
+ # # =>
2454
+ # # shape: (1, 1)
2455
+ # # ┌─────┐
2456
+ # # │ a │
2457
+ # # │ --- │
2458
+ # # │ f64 │
2459
+ # # ╞═════╡
2460
+ # # │ 1.5 │
2461
+ # # └─────┘
1036
2462
  def quantile(quantile, interpolation: "nearest")
1037
2463
  wrap_expr(_rbexpr.quantile(quantile, interpolation))
1038
2464
  end
1039
2465
 
1040
- def filter(predicate)
2466
+ # Filter a single column.
2467
+ #
2468
+ # Mostly useful in an aggregation context. If you want to filter on a DataFrame
2469
+ # level, use `LazyFrame#filter`.
2470
+ #
2471
+ # @param predicate [Expr]
2472
+ # Boolean expression.
2473
+ #
2474
+ # @return [Expr]
2475
+ #
2476
+ # @example
2477
+ # df = Polars::DataFrame.new(
2478
+ # {
2479
+ # "group_col" => ["g1", "g1", "g2"],
2480
+ # "b" => [1, 2, 3]
2481
+ # }
2482
+ # )
2483
+ # (
2484
+ # df.groupby("group_col").agg(
2485
+ # [
2486
+ # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2487
+ # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
2488
+ # ]
2489
+ # )
2490
+ # ).sort("group_col")
2491
+ # # =>
2492
+ # # shape: (2, 3)
2493
+ # # ┌───────────┬──────┬─────┐
2494
+ # # │ group_col ┆ lt ┆ gte │
2495
+ # # │ --- ┆ --- ┆ --- │
2496
+ # # │ str ┆ i64 ┆ i64 │
2497
+ # # ╞═══════════╪══════╪═════╡
2498
+ # # │ g1 ┆ 1 ┆ 2 │
2499
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2500
+ # # │ g2 ┆ null ┆ 3 │
2501
+ # # └───────────┴──────┴─────┘
2502
+ def filter(predicate)
1041
2503
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
1042
2504
  end
1043
2505
 
2506
+ # Filter a single column.
2507
+ #
2508
+ # Alias for {#filter}.
2509
+ #
2510
+ # @param predicate [Expr]
2511
+ # Boolean expression.
2512
+ #
2513
+ # @return [Expr]
2514
+ #
2515
+ # @example
2516
+ # df = Polars::DataFrame.new(
2517
+ # {
2518
+ # "group_col" => ["g1", "g1", "g2"],
2519
+ # "b" => [1, 2, 3]
2520
+ # }
2521
+ # )
2522
+ # (
2523
+ # df.groupby("group_col").agg(
2524
+ # [
2525
+ # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2526
+ # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
2527
+ # ]
2528
+ # )
2529
+ # ).sort("group_col")
2530
+ # # =>
2531
+ # # shape: (2, 3)
2532
+ # # ┌───────────┬──────┬─────┐
2533
+ # # │ group_col ┆ lt ┆ gte │
2534
+ # # │ --- ┆ --- ┆ --- │
2535
+ # # │ str ┆ i64 ┆ i64 │
2536
+ # # ╞═══════════╪══════╪═════╡
2537
+ # # │ g1 ┆ 1 ┆ 2 │
2538
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2539
+ # # │ g2 ┆ null ┆ 3 │
2540
+ # # └───────────┴──────┴─────┘
1044
2541
  def where(predicate)
1045
2542
  filter(predicate)
1046
2543
  end
1047
2544
 
1048
- # def map
2545
+ # Apply a custom Ruby function to a Series or sequence of Series.
2546
+ #
2547
+ # The output of this custom function must be a Series.
2548
+ # If you want to apply a custom function elementwise over single values, see
2549
+ # {#apply}. A use case for `map` is when you want to transform an
2550
+ # expression with a third-party library.
2551
+ #
2552
+ # Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
2553
+ #
2554
+ # @param return_dtype [Symbol]
2555
+ # Dtype of the output Series.
2556
+ # @param agg_list [Boolean]
2557
+ # Aggregate list.
2558
+ #
2559
+ # @return [Expr]
2560
+ #
2561
+ # @example
2562
+ # df = Polars::DataFrame.new(
2563
+ # {
2564
+ # "sine" => [0.0, 1.0, 0.0, -1.0],
2565
+ # "cosine" => [1.0, 0.0, -1.0, 0.0]
2566
+ # }
2567
+ # )
2568
+ # df.select(Polars.all.map { |x| x.to_numpy.argmax })
2569
+ # # =>
2570
+ # # shape: (1, 2)
2571
+ # # ┌──────┬────────┐
2572
+ # # │ sine ┆ cosine │
2573
+ # # │ --- ┆ --- │
2574
+ # # │ i64 ┆ i64 │
2575
+ # # ╞══════╪════════╡
2576
+ # # │ 1 ┆ 0 │
2577
+ # # └──────┴────────┘
2578
+ # def map(return_dtype: nil, agg_list: false, &block)
2579
+ # if !return_dtype.nil?
2580
+ # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2581
+ # end
2582
+ # wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
1049
2583
  # end
1050
2584
 
1051
2585
  # def apply
1052
2586
  # end
1053
2587
 
2588
+ # Explode a list or utf8 Series. This means that every item is expanded to a new
2589
+ # row.
2590
+ #
2591
+ # Alias for {#explode}.
2592
+ #
2593
+ # @return [Expr]
1054
2594
  #
2595
+ # @example
2596
+ # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2597
+ # df.select(Polars.col("foo").flatten)
2598
+ # # =>
2599
+ # # shape: (10, 1)
2600
+ # # ┌─────┐
2601
+ # # │ foo │
2602
+ # # │ --- │
2603
+ # # │ str │
2604
+ # # ╞═════╡
2605
+ # # │ h │
2606
+ # # ├╌╌╌╌╌┤
2607
+ # # │ e │
2608
+ # # ├╌╌╌╌╌┤
2609
+ # # │ l │
2610
+ # # ├╌╌╌╌╌┤
2611
+ # # │ l │
2612
+ # # ├╌╌╌╌╌┤
2613
+ # # │ ... │
2614
+ # # ├╌╌╌╌╌┤
2615
+ # # │ o │
2616
+ # # ├╌╌╌╌╌┤
2617
+ # # │ r │
2618
+ # # ├╌╌╌╌╌┤
2619
+ # # │ l │
2620
+ # # ├╌╌╌╌╌┤
2621
+ # # │ d │
2622
+ # # └─────┘
1055
2623
  def flatten
1056
2624
  wrap_expr(_rbexpr.explode)
1057
2625
  end
1058
2626
 
2627
+ # Explode a list or utf8 Series.
2628
+ #
2629
+ # This means that every item is expanded to a new row.
2630
+ #
2631
+ # @return [Expr]
2632
+ #
2633
+ # @example
2634
+ # df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
2635
+ # df.select(Polars.col("b").explode)
2636
+ # # =>
2637
+ # # shape: (6, 1)
2638
+ # # ┌─────┐
2639
+ # # │ b │
2640
+ # # │ --- │
2641
+ # # │ i64 │
2642
+ # # ╞═════╡
2643
+ # # │ 1 │
2644
+ # # ├╌╌╌╌╌┤
2645
+ # # │ 2 │
2646
+ # # ├╌╌╌╌╌┤
2647
+ # # │ 3 │
2648
+ # # ├╌╌╌╌╌┤
2649
+ # # │ 4 │
2650
+ # # ├╌╌╌╌╌┤
2651
+ # # │ 5 │
2652
+ # # ├╌╌╌╌╌┤
2653
+ # # │ 6 │
2654
+ # # └─────┘
1059
2655
  def explode
1060
2656
  wrap_expr(_rbexpr.explode)
1061
2657
  end
1062
2658
 
2659
+ # Take every nth value in the Series and return as a new Series.
2660
+ #
2661
+ # @return [Expr]
2662
+ #
2663
+ # @example
2664
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2665
+ # df.select(Polars.col("foo").take_every(3))
2666
+ # # =>
2667
+ # # shape: (3, 1)
2668
+ # # ┌─────┐
2669
+ # # │ foo │
2670
+ # # │ --- │
2671
+ # # │ i64 │
2672
+ # # ╞═════╡
2673
+ # # │ 1 │
2674
+ # # ├╌╌╌╌╌┤
2675
+ # # │ 4 │
2676
+ # # ├╌╌╌╌╌┤
2677
+ # # │ 7 │
2678
+ # # └─────┘
1063
2679
  def take_every(n)
1064
2680
  wrap_expr(_rbexpr.take_every(n))
1065
2681
  end
1066
2682
 
2683
+ # Get the first `n` rows.
2684
+ #
2685
+ # @param n [Integer]
2686
+ # Number of rows to return.
2687
+ #
2688
+ # @return [Expr]
2689
+ #
2690
+ # @example
2691
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2692
+ # df.head(3)
2693
+ # # =>
2694
+ # # shape: (3, 1)
2695
+ # # ┌─────┐
2696
+ # # │ foo │
2697
+ # # │ --- │
2698
+ # # │ i64 │
2699
+ # # ╞═════╡
2700
+ # # │ 1 │
2701
+ # # ├╌╌╌╌╌┤
2702
+ # # │ 2 │
2703
+ # # ├╌╌╌╌╌┤
2704
+ # # │ 3 │
2705
+ # # └─────┘
1067
2706
  def head(n = 10)
1068
2707
  wrap_expr(_rbexpr.head(n))
1069
2708
  end
1070
2709
 
2710
+ # Get the last `n` rows.
2711
+ #
2712
+ # @param n [Integer]
2713
+ # Number of rows to return.
2714
+ #
2715
+ # @return [Expr]
2716
+ #
2717
+ # @example
2718
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2719
+ # df.tail(3)
2720
+ # # =>
2721
+ # # shape: (3, 1)
2722
+ # # ┌─────┐
2723
+ # # │ foo │
2724
+ # # │ --- │
2725
+ # # │ i64 │
2726
+ # # ╞═════╡
2727
+ # # │ 5 │
2728
+ # # ├╌╌╌╌╌┤
2729
+ # # │ 6 │
2730
+ # # ├╌╌╌╌╌┤
2731
+ # # │ 7 │
2732
+ # # └─────┘
1071
2733
  def tail(n = 10)
1072
2734
  wrap_expr(_rbexpr.tail(n))
1073
2735
  end
1074
2736
 
2737
+ # Get the first `n` rows.
2738
+ #
2739
+ # Alias for {#head}.
2740
+ #
2741
+ # @param n [Integer]
2742
+ # Number of rows to return.
2743
+ #
2744
+ # @return [Expr]
1075
2745
  def limit(n = 10)
1076
2746
  head(n)
1077
2747
  end
1078
2748
 
2749
+ # Raise expression to the power of exponent.
2750
+ #
2751
+ # @return [Expr]
2752
+ #
2753
+ # @example
2754
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
2755
+ # df.select(Polars.col("foo").pow(3))
2756
+ # # =>
2757
+ # # shape: (4, 1)
2758
+ # # ┌──────┐
2759
+ # # │ foo │
2760
+ # # │ --- │
2761
+ # # │ f64 │
2762
+ # # ╞══════╡
2763
+ # # │ 1.0 │
2764
+ # # ├╌╌╌╌╌╌┤
2765
+ # # │ 8.0 │
2766
+ # # ├╌╌╌╌╌╌┤
2767
+ # # │ 27.0 │
2768
+ # # ├╌╌╌╌╌╌┤
2769
+ # # │ 64.0 │
2770
+ # # └──────┘
1079
2771
  def pow(exponent)
1080
2772
  exponent = Utils.expr_to_lit_or_expr(exponent)
1081
2773
  wrap_expr(_rbexpr.pow(exponent._rbexpr))
1082
2774
  end
1083
2775
 
1084
- # def is_in
1085
- # end
2776
+ # Check if elements of this expression are present in the other Series.
2777
+ #
2778
+ # @param other [Object]
2779
+ # Series or sequence of primitive type.
2780
+ #
2781
+ # @return [Expr]
2782
+ #
2783
+ # @example
2784
+ # df = Polars::DataFrame.new(
2785
+ # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
2786
+ # )
2787
+ # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
2788
+ # # =>
2789
+ # # shape: (3, 1)
2790
+ # # ┌──────────┐
2791
+ # # │ contains │
2792
+ # # │ --- │
2793
+ # # │ bool │
2794
+ # # ╞══════════╡
2795
+ # # │ true │
2796
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2797
+ # # │ true │
2798
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2799
+ # # │ false │
2800
+ # # └──────────┘
2801
def is_in(other)
  # Normalise `other` into an expression before handing it to the native
  # `is_in`:
  # - a non-empty Array is wrapped in a Series and turned into a literal;
  # - an empty Array is replaced by a null literal instead of building a
  #   Series from it;
  # - anything else goes through Utils.expr_to_lit_or_expr with
  #   `str_to_lit: false` (in the documented example a String such as
  #   "sets" refers to a column).
  other =
    if other.is_a?(Array)
      other.empty? ? Polars.lit(nil) : Polars.lit(Series.new(other))
    else
      Utils.expr_to_lit_or_expr(other, str_to_lit: false)
    end

  wrap_expr(_rbexpr.is_in(other._rbexpr))
end
1086
2813
 
2814
+ # Repeat the elements in this Series as specified in the given expression.
2815
+ #
2816
+ # The repeated elements are expanded into a `List`.
2817
+ #
2818
+ # @param by [Object]
2819
+ # Numeric column that determines how often the values will be repeated.
2820
+ # The column will be coerced to UInt32. Give this dtype to make the coercion a
2821
+ # no-op.
1087
2822
  #
2823
+ # @return [Expr]
2824
+ #
2825
+ # @example
2826
+ # df = Polars::DataFrame.new(
2827
+ # {
2828
+ # "a" => ["x", "y", "z"],
2829
+ # "n" => [1, 2, 3]
2830
+ # }
2831
+ # )
2832
+ # df.select(Polars.col("a").repeat_by("n"))
2833
+ # # =>
2834
+ # # shape: (3, 1)
2835
+ # # ┌─────────────────┐
2836
+ # # │ a │
2837
+ # # │ --- │
2838
+ # # │ list[str] │
2839
+ # # ╞═════════════════╡
2840
+ # # │ ["x"] │
2841
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2842
+ # # │ ["y", "y"] │
2843
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2844
+ # # │ ["z", "z", "z"] │
2845
+ # # └─────────────────┘
1088
2846
  def repeat_by(by)
1089
- by = Utils.expr_to_lit_or_expr(by, false)
2847
+ by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
1090
2848
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
1091
2849
  end
1092
2850
 
1093
- # def is_between
1094
- # end
2851
+ # Check if this expression is between start and end.
2852
+ #
2853
+ # @param start [Object]
2854
+ # Lower bound as primitive type or datetime.
2855
+ # @param _end [Object]
2856
+ # Upper bound as primitive type or datetime.
2857
+ # @param include_bounds [Boolean, Array<Boolean>]
2858
+ # `false`: Exclude both start and end (default).
2859
+ # `true`: Include both start and end.
2860
+ # `[false, false]`: Exclude start and exclude end.
2861
+ # `[true, true]`: Include start and include end.
2862
+ # `[false, true]`: Exclude start and include end.
2863
+ # `[true, false]`: Include start and exclude end.
2864
+ #
2865
+ # @return [Expr]
2866
+ #
2867
+ # @example
2868
+ # df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
2869
+ # df.with_column(Polars.col("num").is_between(2, 4))
2870
+ # # =>
2871
+ # # shape: (5, 2)
2872
+ # # ┌─────┬────────────┐
2873
+ # # │ num ┆ is_between │
2874
+ # # │ --- ┆ --- │
2875
+ # # │ i64 ┆ bool │
2876
+ # # ╞═════╪════════════╡
2877
+ # # │ 1 ┆ false │
2878
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2879
+ # # │ 2 ┆ false │
2880
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2881
+ # # │ 3 ┆ true │
2882
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2883
+ # # │ 4 ┆ false │
2884
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2885
+ # # │ 5 ┆ false │
2886
+ # # └─────┴────────────┘
2887
def is_between(start, _end, include_bounds: false)
  # Normalise `include_bounds` into two flags. A bare boolean applies to both
  # ends; a two-element Array gives [include_start, include_end]. Anything
  # else is rejected before any comparison expression is built.
  include_start, include_end =
    case include_bounds
    when true, false
      [include_bounds, include_bounds]
    when [false, false], [true, true], [false, true], [true, false]
      include_bounds
    else
      raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
    end

  # Lower bound is built first, then the upper bound, then both are and-ed
  # and the result is named "is_between".
  lower = include_start ? (self >= start) : (self > start)
  upper = include_end ? (self <= _end) : (self < _end)
  (lower & upper).alias("is_between")
end
1095
2900
 
1096
2901
  # def _hash
1097
2902
  # end
1098
2903
 
2904
+ # Reinterpret the underlying bits as a signed/unsigned integer.
2905
+ #
2906
+ # This operation is only allowed for 64-bit integers. For integers with fewer bits,
2907
+ # you can safely use the {#cast} operation instead.
2908
+ #
2909
+ # @param signed [Boolean]
2910
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
2911
+ #
2912
+ # @return [Expr]
1099
2913
  #
2914
+ # @example
2915
+ # s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
2916
+ # df = Polars::DataFrame.new([s])
2917
+ # df.select(
2918
+ # [
2919
+ # Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
2920
+ # Polars.col("a").alias("original")
2921
+ # ]
2922
+ # )
2923
+ # # =>
2924
+ # # shape: (3, 2)
2925
+ # # ┌───────────────┬──────────┐
2926
+ # # │ reinterpreted ┆ original │
2927
+ # # │ --- ┆ --- │
2928
+ # # │ i64 ┆ u64 │
2929
+ # # ╞═══════════════╪══════════╡
2930
+ # # │ 1 ┆ 1 │
2931
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2932
+ # # │ 1 ┆ 1 │
2933
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2934
+ # # │ 2 ┆ 2 │
2935
+ # # └───────────────┴──────────┘
1100
2936
  def reinterpret(signed: false)
1101
2937
  wrap_expr(_rbexpr.reinterpret(signed))
1102
2938
  end
@@ -1104,147 +2940,1541 @@ module Polars
1104
2940
  # def _inspect
1105
2941
  # end
1106
2942
 
2943
+ # Fill nulls with linear interpolation over missing values.
2944
+ #
2945
+ # Can also be used to regrid data to a new grid - see examples below.
2946
+ #
2947
+ # @return [Expr]
1107
2948
  #
2949
+ # @example Fill nulls with linear interpolation
2950
+ # df = Polars::DataFrame.new(
2951
+ # {
2952
+ # "a" => [1, nil, 3],
2953
+ # "b" => [1.0, Float::NAN, 3.0]
2954
+ # }
2955
+ # )
2956
+ # df.select(Polars.all.interpolate)
2957
+ # # =>
2958
+ # # shape: (3, 2)
2959
+ # # ┌─────┬─────┐
2960
+ # # │ a ┆ b │
2961
+ # # │ --- ┆ --- │
2962
+ # # │ i64 ┆ f64 │
2963
+ # # ╞═════╪═════╡
2964
+ # # │ 1 ┆ 1.0 │
2965
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2966
+ # # │ 2 ┆ NaN │
2967
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2968
+ # # │ 3 ┆ 3.0 │
2969
+ # # └─────┴─────┘
1108
2970
  def interpolate
1109
2971
  wrap_expr(_rbexpr.interpolate)
1110
2972
  end
1111
2973
 
1112
- # def rolling_min
1113
- # end
1114
-
1115
- # def rolling_max
1116
- # end
1117
-
1118
- # def rolling_mean
1119
- # end
1120
-
1121
- # def rolling_sum
1122
- # end
1123
-
1124
- # def rolling_std
1125
- # end
1126
-
1127
- # def rolling_var
1128
- # end
1129
-
1130
- # def rolling_median
1131
- # end
1132
-
1133
- # def rolling_quantile
1134
- # end
2974
+ # Apply a rolling min (moving min) over the values in this array.
2975
+ #
2976
+ # A window of length `window_size` will traverse the array. The values that fill
2977
+ # this window will (optionally) be multiplied with the weights given by the
2978
+ # `weights` vector. The resulting values will be aggregated to their minimum.
2979
+ #
2980
+ # @param window_size [Integer]
2981
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
2982
+ # size indicated by a timedelta or the following string language:
2983
+ #
2984
+ # - 1ns (1 nanosecond)
2985
+ # - 1us (1 microsecond)
2986
+ # - 1ms (1 millisecond)
2987
+ # - 1s (1 second)
2988
+ # - 1m (1 minute)
2989
+ # - 1h (1 hour)
2990
+ # - 1d (1 day)
2991
+ # - 1w (1 week)
2992
+ # - 1mo (1 calendar month)
2993
+ # - 1y (1 calendar year)
2994
+ # - 1i (1 index count)
2995
+ #
2996
+ # If a timedelta or the dynamic string language is used, the `by`
2997
+ # and `closed` arguments must also be set.
2998
+ # @param weights [Array]
2999
+ # An optional slice with the same length as the window that will be multiplied
3000
+ # elementwise with the values in the window.
3001
+ # @param min_periods [Integer]
3002
+ # The number of values in the window that should be non-null before computing
3003
+ # a result. If nil, it will be set equal to window size.
3004
+ # @param center [Boolean]
3005
+ # Set the labels at the center of the window
3006
+ # @param by [String]
3007
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3008
+ # set the column that will be used to determine the windows. This column must
3009
+ # be of dtype `{Date, Datetime}`
3010
+ # @param closed ["left", "right", "both", "none"]
3011
+ # Define whether the temporal window interval is closed or not.
3012
+ #
3013
+ # @note
3014
+ # This functionality is experimental and may change without it being considered a
3015
+ # breaking change.
3016
+ #
3017
+ # @note
3018
+ # If you want to compute multiple aggregation statistics over the same dynamic
3019
+ # window, consider using `groupby_rolling`; this method can cache the window size
3020
+ # computation.
3021
+ #
3022
+ # @return [Expr]
3023
+ #
3024
+ # @example
3025
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3026
+ # df.select(
3027
+ # [
3028
+ # Polars.col("A").rolling_min(2)
3029
+ # ]
3030
+ # )
3031
+ # # =>
3032
+ # # shape: (6, 1)
3033
+ # # ┌──────┐
3034
+ # # │ A │
3035
+ # # │ --- │
3036
+ # # │ f64 │
3037
+ # # ╞══════╡
3038
+ # # │ null │
3039
+ # # ├╌╌╌╌╌╌┤
3040
+ # # │ 1.0 │
3041
+ # # ├╌╌╌╌╌╌┤
3042
+ # # │ 2.0 │
3043
+ # # ├╌╌╌╌╌╌┤
3044
+ # # │ 3.0 │
3045
+ # # ├╌╌╌╌╌╌┤
3046
+ # # │ 4.0 │
3047
+ # # ├╌╌╌╌╌╌┤
3048
+ # # │ 5.0 │
3049
+ # # └──────┘
3050
+ def rolling_min(
3051
+ window_size,
3052
+ weights: nil,
3053
+ min_periods: nil,
3054
+ center: false,
3055
+ by: nil,
3056
+ closed: "left"
3057
+ )
3058
+ window_size, min_periods = _prepare_rolling_window_args(
3059
+ window_size, min_periods
3060
+ )
3061
+ wrap_expr(
3062
+ _rbexpr.rolling_min(
3063
+ window_size, weights, min_periods, center, by, closed
3064
+ )
3065
+ )
3066
+ end
3067
+
3068
+ # Apply a rolling max (moving max) over the values in this array.
3069
+ #
3070
+ # A window of length `window_size` will traverse the array. The values that fill
3071
+ # this window will (optionally) be multiplied with the weights given by the
3072
+ # `weights` vector. The resulting values will be aggregated to their maximum.
3073
+ #
3074
+ # @param window_size [Integer]
3075
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3076
+ # size indicated by a timedelta or the following string language:
3077
+ #
3078
+ # - 1ns (1 nanosecond)
3079
+ # - 1us (1 microsecond)
3080
+ # - 1ms (1 millisecond)
3081
+ # - 1s (1 second)
3082
+ # - 1m (1 minute)
3083
+ # - 1h (1 hour)
3084
+ # - 1d (1 day)
3085
+ # - 1w (1 week)
3086
+ # - 1mo (1 calendar month)
3087
+ # - 1y (1 calendar year)
3088
+ # - 1i (1 index count)
3089
+ #
3090
+ # If a timedelta or the dynamic string language is used, the `by`
3091
+ # and `closed` arguments must also be set.
3092
+ # @param weights [Array]
3093
+ # An optional slice with the same length as the window that will be multiplied
3094
+ # elementwise with the values in the window.
3095
+ # @param min_periods [Integer]
3096
+ # The number of values in the window that should be non-null before computing
3097
+ # a result. If nil, it will be set equal to window size.
3098
+ # @param center [Boolean]
3099
+ # Set the labels at the center of the window
3100
+ # @param by [String]
3101
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3102
+ # set the column that will be used to determine the windows. This column must
3103
+ # be of dtype `{Date, Datetime}`
3104
+ # @param closed ["left", "right", "both", "none"]
3105
+ # Define whether the temporal window interval is closed or not.
3106
+ #
3107
+ # @note
3108
+ # This functionality is experimental and may change without it being considered a
3109
+ # breaking change.
3110
+ #
3111
+ # @note
3112
+ # If you want to compute multiple aggregation statistics over the same dynamic
3113
+ # window, consider using `groupby_rolling`; this method can cache the window size
3114
+ # computation.
3115
+ #
3116
+ # @return [Expr]
3117
+ #
3118
+ # @example
3119
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3120
+ # df.select(
3121
+ # [
3122
+ # Polars.col("A").rolling_max(2)
3123
+ # ]
3124
+ # )
3125
+ # # =>
3126
+ # # shape: (6, 1)
3127
+ # # ┌──────┐
3128
+ # # │ A │
3129
+ # # │ --- │
3130
+ # # │ f64 │
3131
+ # # ╞══════╡
3132
+ # # │ null │
3133
+ # # ├╌╌╌╌╌╌┤
3134
+ # # │ 2.0 │
3135
+ # # ├╌╌╌╌╌╌┤
3136
+ # # │ 3.0 │
3137
+ # # ├╌╌╌╌╌╌┤
3138
+ # # │ 4.0 │
3139
+ # # ├╌╌╌╌╌╌┤
3140
+ # # │ 5.0 │
3141
+ # # ├╌╌╌╌╌╌┤
3142
+ # # │ 6.0 │
3143
+ # # └──────┘
3144
+ def rolling_max(
3145
+ window_size,
3146
+ weights: nil,
3147
+ min_periods: nil,
3148
+ center: false,
3149
+ by: nil,
3150
+ closed: "left"
3151
+ )
3152
+ window_size, min_periods = _prepare_rolling_window_args(
3153
+ window_size, min_periods
3154
+ )
3155
+ wrap_expr(
3156
+ _rbexpr.rolling_max(
3157
+ window_size, weights, min_periods, center, by, closed
3158
+ )
3159
+ )
3160
+ end
3161
+
3162
+ # Apply a rolling mean (moving mean) over the values in this array.
3163
+ #
3164
+ # A window of length `window_size` will traverse the array. The values that fill
3165
+ # this window will (optionally) be multiplied with the weights given by the
3166
+ # `weights` vector. The resulting values will be aggregated to their mean.
3167
+ #
3168
+ # @param window_size [Integer]
3169
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3170
+ # size indicated by a timedelta or the following string language:
3171
+ #
3172
+ # - 1ns (1 nanosecond)
3173
+ # - 1us (1 microsecond)
3174
+ # - 1ms (1 millisecond)
3175
+ # - 1s (1 second)
3176
+ # - 1m (1 minute)
3177
+ # - 1h (1 hour)
3178
+ # - 1d (1 day)
3179
+ # - 1w (1 week)
3180
+ # - 1mo (1 calendar month)
3181
+ # - 1y (1 calendar year)
3182
+ # - 1i (1 index count)
3183
+ #
3184
+ # If a timedelta or the dynamic string language is used, the `by`
3185
+ # and `closed` arguments must also be set.
3186
+ # @param weights [Array]
3187
+ # An optional slice with the same length as the window that will be multiplied
3188
+ # elementwise with the values in the window.
3189
+ # @param min_periods [Integer]
3190
+ # The number of values in the window that should be non-null before computing
3191
+ # a result. If nil, it will be set equal to window size.
3192
+ # @param center [Boolean]
3193
+ # Set the labels at the center of the window
3194
+ # @param by [String]
3195
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3196
+ # set the column that will be used to determine the windows. This column must
3197
+ # be of dtype `{Date, Datetime}`
3198
+ # @param closed ["left", "right", "both", "none"]
3199
+ # Define whether the temporal window interval is closed or not.
3200
+ #
3201
+ # @note
3202
+ # This functionality is experimental and may change without it being considered a
3203
+ # breaking change.
3204
+ #
3205
+ # @note
3206
+ # If you want to compute multiple aggregation statistics over the same dynamic
3207
+ # window, consider using `groupby_rolling`; this method can cache the window size
3208
+ # computation.
3209
+ #
3210
+ # @return [Expr]
3211
+ #
3212
+ # @example
3213
+ # df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
3214
+ # df.select(
3215
+ # [
3216
+ # Polars.col("A").rolling_mean(2)
3217
+ # ]
3218
+ # )
3219
+ # # =>
3220
+ # # shape: (6, 1)
3221
+ # # ┌──────┐
3222
+ # # │ A │
3223
+ # # │ --- │
3224
+ # # │ f64 │
3225
+ # # ╞══════╡
3226
+ # # │ null │
3227
+ # # ├╌╌╌╌╌╌┤
3228
+ # # │ 4.5 │
3229
+ # # ├╌╌╌╌╌╌┤
3230
+ # # │ 7.0 │
3231
+ # # ├╌╌╌╌╌╌┤
3232
+ # # │ 4.0 │
3233
+ # # ├╌╌╌╌╌╌┤
3234
+ # # │ 9.0 │
3235
+ # # ├╌╌╌╌╌╌┤
3236
+ # # │ 13.0 │
3237
+ # # └──────┘
3238
+ def rolling_mean(
3239
+ window_size,
3240
+ weights: nil,
3241
+ min_periods: nil,
3242
+ center: false,
3243
+ by: nil,
3244
+ closed: "left"
3245
+ )
3246
+ window_size, min_periods = _prepare_rolling_window_args(
3247
+ window_size, min_periods
3248
+ )
3249
+ wrap_expr(
3250
+ _rbexpr.rolling_mean(
3251
+ window_size, weights, min_periods, center, by, closed
3252
+ )
3253
+ )
3254
+ end
3255
+
3256
+ # Apply a rolling sum (moving sum) over the values in this array.
3257
+ #
3258
+ # A window of length `window_size` will traverse the array. The values that fill
3259
+ # this window will (optionally) be multiplied with the weights given by the
3260
+ # `weights` vector. The resulting values will be aggregated to their sum.
3261
+ #
3262
+ # @param window_size [Integer]
3263
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3264
+ # size indicated by a timedelta or the following string language:
3265
+ #
3266
+ # - 1ns (1 nanosecond)
3267
+ # - 1us (1 microsecond)
3268
+ # - 1ms (1 millisecond)
3269
+ # - 1s (1 second)
3270
+ # - 1m (1 minute)
3271
+ # - 1h (1 hour)
3272
+ # - 1d (1 day)
3273
+ # - 1w (1 week)
3274
+ # - 1mo (1 calendar month)
3275
+ # - 1y (1 calendar year)
3276
+ # - 1i (1 index count)
3277
+ #
3278
+ # If a timedelta or the dynamic string language is used, the `by`
3279
+ # and `closed` arguments must also be set.
3280
+ # @param weights [Array]
3281
+ # An optional slice with the same length as the window that will be multiplied
3282
+ # elementwise with the values in the window.
3283
+ # @param min_periods [Integer]
3284
+ # The number of values in the window that should be non-null before computing
3285
+ # a result. If nil, it will be set equal to window size.
3286
+ # @param center [Boolean]
3287
+ # Set the labels at the center of the window
3288
+ # @param by [String]
3289
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3290
+ # set the column that will be used to determine the windows. This column must
3291
+ # be of dtype `{Date, Datetime}`
3292
+ # @param closed ["left", "right", "both", "none"]
3293
+ # Define whether the temporal window interval is closed or not.
3294
+ #
3295
+ # @note
3296
+ # This functionality is experimental and may change without it being considered a
3297
+ # breaking change.
3298
+ #
3299
+ # @note
3300
+ # If you want to compute multiple aggregation statistics over the same dynamic
3301
+ # window, consider using `groupby_rolling`; this method can cache the window size
3302
+ # computation.
3303
+ #
3304
+ # @return [Expr]
3305
+ #
3306
+ # @example
3307
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3308
+ # df.select(
3309
+ # [
3310
+ # Polars.col("A").rolling_sum(2)
3311
+ # ]
3312
+ # )
3313
+ # # =>
3314
+ # # shape: (6, 1)
3315
+ # # ┌──────┐
3316
+ # # │ A │
3317
+ # # │ --- │
3318
+ # # │ f64 │
3319
+ # # ╞══════╡
3320
+ # # │ null │
3321
+ # # ├╌╌╌╌╌╌┤
3322
+ # # │ 3.0 │
3323
+ # # ├╌╌╌╌╌╌┤
3324
+ # # │ 5.0 │
3325
+ # # ├╌╌╌╌╌╌┤
3326
+ # # │ 7.0 │
3327
+ # # ├╌╌╌╌╌╌┤
3328
+ # # │ 9.0 │
3329
+ # # ├╌╌╌╌╌╌┤
3330
+ # # │ 11.0 │
3331
+ # # └──────┘
3332
+ def rolling_sum(
3333
+ window_size,
3334
+ weights: nil,
3335
+ min_periods: nil,
3336
+ center: false,
3337
+ by: nil,
3338
+ closed: "left"
3339
+ )
3340
+ window_size, min_periods = _prepare_rolling_window_args(
3341
+ window_size, min_periods
3342
+ )
3343
+ wrap_expr(
3344
+ _rbexpr.rolling_sum(
3345
+ window_size, weights, min_periods, center, by, closed
3346
+ )
3347
+ )
3348
+ end
3349
+
3350
+ # Compute a rolling standard deviation.
3351
+ #
3352
+ # A window of length `window_size` will traverse the array. The values that fill
3353
+ # this window will (optionally) be multiplied with the weights given by the
3354
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3355
+ #
3356
+ # @param window_size [Integer]
3357
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3358
+ # size indicated by a timedelta or the following string language:
3359
+ #
3360
+ # - 1ns (1 nanosecond)
3361
+ # - 1us (1 microsecond)
3362
+ # - 1ms (1 millisecond)
3363
+ # - 1s (1 second)
3364
+ # - 1m (1 minute)
3365
+ # - 1h (1 hour)
3366
+ # - 1d (1 day)
3367
+ # - 1w (1 week)
3368
+ # - 1mo (1 calendar month)
3369
+ # - 1y (1 calendar year)
3370
+ # - 1i (1 index count)
3371
+ #
3372
+ # If a timedelta or the dynamic string language is used, the `by`
3373
+ # and `closed` arguments must also be set.
3374
+ # @param weights [Array]
3375
+ # An optional slice with the same length as the window that will be multiplied
3376
+ # elementwise with the values in the window.
3377
+ # @param min_periods [Integer]
3378
+ # The number of values in the window that should be non-null before computing
3379
+ # a result. If nil, it will be set equal to window size.
3380
+ # @param center [Boolean]
3381
+ # Set the labels at the center of the window
3382
+ # @param by [String]
3383
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3384
+ # set the column that will be used to determine the windows. This column must
3385
+ # be of dtype `{Date, Datetime}`
3386
+ # @param closed ["left", "right", "both", "none"]
3387
+ # Define whether the temporal window interval is closed or not.
3388
+ #
3389
+ # @note
3390
+ # This functionality is experimental and may change without it being considered a
3391
+ # breaking change.
3392
+ #
3393
+ # @note
3394
+ # If you want to compute multiple aggregation statistics over the same dynamic
3395
+ # window, consider using `groupby_rolling`; this method can cache the window size
3396
+ # computation.
3397
+ #
3398
+ # @return [Expr]
3399
+ #
3400
+ # @example
3401
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3402
+ # df.select(
3403
+ # [
3404
+ # Polars.col("A").rolling_std(3)
3405
+ # ]
3406
+ # )
3407
+ # # =>
3408
+ # # shape: (6, 1)
3409
+ # # ┌──────────┐
3410
+ # # │ A │
3411
+ # # │ --- │
3412
+ # # │ f64 │
3413
+ # # ╞══════════╡
3414
+ # # │ null │
3415
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3416
+ # # │ null │
3417
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3418
+ # # │ 1.0 │
3419
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3420
+ # # │ 1.0 │
3421
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3422
+ # # │ 1.527525 │
3423
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3424
+ # # │ 2.0 │
3425
+ # # └──────────┘
3426
+ def rolling_std(
3427
+ window_size,
3428
+ weights: nil,
3429
+ min_periods: nil,
3430
+ center: false,
3431
+ by: nil,
3432
+ closed: "left"
3433
+ )
3434
+ window_size, min_periods = _prepare_rolling_window_args(
3435
+ window_size, min_periods
3436
+ )
3437
+ wrap_expr(
3438
+ _rbexpr.rolling_std(
3439
+ window_size, weights, min_periods, center, by, closed
3440
+ )
3441
+ )
3442
+ end
3443
+
3444
+ # Compute a rolling variance.
3445
+ #
3446
+ # A window of length `window_size` will traverse the array. The values that fill
3447
+ # this window will (optionally) be multiplied with the weights given by the
3448
+ # `weights` vector. The resulting values will be aggregated to their variance.
3449
+ #
3450
+ # @param window_size [Integer]
3451
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3452
+ # size indicated by a timedelta or the following string language:
3453
+ #
3454
+ # - 1ns (1 nanosecond)
3455
+ # - 1us (1 microsecond)
3456
+ # - 1ms (1 millisecond)
3457
+ # - 1s (1 second)
3458
+ # - 1m (1 minute)
3459
+ # - 1h (1 hour)
3460
+ # - 1d (1 day)
3461
+ # - 1w (1 week)
3462
+ # - 1mo (1 calendar month)
3463
+ # - 1y (1 calendar year)
3464
+ # - 1i (1 index count)
3465
+ #
3466
+ # If a timedelta or the dynamic string language is used, the `by`
3467
+ # and `closed` arguments must also be set.
3468
+ # @param weights [Array]
3469
+ # An optional slice with the same length as the window that will be multiplied
3470
+ # elementwise with the values in the window.
3471
+ # @param min_periods [Integer]
3472
+ # The number of values in the window that should be non-null before computing
3473
+ # a result. If nil, it will be set equal to window size.
3474
+ # @param center [Boolean]
3475
+ # Set the labels at the center of the window
3476
+ # @param by [String]
3477
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3478
+ # set the column that will be used to determine the windows. This column must
3479
+ # be of dtype `{Date, Datetime}`
3480
+ # @param closed ["left", "right", "both", "none"]
3481
+ # Define whether the temporal window interval is closed or not.
3482
+ #
3483
+ # @note
3484
+ # This functionality is experimental and may change without it being considered a
3485
+ # breaking change.
3486
+ #
3487
+ # @note
3488
+ # If you want to compute multiple aggregation statistics over the same dynamic
3489
+ # window, consider using `groupby_rolling`; this method can cache the window size
3490
+ # computation.
3491
+ #
3492
+ # @return [Expr]
3493
+ #
3494
+ # @example
3495
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3496
+ # df.select(
3497
+ # [
3498
+ # Polars.col("A").rolling_var(3)
3499
+ # ]
3500
+ # )
3501
+ # # =>
3502
+ # # shape: (6, 1)
3503
+ # # ┌──────────┐
3504
+ # # │ A │
3505
+ # # │ --- │
3506
+ # # │ f64 │
3507
+ # # ╞══════════╡
3508
+ # # │ null │
3509
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3510
+ # # │ null │
3511
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3512
+ # # │ 1.0 │
3513
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3514
+ # # │ 1.0 │
3515
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3516
+ # # │ 2.333333 │
3517
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3518
+ # # │ 4.0 │
3519
+ # # └──────────┘
3520
+ def rolling_var(
3521
+ window_size,
3522
+ weights: nil,
3523
+ min_periods: nil,
3524
+ center: false,
3525
+ by: nil,
3526
+ closed: "left"
3527
+ )
3528
+ window_size, min_periods = _prepare_rolling_window_args(
3529
+ window_size, min_periods
3530
+ )
3531
+ wrap_expr(
3532
+ _rbexpr.rolling_var(
3533
+ window_size, weights, min_periods, center, by, closed
3534
+ )
3535
+ )
3536
+ end
3537
+
3538
+ # Compute a rolling median.
3539
+ #
3540
+ # @param window_size [Integer]
3541
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3542
+ # size indicated by a timedelta or the following string language:
3543
+ #
3544
+ # - 1ns (1 nanosecond)
3545
+ # - 1us (1 microsecond)
3546
+ # - 1ms (1 millisecond)
3547
+ # - 1s (1 second)
3548
+ # - 1m (1 minute)
3549
+ # - 1h (1 hour)
3550
+ # - 1d (1 day)
3551
+ # - 1w (1 week)
3552
+ # - 1mo (1 calendar month)
3553
+ # - 1y (1 calendar year)
3554
+ # - 1i (1 index count)
3555
+ #
3556
+ # If a timedelta or the dynamic string language is used, the `by`
3557
+ # and `closed` arguments must also be set.
3558
+ # @param weights [Array]
3559
+ # An optional slice with the same length as the window that will be multiplied
3560
+ # elementwise with the values in the window.
3561
+ # @param min_periods [Integer]
3562
+ # The number of values in the window that should be non-null before computing
3563
+ # a result. If nil, it will be set equal to window size.
3564
+ # @param center [Boolean]
3565
+ # Set the labels at the center of the window
3566
+ # @param by [String]
3567
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3568
+ # set the column that will be used to determine the windows. This column must
3569
+ # be of dtype `{Date, Datetime}`
3570
+ # @param closed ["left", "right", "both", "none"]
3571
+ # Define whether the temporal window interval is closed or not.
3572
+ #
3573
+ # @note
3574
+ # This functionality is experimental and may change without it being considered a
3575
+ # breaking change.
3576
+ #
3577
+ # @note
3578
+ # If you want to compute multiple aggregation statistics over the same dynamic
3579
+ # window, consider using `groupby_rolling`; this method can cache the window size
3580
+ # computation.
3581
+ #
3582
+ # @return [Expr]
3583
+ #
3584
+ # @example
3585
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3586
+ # df.select(
3587
+ # [
3588
+ # Polars.col("A").rolling_median(3)
3589
+ # ]
3590
+ # )
3591
+ # # =>
3592
+ # # shape: (6, 1)
3593
+ # # ┌──────┐
3594
+ # # │ A │
3595
+ # # │ --- │
3596
+ # # │ f64 │
3597
+ # # ╞══════╡
3598
+ # # │ null │
3599
+ # # ├╌╌╌╌╌╌┤
3600
+ # # │ null │
3601
+ # # ├╌╌╌╌╌╌┤
3602
+ # # │ 2.0 │
3603
+ # # ├╌╌╌╌╌╌┤
3604
+ # # │ 3.0 │
3605
+ # # ├╌╌╌╌╌╌┤
3606
+ # # │ 4.0 │
3607
+ # # ├╌╌╌╌╌╌┤
3608
+ # # │ 6.0 │
3609
+ # # └──────┘
3610
+ def rolling_median(
3611
+ window_size,
3612
+ weights: nil,
3613
+ min_periods: nil,
3614
+ center: false,
3615
+ by: nil,
3616
+ closed: "left"
3617
+ )
3618
+ window_size, min_periods = _prepare_rolling_window_args(
3619
+ window_size, min_periods
3620
+ )
3621
+ wrap_expr(
3622
+ _rbexpr.rolling_median(
3623
+ window_size, weights, min_periods, center, by, closed
3624
+ )
3625
+ )
3626
+ end
3627
+
3628
+ # Compute a rolling quantile.
3629
+ #
3630
+ # @param quantile [Float]
3631
+ # Quantile between 0.0 and 1.0.
3632
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3633
+ # Interpolation method.
3634
+ # @param window_size [Integer]
3635
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3636
+ # size indicated by a timedelta or the following string language:
3637
+ #
3638
+ # - 1ns (1 nanosecond)
3639
+ # - 1us (1 microsecond)
3640
+ # - 1ms (1 millisecond)
3641
+ # - 1s (1 second)
3642
+ # - 1m (1 minute)
3643
+ # - 1h (1 hour)
3644
+ # - 1d (1 day)
3645
+ # - 1w (1 week)
3646
+ # - 1mo (1 calendar month)
3647
+ # - 1y (1 calendar year)
3648
+ # - 1i (1 index count)
3649
+ #
3650
+ # If a timedelta or the dynamic string language is used, the `by`
3651
+ # and `closed` arguments must also be set.
3652
+ # @param weights [Array]
3653
+ # An optional slice with the same length as the window that will be multiplied
3654
+ # elementwise with the values in the window.
3655
+ # @param min_periods [Integer]
3656
+ # The number of values in the window that should be non-null before computing
3657
+ # a result. If nil, it will be set equal to window size.
3658
+ # @param center [Boolean]
3659
+ # Set the labels at the center of the window
3660
+ # @param by [String]
3661
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3662
+ # set the column that will be used to determine the windows. This column must
3663
+ # be of dtype `{Date, Datetime}`
3664
+ # @param closed ["left", "right", "both", "none"]
3665
+ # Define whether the temporal window interval is closed or not.
3666
+ #
3667
+ # @note
3668
+ # This functionality is experimental and may change without it being considered a
3669
+ # breaking change.
3670
+ #
3671
+ # @note
3672
+ # If you want to compute multiple aggregation statistics over the same dynamic
3673
+ # window, consider using `groupby_rolling`; this method can cache the window size
3674
+ # computation.
3675
+ #
3676
+ # @return [Expr]
3677
+ #
3678
+ # @example
3679
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3680
+ # df.select(
3681
+ # [
3682
+ # Polars.col("A").rolling_quantile(0.33, window_size: 3)
3683
+ # ]
3684
+ # )
3685
+ # # =>
3686
+ # # shape: (6, 1)
3687
+ # # ┌──────┐
3688
+ # # │ A │
3689
+ # # │ --- │
3690
+ # # │ f64 │
3691
+ # # ╞══════╡
3692
+ # # │ null │
3693
+ # # ├╌╌╌╌╌╌┤
3694
+ # # │ null │
3695
+ # # ├╌╌╌╌╌╌┤
3696
+ # # │ 1.0 │
3697
+ # # ├╌╌╌╌╌╌┤
3698
+ # # │ 2.0 │
3699
+ # # ├╌╌╌╌╌╌┤
3700
+ # # │ 3.0 │
3701
+ # # ├╌╌╌╌╌╌┤
3702
+ # # │ 4.0 │
3703
+ # # └──────┘
3704
+ def rolling_quantile(
3705
+ quantile,
3706
+ interpolation: "nearest",
3707
+ window_size: 2,
3708
+ weights: nil,
3709
+ min_periods: nil,
3710
+ center: false,
3711
+ by: nil,
3712
+ closed: "left"
3713
+ )
3714
+ window_size, min_periods = _prepare_rolling_window_args(
3715
+ window_size, min_periods
3716
+ )
3717
+ wrap_expr(
3718
+ _rbexpr.rolling_quantile(
3719
+ quantile, interpolation, window_size, weights, min_periods, center, by, closed
3720
+ )
3721
+ )
3722
+ end
1135
3723
 
1136
3724
  # def rolling_apply
1137
3725
  # end
1138
3726
 
3727
+ # Compute a rolling skew.
1139
3728
  #
3729
+ # @param window_size [Integer]
3730
+ # Integer size of the rolling window.
3731
+ # @param bias [Boolean]
3732
+ # If false, the calculations are corrected for statistical bias.
3733
+ #
3734
+ # @return [Expr]
1140
3735
  def rolling_skew(window_size, bias: true)
1141
3736
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
1142
3737
  end
1143
3738
 
3739
+ # Compute absolute values.
3740
+ #
3741
+ # @return [Expr]
3742
+ #
3743
+ # @example
3744
+ # df = Polars::DataFrame.new(
3745
+ # {
3746
+ # "A" => [-1.0, 0.0, 1.0, 2.0]
3747
+ # }
3748
+ # )
3749
+ # df.select(Polars.col("A").abs)
3750
+ # # =>
3751
+ # # shape: (4, 1)
3752
+ # # ┌─────┐
3753
+ # # │ A │
3754
+ # # │ --- │
3755
+ # # │ f64 │
3756
+ # # ╞═════╡
3757
+ # # │ 1.0 │
3758
+ # # ├╌╌╌╌╌┤
3759
+ # # │ 0.0 │
3760
+ # # ├╌╌╌╌╌┤
3761
+ # # │ 1.0 │
3762
+ # # ├╌╌╌╌╌┤
3763
+ # # │ 2.0 │
3764
+ # # └─────┘
1144
3765
  def abs
1145
3766
  wrap_expr(_rbexpr.abs)
1146
3767
  end
1147
3768
 
3769
+ # Get the index values that would sort this column.
3770
+ #
3771
+ # Alias for {#arg_sort}.
3772
+ #
3773
+ # @param reverse [Boolean]
3774
+ # Sort in reverse (descending) order.
3775
+ # @param nulls_last [Boolean]
3776
+ # Place null values last instead of first.
3777
+ #
3778
+ # @return [Expr]
3779
+ #
3780
+ # @example
3781
+ # df = Polars::DataFrame.new(
3782
+ # {
3783
+ # "a" => [20, 10, 30]
3784
+ # }
3785
+ # )
3786
+ # df.select(Polars.col("a").argsort)
3787
+ # # =>
3788
+ # # shape: (3, 1)
3789
+ # # ┌─────┐
3790
+ # # │ a │
3791
+ # # │ --- │
3792
+ # # │ u32 │
3793
+ # # ╞═════╡
3794
+ # # │ 1 │
3795
+ # # ├╌╌╌╌╌┤
3796
+ # # │ 0 │
3797
+ # # ├╌╌╌╌╌┤
3798
+ # # │ 2 │
3799
+ # # └─────┘
1148
3800
  def argsort(reverse: false, nulls_last: false)
1149
3801
  arg_sort(reverse: reverse, nulls_last: nulls_last)
1150
3802
  end
1151
3803
 
3804
+ # Assign ranks to data, dealing with ties appropriately.
3805
+ #
3806
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3807
+ # The method used to assign ranks to tied elements.
3808
+ # The following methods are available:
3809
+ #
3810
+ # - 'average' : The average of the ranks that would have been assigned to
3811
+ # all the tied values is assigned to each value.
3812
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3813
+ # the tied values is assigned to each value. (This is also referred to
3814
+ # as "competition" ranking.)
3815
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3816
+ # the tied values is assigned to each value.
3817
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3818
+ # assigned the rank immediately after those assigned to the tied
3819
+ # elements.
3820
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3821
+ # the order that the values occur in the Series.
3822
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3823
+ # on the order that the values occur in the Series.
3824
+ # @param reverse [Boolean]
3825
+ # Reverse the operation.
3826
+ #
3827
+ # @return [Expr]
3828
+ #
3829
+ # @example The 'average' method:
3830
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
3831
+ # df.select(Polars.col("a").rank)
3832
+ # # =>
3833
+ # # shape: (5, 1)
3834
+ # # ┌─────┐
3835
+ # # │ a │
3836
+ # # │ --- │
3837
+ # # │ f32 │
3838
+ # # ╞═════╡
3839
+ # # │ 3.0 │
3840
+ # # ├╌╌╌╌╌┤
3841
+ # # │ 4.5 │
3842
+ # # ├╌╌╌╌╌┤
3843
+ # # │ 1.5 │
3844
+ # # ├╌╌╌╌╌┤
3845
+ # # │ 1.5 │
3846
+ # # ├╌╌╌╌╌┤
3847
+ # # │ 4.5 │
3848
+ # # └─────┘
3849
+ #
3850
+ # @example The 'ordinal' method:
3851
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
3852
+ # df.select(Polars.col("a").rank(method: "ordinal"))
3853
+ # # =>
3854
+ # # shape: (5, 1)
3855
+ # # ┌─────┐
3856
+ # # │ a │
3857
+ # # │ --- │
3858
+ # # │ u32 │
3859
+ # # ╞═════╡
3860
+ # # │ 3 │
3861
+ # # ├╌╌╌╌╌┤
3862
+ # # │ 4 │
3863
+ # # ├╌╌╌╌╌┤
3864
+ # # │ 1 │
3865
+ # # ├╌╌╌╌╌┤
3866
+ # # │ 2 │
3867
+ # # ├╌╌╌╌╌┤
3868
+ # # │ 5 │
3869
+ # # └─────┘
1152
3870
  def rank(method: "average", reverse: false)
1153
3871
  wrap_expr(_rbexpr.rank(method, reverse))
1154
3872
  end
1155
3873
 
3874
+ # Calculate the n-th discrete difference.
3875
+ #
3876
+ # @param n [Integer]
3877
+ # Number of slots to shift.
3878
+ # @param null_behavior ["ignore", "drop"]
3879
+ # How to handle null values.
3880
+ #
3881
+ # @return [Expr]
3882
+ #
3883
+ # @example
3884
+ # df = Polars::DataFrame.new(
3885
+ # {
3886
+ # "a" => [20, 10, 30]
3887
+ # }
3888
+ # )
3889
+ # df.select(Polars.col("a").diff)
3890
+ # # =>
3891
+ # # shape: (3, 1)
3892
+ # # ┌──────┐
3893
+ # # │ a │
3894
+ # # │ --- │
3895
+ # # │ i64 │
3896
+ # # ╞══════╡
3897
+ # # │ null │
3898
+ # # ├╌╌╌╌╌╌┤
3899
+ # # │ -10 │
3900
+ # # ├╌╌╌╌╌╌┤
3901
+ # # │ 20 │
3902
+ # # └──────┘
1156
3903
  def diff(n: 1, null_behavior: "ignore")
1157
3904
  wrap_expr(_rbexpr.diff(n, null_behavior))
1158
3905
  end
1159
3906
 
3907
+ # Computes percentage change between values.
3908
+ #
3909
+ # Percentage change (as fraction) between current element and most-recent
3910
+ # non-null element at least `n` period(s) before the current element.
3911
+ #
3912
+ # Computes the change from the previous row by default.
3913
+ #
3914
+ # @param n [Integer]
3915
+ # Periods to shift for forming percent change.
3916
+ #
3917
+ # @return [Expr]
3918
+ #
3919
+ # @example
3920
+ # df = Polars::DataFrame.new(
3921
+ # {
3922
+ # "a" => [10, 11, 12, nil, 12]
3923
+ # }
3924
+ # )
3925
+ # df.with_column(Polars.col("a").pct_change.alias("pct_change"))
3926
+ # # =>
3927
+ # # shape: (5, 2)
3928
+ # # ┌──────┬────────────┐
3929
+ # # │ a ┆ pct_change │
3930
+ # # │ --- ┆ --- │
3931
+ # # │ i64 ┆ f64 │
3932
+ # # ╞══════╪════════════╡
3933
+ # # │ 10 ┆ null │
3934
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3935
+ # # │ 11 ┆ 0.1 │
3936
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3937
+ # # │ 12 ┆ 0.090909 │
3938
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3939
+ # # │ null ┆ 0.0 │
3940
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3941
+ # # │ 12 ┆ 0.0 │
3942
+ # # └──────┴────────────┘
1160
3943
  def pct_change(n: 1)
1161
3944
  wrap_expr(_rbexpr.pct_change(n))
1162
3945
  end
1163
3946
 
3947
+ # Compute the sample skewness of a data set.
3948
+ #
3949
+ # For normally distributed data, the skewness should be about zero. For
3950
+ # unimodal continuous distributions, a skewness value greater than zero means
3951
+ # that there is more weight in the right tail of the distribution. The
3952
+ # function `skewtest` can be used to determine if the skewness value
3953
+ # is close enough to zero, statistically speaking.
3954
+ #
3955
+ # @param bias [Boolean]
3956
+ # If false, the calculations are corrected for statistical bias.
3957
+ #
3958
+ # @return [Expr]
3959
+ #
3960
+ # @example
3961
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
3962
+ # df.select(Polars.col("a").skew)
3963
+ # # =>
3964
+ # # shape: (1, 1)
3965
+ # # ┌──────────┐
3966
+ # # │ a │
3967
+ # # │ --- │
3968
+ # # │ f64 │
3969
+ # # ╞══════════╡
3970
+ # # │ 0.343622 │
3971
+ # # └──────────┘
1164
3972
  def skew(bias: true)
1165
3973
  wrap_expr(_rbexpr.skew(bias))
1166
3974
  end
1167
3975
 
3976
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3977
+ #
3978
+ # Kurtosis is the fourth central moment divided by the square of the
3979
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3980
+ # the result to give 0.0 for a normal distribution.
3981
+ # If bias is false, then the kurtosis is calculated using k statistics to
3982
+ # eliminate bias coming from biased moment estimators.
3983
+ #
3984
+ # @param fisher [Boolean]
3985
+ # If true, Fisher's definition is used (normal ==> 0.0). If false,
3986
+ # Pearson's definition is used (normal ==> 3.0).
3987
+ # @param bias [Boolean]
3988
+ # If false, the calculations are corrected for statistical bias.
3989
+ #
3990
+ # @return [Expr]
3991
+ #
3992
+ # @example
3993
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
3994
+ # df.select(Polars.col("a").kurtosis)
3995
+ # # =>
3996
+ # # shape: (1, 1)
3997
+ # # ┌───────────┐
3998
+ # # │ a │
3999
+ # # │ --- │
4000
+ # # │ f64 │
4001
+ # # ╞═══════════╡
4002
+ # # │ -1.153061 │
4003
+ # # └───────────┘
1168
4004
  def kurtosis(fisher: true, bias: true)
1169
4005
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
1170
4006
  end
1171
4007
 
4008
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
4009
+ #
4010
+ # Only works for numerical types.
4011
+ #
4012
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4013
+ # expression. See `when` for more information.
4014
+ #
4015
+ # @param min_val [Numeric]
4016
+ # Minimum value.
4017
+ # @param max_val [Numeric]
4018
+ # Maximum value.
4019
+ #
4020
+ # @return [Expr]
4021
+ #
4022
+ # @example
4023
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4024
+ # df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
4025
+ # # =>
4026
+ # # shape: (4, 2)
4027
+ # # ┌──────┬─────────────┐
4028
+ # # │ foo ┆ foo_clipped │
4029
+ # # │ --- ┆ --- │
4030
+ # # │ i64 ┆ i64 │
4031
+ # # ╞══════╪═════════════╡
4032
+ # # │ -50 ┆ 1 │
4033
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4034
+ # # │ 5 ┆ 5 │
4035
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4036
+ # # │ null ┆ null │
4037
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4038
+ # # │ 50 ┆ 10 │
4039
+ # # └──────┴─────────────┘
1172
4040
  def clip(min_val, max_val)
1173
4041
  wrap_expr(_rbexpr.clip(min_val, max_val))
1174
4042
  end
1175
4043
 
4044
+ # Clip (limit) the values in an array to a `min` boundary.
4045
+ #
4046
+ # Only works for numerical types.
4047
+ #
4048
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4049
+ # expression. See `when` for more information.
4050
+ #
4051
+ # @param min_val [Numeric]
4052
+ # Minimum value.
4053
+ #
4054
+ # @return [Expr]
4055
+ #
4056
+ # @example
4057
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4058
+ # df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
4059
+ # # =>
4060
+ # # shape: (4, 2)
4061
+ # # ┌──────┬─────────────┐
4062
+ # # │ foo ┆ foo_clipped │
4063
+ # # │ --- ┆ --- │
4064
+ # # │ i64 ┆ i64 │
4065
+ # # ╞══════╪═════════════╡
4066
+ # # │ -50 ┆ 0 │
4067
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4068
+ # # │ 5 ┆ 5 │
4069
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4070
+ # # │ null ┆ null │
4071
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4072
+ # # │ 50 ┆ 50 │
4073
+ # # └──────┴─────────────┘
1176
4074
  def clip_min(min_val)
1177
4075
  wrap_expr(_rbexpr.clip_min(min_val))
1178
4076
  end
1179
4077
 
4078
+ # Clip (limit) the values in an array to a `max` boundary.
4079
+ #
4080
+ # Only works for numerical types.
4081
+ #
4082
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4083
+ # expression. See `when` for more information.
4084
+ #
4085
+ # @param max_val [Numeric]
4086
+ # Maximum value.
4087
+ #
4088
+ # @return [Expr]
4089
+ #
4090
+ # @example
4091
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4092
+ # df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
4093
+ # # =>
4094
+ # # shape: (4, 2)
4095
+ # # ┌──────┬─────────────┐
4096
+ # # │ foo ┆ foo_clipped │
4097
+ # # │ --- ┆ --- │
4098
+ # # │ i64 ┆ i64 │
4099
+ # # ╞══════╪═════════════╡
4100
+ # # │ -50 ┆ -50 │
4101
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4102
+ # # │ 5 ┆ 0 │
4103
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4104
+ # # │ null ┆ null │
4105
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4106
+ # # │ 50 ┆ 0 │
4107
+ # # └──────┴─────────────┘
1180
4108
  def clip_max(max_val)
1181
4109
  wrap_expr(_rbexpr.clip_max(max_val))
1182
4110
  end
1183
4111
 
4112
+ # Calculate the lower bound.
4113
+ #
4114
+ # Returns a unit Series with the lowest value possible for the dtype of this
4115
+ # expression.
4116
+ #
4117
+ # @return [Expr]
4118
+ #
4119
+ # @example
4120
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4121
+ # df.select(Polars.col("a").lower_bound)
4122
+ # # =>
4123
+ # # shape: (1, 1)
4124
+ # # ┌──────────────────────┐
4125
+ # # │ a │
4126
+ # # │ --- │
4127
+ # # │ i64 │
4128
+ # # ╞══════════════════════╡
4129
+ # # │ -9223372036854775808 │
4130
+ # # └──────────────────────┘
1184
4131
  def lower_bound
1185
4132
  wrap_expr(_rbexpr.lower_bound)
1186
4133
  end
1187
4134
 
4135
+ # Calculate the upper bound.
4136
+ #
4137
+ # Returns a unit Series with the highest value possible for the dtype of this
4138
+ # expression.
4139
+ #
4140
+ # @return [Expr]
4141
+ #
4142
+ # @example
4143
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4144
+ # df.select(Polars.col("a").upper_bound)
4145
+ # # =>
4146
+ # # shape: (1, 1)
4147
+ # # ┌─────────────────────┐
4148
+ # # │ a │
4149
+ # # │ --- │
4150
+ # # │ i64 │
4151
+ # # ╞═════════════════════╡
4152
+ # # │ 9223372036854775807 │
4153
+ # # └─────────────────────┘
1188
4154
  def upper_bound
1189
4155
  wrap_expr(_rbexpr.upper_bound)
1190
4156
  end
1191
4157
 
4158
+ # Compute the element-wise indication of the sign.
4159
+ #
4160
+ # @return [Expr]
4161
+ #
4162
+ # @example
4163
+ # df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
4164
+ # df.select(Polars.col("a").sign)
4165
+ # # =>
4166
+ # # shape: (5, 1)
4167
+ # # ┌──────┐
4168
+ # # │ a │
4169
+ # # │ --- │
4170
+ # # │ i64 │
4171
+ # # ╞══════╡
4172
+ # # │ -1 │
4173
+ # # ├╌╌╌╌╌╌┤
4174
+ # # │ 0 │
4175
+ # # ├╌╌╌╌╌╌┤
4176
+ # # │ 0 │
4177
+ # # ├╌╌╌╌╌╌┤
4178
+ # # │ 1 │
4179
+ # # ├╌╌╌╌╌╌┤
4180
+ # # │ null │
4181
+ # # └──────┘
1192
4182
  def sign
1193
4183
  wrap_expr(_rbexpr.sign)
1194
4184
  end
1195
4185
 
4186
+ # Compute the element-wise value for the sine.
4187
+ #
4188
+ # @return [Expr]
4189
+ #
4190
+ # @example
4191
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4192
+ # df.select(Polars.col("a").sin)
4193
+ # # =>
4194
+ # # shape: (1, 1)
4195
+ # # ┌─────┐
4196
+ # # │ a │
4197
+ # # │ --- │
4198
+ # # │ f64 │
4199
+ # # ╞═════╡
4200
+ # # │ 0.0 │
4201
+ # # └─────┘
1196
4202
  def sin
1197
4203
  wrap_expr(_rbexpr.sin)
1198
4204
  end
1199
4205
 
4206
+ # Compute the element-wise value for the cosine.
4207
+ #
4208
+ # @return [Expr]
4209
+ #
4210
+ # @example
4211
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4212
+ # df.select(Polars.col("a").cos)
4213
+ # # =>
4214
+ # # shape: (1, 1)
4215
+ # # ┌─────┐
4216
+ # # │ a │
4217
+ # # │ --- │
4218
+ # # │ f64 │
4219
+ # # ╞═════╡
4220
+ # # │ 1.0 │
4221
+ # # └─────┘
1200
4222
  def cos
1201
4223
  wrap_expr(_rbexpr.cos)
1202
4224
  end
1203
4225
 
4226
+ # Compute the element-wise value for the tangent.
4227
+ #
4228
+ # @return [Expr]
4229
+ #
4230
+ # @example
4231
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4232
+ # df.select(Polars.col("a").tan)
4233
+ # # =>
4234
+ # # shape: (1, 1)
4235
+ # # ┌──────────┐
4236
+ # # │ a │
4237
+ # # │ --- │
4238
+ # # │ f64 │
4239
+ # # ╞══════════╡
4240
+ # # │ 1.557408 │
4241
+ # # └──────────┘
1204
4242
  def tan
1205
4243
  wrap_expr(_rbexpr.tan)
1206
4244
  end
1207
4245
 
4246
+ # Compute the element-wise value for the inverse sine.
4247
+ #
4248
+ # @return [Expr]
4249
+ #
4250
+ # @example
4251
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4252
+ # df.select(Polars.col("a").arcsin)
4253
+ # # =>
4254
+ # # shape: (1, 1)
4255
+ # # ┌──────────┐
4256
+ # # │ a │
4257
+ # # │ --- │
4258
+ # # │ f64 │
4259
+ # # ╞══════════╡
4260
+ # # │ 1.570796 │
4261
+ # # └──────────┘
1208
4262
  def arcsin
1209
4263
  wrap_expr(_rbexpr.arcsin)
1210
4264
  end
1211
4265
 
4266
+ # Compute the element-wise value for the inverse cosine.
4267
+ #
4268
+ # @return [Expr]
4269
+ #
4270
+ # @example
4271
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4272
+ # df.select(Polars.col("a").arccos)
4273
+ # # =>
4274
+ # # shape: (1, 1)
4275
+ # # ┌──────────┐
4276
+ # # │ a │
4277
+ # # │ --- │
4278
+ # # │ f64 │
4279
+ # # ╞══════════╡
4280
+ # # │ 1.570796 │
4281
+ # # └──────────┘
1212
4282
  def arccos
1213
4283
  wrap_expr(_rbexpr.arccos)
1214
4284
  end
1215
4285
 
4286
+ # Compute the element-wise value for the inverse tangent.
4287
+ #
4288
+ # @return [Expr]
4289
+ #
4290
+ # @example
4291
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4292
+ # df.select(Polars.col("a").arctan)
4293
+ # # =>
4294
+ # # shape: (1, 1)
4295
+ # # ┌──────────┐
4296
+ # # │ a │
4297
+ # # │ --- │
4298
+ # # │ f64 │
4299
+ # # ╞══════════╡
4300
+ # # │ 0.785398 │
4301
+ # # └──────────┘
1216
4302
  def arctan
1217
4303
  wrap_expr(_rbexpr.arctan)
1218
4304
  end
1219
4305
 
4306
+ # Compute the element-wise value for the hyperbolic sine.
4307
+ #
4308
+ # @return [Expr]
4309
+ #
4310
+ # @example
4311
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4312
+ # df.select(Polars.col("a").sinh)
4313
+ # # =>
4314
+ # # shape: (1, 1)
4315
+ # # ┌──────────┐
4316
+ # # │ a │
4317
+ # # │ --- │
4318
+ # # │ f64 │
4319
+ # # ╞══════════╡
4320
+ # # │ 1.175201 │
4321
+ # # └──────────┘
1220
4322
  def sinh
1221
4323
  wrap_expr(_rbexpr.sinh)
1222
4324
  end
1223
4325
 
4326
+ # Compute the element-wise value for the hyperbolic cosine.
4327
+ #
4328
+ # @return [Expr]
4329
+ #
4330
+ # @example
4331
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4332
+ # df.select(Polars.col("a").cosh)
4333
+ # # =>
4334
+ # # shape: (1, 1)
4335
+ # # ┌──────────┐
4336
+ # # │ a │
4337
+ # # │ --- │
4338
+ # # │ f64 │
4339
+ # # ╞══════════╡
4340
+ # # │ 1.543081 │
4341
+ # # └──────────┘
1224
4342
  def cosh
1225
4343
  wrap_expr(_rbexpr.cosh)
1226
4344
  end
1227
4345
 
4346
+ # Compute the element-wise value for the hyperbolic tangent.
4347
+ #
4348
+ # @return [Expr]
4349
+ #
4350
+ # @example
4351
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4352
+ # df.select(Polars.col("a").tanh)
4353
+ # # =>
4354
+ # # shape: (1, 1)
4355
+ # # ┌──────────┐
4356
+ # # │ a │
4357
+ # # │ --- │
4358
+ # # │ f64 │
4359
+ # # ╞══════════╡
4360
+ # # │ 0.761594 │
4361
+ # # └──────────┘
1228
4362
  def tanh
1229
4363
  wrap_expr(_rbexpr.tanh)
1230
4364
  end
1231
4365
 
4366
+ # Compute the element-wise value for the inverse hyperbolic sine.
4367
+ #
4368
+ # @return [Expr]
4369
+ #
4370
+ # @example
4371
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4372
+ # df.select(Polars.col("a").arcsinh)
4373
+ # # =>
4374
+ # # shape: (1, 1)
4375
+ # # ┌──────────┐
4376
+ # # │ a │
4377
+ # # │ --- │
4378
+ # # │ f64 │
4379
+ # # ╞══════════╡
4380
+ # # │ 0.881374 │
4381
+ # # └──────────┘
1232
4382
  def arcsinh
1233
4383
  wrap_expr(_rbexpr.arcsinh)
1234
4384
  end
1235
4385
 
4386
+ # Compute the element-wise value for the inverse hyperbolic cosine.
4387
+ #
4388
+ # @return [Expr]
4389
+ #
4390
+ # @example
4391
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4392
+ # df.select(Polars.col("a").arccosh)
4393
+ # # =>
4394
+ # # shape: (1, 1)
4395
+ # # ┌─────┐
4396
+ # # │ a │
4397
+ # # │ --- │
4398
+ # # │ f64 │
4399
+ # # ╞═════╡
4400
+ # # │ 0.0 │
4401
+ # # └─────┘
1236
4402
  def arccosh
1237
4403
  wrap_expr(_rbexpr.arccosh)
1238
4404
  end
1239
4405
 
4406
+ # Compute the element-wise value for the inverse hyperbolic tangent.
4407
+ #
4408
+ # @return [Expr]
4409
+ #
4410
+ # @example
4411
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4412
+ # df.select(Polars.col("a").arctanh)
4413
+ # # =>
4414
+ # # shape: (1, 1)
4415
+ # # ┌─────┐
4416
+ # # │ a │
4417
+ # # │ --- │
4418
+ # # │ f64 │
4419
+ # # ╞═════╡
4420
+ # # │ inf │
4421
+ # # └─────┘
1240
4422
  def arctanh
1241
4423
  wrap_expr(_rbexpr.arctanh)
1242
4424
  end
1243
4425
 
4426
+ # Reshape this Expr to a flat Series or a Series of Lists.
4427
+ #
4428
+ # @param dims [Array]
4429
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
4430
+ # dimension is inferred.
4431
+ #
4432
+ # @return [Expr]
4433
+ #
4434
+ # @example
4435
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
4436
+ # df.select(Polars.col("foo").reshape([3, 3]))
4437
+ # # =>
4438
+ # # shape: (3, 1)
4439
+ # # ┌───────────┐
4440
+ # # │ foo │
4441
+ # # │ --- │
4442
+ # # │ list[i64] │
4443
+ # # ╞═══════════╡
4444
+ # # │ [1, 2, 3] │
4445
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4446
+ # # │ [4, 5, 6] │
4447
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4448
+ # # │ [7, 8, 9] │
4449
+ # # └───────────┘
1244
4450
  def reshape(dims)
1245
4451
  wrap_expr(_rbexpr.reshape(dims))
1246
4452
  end
1247
4453
 
4454
+ # Shuffle the contents of this expr.
4455
+ #
4456
+ # @param seed [Integer]
4457
+ # Seed for the random number generator. If set to nil (default), a random
4458
+ # seed is generated using `rand`.
4459
+ #
4460
+ # @return [Expr]
4461
+ #
4462
+ # @example
4463
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4464
+ # df.select(Polars.col("a").shuffle(seed: 1))
4465
+ # # =>
4466
+ # # shape: (3, 1)
4467
+ # # ┌─────┐
4468
+ # # │ a │
4469
+ # # │ --- │
4470
+ # # │ i64 │
4471
+ # # ╞═════╡
4472
+ # # │ 2 │
4473
+ # # ├╌╌╌╌╌┤
4474
+ # # │ 1 │
4475
+ # # ├╌╌╌╌╌┤
4476
+ # # │ 3 │
4477
+ # # └─────┘
1248
4478
  def shuffle(seed: nil)
1249
4479
  if seed.nil?
1250
4480
  seed = rand(10000)
@@ -1252,74 +4482,514 @@ module Polars
1252
4482
  wrap_expr(_rbexpr.shuffle(seed))
1253
4483
  end
1254
4484
 
1255
- # def sample
1256
- # end
1257
-
1258
- # def ewm_mean
1259
- # end
4485
+ # Sample from this expression.
4486
+ #
4487
+ # @param frac [Float]
4488
+ # Fraction of items to return. Cannot be used with `n`.
4489
+ # @param with_replacement [Boolean]
4490
+ # Allow values to be sampled more than once.
4491
+ # @param shuffle [Boolean]
4492
+ # Shuffle the order of sampled data points.
4493
+ # @param seed [Integer]
4494
+ # Seed for the random number generator. If set to nil (default), a random
4495
+ # seed is used.
4496
+ # @param n [Integer]
4497
+ # Number of items to return. Cannot be used with `frac`.
4498
+ #
4499
+ # @return [Expr]
4500
+ #
4501
+ # @example
4502
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4503
+ # df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
4504
+ # # =>
4505
+ # # shape: (3, 1)
4506
+ # # ┌─────┐
4507
+ # # │ a │
4508
+ # # │ --- │
4509
+ # # │ i64 │
4510
+ # # ╞═════╡
4511
+ # # │ 3 │
4512
+ # # ├╌╌╌╌╌┤
4513
+ # # │ 1 │
4514
+ # # ├╌╌╌╌╌┤
4515
+ # # │ 1 │
4516
+ # # └─────┘
4517
+ def sample(
4518
+ frac: nil,
4519
+ with_replacement: true,
4520
+ shuffle: false,
4521
+ seed: nil,
4522
+ n: nil
4523
+ )
4524
+ if !n.nil? && !frac.nil?
4525
+ raise ArgumentError, "cannot specify both `n` and `frac`"
4526
+ end
1260
4527
 
1261
- # def ewm_std
1262
- # end
4528
+ if !n.nil? && frac.nil?
4529
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4530
+ end
1263
4531
 
1264
- # def ewm_var
1265
- # end
4532
+ if frac.nil?
4533
+ frac = 1.0
4534
+ end
4535
+ wrap_expr(
4536
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4537
+ )
4538
+ end
1266
4539
 
4540
+ # Exponentially-weighted moving average.
4541
+ #
4542
+ # @return [Expr]
4543
+ #
4544
+ # @example
4545
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4546
+ # df.select(Polars.col("a").ewm_mean(com: 1))
4547
+ # # =>
4548
+ # # shape: (3, 1)
4549
+ # # ┌──────────┐
4550
+ # # │ a │
4551
+ # # │ --- │
4552
+ # # │ f64 │
4553
+ # # ╞══════════╡
4554
+ # # │ 1.0 │
4555
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4556
+ # # │ 1.666667 │
4557
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4558
+ # # │ 2.428571 │
4559
+ # # └──────────┘
4560
+ def ewm_mean(
4561
+ com: nil,
4562
+ span: nil,
4563
+ half_life: nil,
4564
+ alpha: nil,
4565
+ adjust: true,
4566
+ min_periods: 1
4567
+ )
4568
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4569
+ wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
4570
+ end
4571
+
4572
+ # Exponentially-weighted moving standard deviation.
4573
+ #
4574
+ # @return [Expr]
4575
+ #
4576
+ # @example
4577
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4578
+ # df.select(Polars.col("a").ewm_std(com: 1))
4579
+ # # =>
4580
+ # # shape: (3, 1)
4581
+ # # ┌──────────┐
4582
+ # # │ a │
4583
+ # # │ --- │
4584
+ # # │ f64 │
4585
+ # # ╞══════════╡
4586
+ # # │ 0.0 │
4587
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4588
+ # # │ 0.707107 │
4589
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4590
+ # # │ 0.963624 │
4591
+ # # └──────────┘
4592
+ def ewm_std(
4593
+ com: nil,
4594
+ span: nil,
4595
+ half_life: nil,
4596
+ alpha: nil,
4597
+ adjust: true,
4598
+ bias: false,
4599
+ min_periods: 1
4600
+ )
4601
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4602
+ wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
4603
+ end
4604
+
4605
+ # Exponentially-weighted moving variance.
4606
+ #
4607
+ # @return [Expr]
4608
+ #
4609
+ # @example
4610
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4611
+ # df.select(Polars.col("a").ewm_var(com: 1))
4612
+ # # =>
4613
+ # # shape: (3, 1)
4614
+ # # ┌──────────┐
4615
+ # # │ a │
4616
+ # # │ --- │
4617
+ # # │ f64 │
4618
+ # # ╞══════════╡
4619
+ # # │ 0.0 │
4620
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4621
+ # # │ 0.5 │
4622
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4623
+ # # │ 0.928571 │
4624
+ # # └──────────┘
4625
+ def ewm_var(
4626
+ com: nil,
4627
+ span: nil,
4628
+ half_life: nil,
4629
+ alpha: nil,
4630
+ adjust: true,
4631
+ bias: false,
4632
+ min_periods: 1
4633
+ )
4634
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4635
+ wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
4636
+ end
4637
+
4638
+ # Extend the Series with given number of values.
4639
+ #
4640
+ # @param value [Object]
4641
+ # The value to extend the Series with. This value may be nil to fill with
4642
+ # nulls.
4643
+ # @param n [Integer]
4644
+ # The number of values to extend.
4645
+ #
4646
+ # @return [Expr]
1267
4647
  #
4648
+ # @example
4649
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4650
+ # df.select(Polars.col("values").extend_constant(99, 2))
4651
+ # # =>
4652
+ # # shape: (5, 1)
4653
+ # # ┌────────┐
4654
+ # # │ values │
4655
+ # # │ --- │
4656
+ # # │ i64 │
4657
+ # # ╞════════╡
4658
+ # # │ 1 │
4659
+ # # ├╌╌╌╌╌╌╌╌┤
4660
+ # # │ 2 │
4661
+ # # ├╌╌╌╌╌╌╌╌┤
4662
+ # # │ 3 │
4663
+ # # ├╌╌╌╌╌╌╌╌┤
4664
+ # # │ 99 │
4665
+ # # ├╌╌╌╌╌╌╌╌┤
4666
+ # # │ 99 │
4667
+ # # └────────┘
1268
4668
  def extend_constant(value, n)
1269
4669
  wrap_expr(_rbexpr.extend_constant(value, n))
1270
4670
  end
1271
4671
 
4672
+ # Count all unique values and create a struct mapping value to count.
4673
+ #
4674
+ # @param multithreaded [Boolean]
4675
+ # Better to turn this off in the aggregation context, as it can lead to
4676
+ # contention.
4677
+ # @param sort [Boolean]
4678
+ # Ensure the output is sorted from most values to least.
4679
+ #
4680
+ # @return [Expr]
4681
+ #
4682
+ # @example
4683
+ # df = Polars::DataFrame.new(
4684
+ # {
4685
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4686
+ # }
4687
+ # )
4688
+ # df.select(
4689
+ # [
4690
+ # Polars.col("id").value_counts(sort: true),
4691
+ # ]
4692
+ # )
4693
+ # # =>
4694
+ # # shape: (3, 1)
4695
+ # # ┌───────────┐
4696
+ # # │ id │
4697
+ # # │ --- │
4698
+ # # │ struct[2] │
4699
+ # # ╞═══════════╡
4700
+ # # │ {"c",3} │
4701
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4702
+ # # │ {"b",2} │
4703
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4704
+ # # │ {"a",1} │
4705
+ # # └───────────┘
1272
4706
  def value_counts(multithreaded: false, sort: false)
1273
4707
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
1274
4708
  end
1275
4709
 
4710
+ # Return a count of the unique values in the order of appearance.
4711
+ #
4712
+ # This method differs from `value_counts` in that it does not return the
4713
+ # values, only the counts, and might be faster.
4714
+ #
4715
+ # @return [Expr]
4716
+ #
4717
+ # @example
4718
+ # df = Polars::DataFrame.new(
4719
+ # {
4720
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4721
+ # }
4722
+ # )
4723
+ # df.select(
4724
+ # [
4725
+ # Polars.col("id").unique_counts
4726
+ # ]
4727
+ # )
4728
+ # # =>
4729
+ # # shape: (3, 1)
4730
+ # # ┌─────┐
4731
+ # # │ id │
4732
+ # # │ --- │
4733
+ # # │ u32 │
4734
+ # # ╞═════╡
4735
+ # # │ 1 │
4736
+ # # ├╌╌╌╌╌┤
4737
+ # # │ 2 │
4738
+ # # ├╌╌╌╌╌┤
4739
+ # # │ 3 │
4740
+ # # └─────┘
1276
4741
  def unique_counts
1277
4742
  wrap_expr(_rbexpr.unique_counts)
1278
4743
  end
1279
4744
 
4745
+ # Compute the logarithm to a given base.
4746
+ #
4747
+ # @param base [Float]
4748
+ # Given base, defaults to `e`.
4749
+ #
4750
+ # @return [Expr]
4751
+ #
4752
+ # @example
4753
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4754
+ # df.select(Polars.col("a").log(2))
4755
+ # # =>
4756
+ # # shape: (3, 1)
4757
+ # # ┌──────────┐
4758
+ # # │ a │
4759
+ # # │ --- │
4760
+ # # │ f64 │
4761
+ # # ╞══════════╡
4762
+ # # │ 0.0 │
4763
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4764
+ # # │ 1.0 │
4765
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4766
+ # # │ 1.584963 │
4767
+ # # └──────────┘
1280
4768
  def log(base = Math::E)
1281
4769
  wrap_expr(_rbexpr.log(base))
1282
4770
  end
1283
4771
 
1284
- def entropy(base: 2, normalize: false)
4772
+ # Computes the entropy.
4773
+ #
4774
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
4775
+ #
4776
+ # @param base [Float]
4777
+ # Given base, defaults to `2`.
4778
+ # @param normalize [Boolean]
4779
+ # Normalize pk if it doesn't sum to 1.
4780
+ #
4781
+ # @return [Expr]
4782
+ #
4783
+ # @example
4784
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4785
+ # df.select(Polars.col("a").entropy(base: 2))
4786
+ # # =>
4787
+ # # shape: (1, 1)
4788
+ # # ┌──────────┐
4789
+ # # │ a │
4790
+ # # │ --- │
4791
+ # # │ f64 │
4792
+ # # ╞══════════╡
4793
+ # # │ 1.459148 │
4794
+ # # └──────────┘
4795
+ #
4796
+ # @example
4797
+ # df.select(Polars.col("a").entropy(base: 2, normalize: false))
4798
+ # # =>
4799
+ # # shape: (1, 1)
4800
+ # # ┌───────────┐
4801
+ # # │ a │
4802
+ # # │ --- │
4803
+ # # │ f64 │
4804
+ # # ╞═══════════╡
4805
+ # # │ -6.754888 │
4806
+ # # └───────────┘
4807
+ def entropy(base: 2, normalize: true)
1285
4808
  wrap_expr(_rbexpr.entropy(base, normalize))
1286
4809
  end
1287
4810
 
1288
- # def cumulative_eval
1289
- # end
1290
-
1291
- # def set_sorted
4811
+ # Run an expression over a sliding window that increases `1` slot every iteration.
4812
+ #
4813
+ # @param expr [Expr]
4814
+ # Expression to evaluate
4815
+ # @param min_periods [Integer]
4816
+ # Number of valid values there should be in the window before the expression
4817
+ # is evaluated. valid values = `length - null_count`
4818
+ # @param parallel [Boolean]
4819
+ # Run in parallel. Don't do this in a groupby or another operation that
4820
+ # already has much parallelization.
4821
+ #
4822
+ # @return [Expr]
4823
+ #
4824
+ # @note
4825
+ # This functionality is experimental and may change without it being considered a
4826
+ # breaking change.
4827
+ #
4828
+ # @note
4829
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
4830
+ # for operations that visit all elements.
4831
+ #
4832
+ # @example
4833
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
4834
+ # df.select(
4835
+ # [
4836
+ # Polars.col("values").cumulative_eval(
4837
+ # Polars.element.first - Polars.element.last ** 2
4838
+ # )
4839
+ # ]
4840
+ # )
4841
+ # # =>
4842
+ # # shape: (5, 1)
4843
+ # # ┌────────┐
4844
+ # # │ values │
4845
+ # # │ --- │
4846
+ # # │ f64 │
4847
+ # # ╞════════╡
4848
+ # # │ 0.0 │
4849
+ # # ├╌╌╌╌╌╌╌╌┤
4850
+ # # │ -3.0 │
4851
+ # # ├╌╌╌╌╌╌╌╌┤
4852
+ # # │ -8.0 │
4853
+ # # ├╌╌╌╌╌╌╌╌┤
4854
+ # # │ -15.0 │
4855
+ # # ├╌╌╌╌╌╌╌╌┤
4856
+ # # │ -24.0 │
4857
+ # # └────────┘
4858
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
4859
+ wrap_expr(
4860
+ _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
4861
+ )
4862
+ end
4863
+
4864
+ # Flags the expression as 'sorted'.
4865
+ #
4866
+ # Enables downstream code to use fast paths for sorted arrays.
4867
+ #
4868
+ # @param reverse [Boolean]
4869
+ # If the `Series` order is reversed, e.g. descending.
4870
+ #
4871
+ # @return [Expr]
4872
+ #
4873
+ # @note
4874
+ # This can lead to incorrect results if this `Series` is not sorted!!
4875
+ # Use with care!
4876
+ #
4877
+ # @example
4878
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4879
+ # df.select(Polars.col("values").set_sorted.max)
4880
+ # # =>
4881
+ # # shape: (1, 1)
4882
+ # # ┌────────┐
4883
+ # # │ values │
4884
+ # # │ --- │
4885
+ # # │ i64 │
4886
+ # # ╞════════╡
4887
+ # # │ 3 │
4888
+ # # └────────┘
4889
+ # def set_sorted(reverse: false)
4890
+ # map { |s| s.set_sorted(reverse) }
1292
4891
  # end
1293
4892
 
4893
+ # Aggregate to list.
4894
+ #
4895
+ # @return [Expr]
1294
4896
  #
4897
+ # @example
4898
+ # df = Polars::DataFrame.new(
4899
+ # {
4900
+ # "a" => [1, 2, 3],
4901
+ # "b" => [4, 5, 6]
4902
+ # }
4903
+ # )
4904
+ # df.select(Polars.all.list)
4905
+ # # =>
4906
+ # # shape: (1, 2)
4907
+ # # ┌───────────┬───────────┐
4908
+ # # │ a ┆ b │
4909
+ # # │ --- ┆ --- │
4910
+ # # │ list[i64] ┆ list[i64] │
4911
+ # # ╞═══════════╪═══════════╡
4912
+ # # │ [1, 2, 3] ┆ [4, 5, 6] │
4913
+ # # └───────────┴───────────┘
1295
4914
  def list
1296
4915
  wrap_expr(_rbexpr.list)
1297
4916
  end
1298
4917
 
4918
+ # Shrink numeric columns to the minimal required datatype.
4919
+ #
4920
+ # Shrink to the dtype needed to fit the extrema of this `Series`.
4921
+ # This can be used to reduce memory pressure.
4922
+ #
4923
+ # @return [Expr]
4924
+ #
4925
+ # @example
4926
+ # Polars::DataFrame.new(
4927
+ # {
4928
+ # "a" => [1, 2, 3],
4929
+ # "b" => [1, 2, 2 << 32],
4930
+ # "c" => [-1, 2, 1 << 30],
4931
+ # "d" => [-112, 2, 112],
4932
+ # "e" => [-112, 2, 129],
4933
+ # "f" => ["a", "b", "c"],
4934
+ # "g" => [0.1, 1.32, 0.12],
4935
+ # "h" => [true, nil, false]
4936
+ # }
4937
+ # ).select(Polars.all.shrink_dtype)
4938
+ # # =>
4939
+ # # shape: (3, 8)
4940
+ # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
4941
+ # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
4942
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
4943
+ # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
4944
+ # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
4945
+ # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
4946
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4947
+ # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
4948
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4949
+ # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
4950
+ # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
1299
4951
  def shrink_dtype
1300
4952
  wrap_expr(_rbexpr.shrink_dtype)
1301
4953
  end
1302
4954
 
4955
+ # Create an object namespace of all list related methods.
4956
+ #
4957
+ # @return [ListExpr]
1303
4958
  def arr
1304
4959
  ListExpr.new(self)
1305
4960
  end
1306
4961
 
4962
+ # Create an object namespace of all categorical related methods.
4963
+ #
4964
+ # @return [CatExpr]
1307
4965
  def cat
1308
4966
  CatExpr.new(self)
1309
4967
  end
1310
4968
 
4969
+ # Create an object namespace of all datetime related methods.
4970
+ #
4971
+ # @return [DateTimeExpr]
1311
4972
  def dt
1312
4973
  DateTimeExpr.new(self)
1313
4974
  end
1314
4975
 
4976
+ # Create an object namespace of all meta related expression methods.
4977
+ #
4978
+ # @return [MetaExpr]
1315
4979
  def meta
1316
4980
  MetaExpr.new(self)
1317
4981
  end
1318
4982
 
4983
+ # Create an object namespace of all string related methods.
4984
+ #
4985
+ # @return [StringExpr]
1319
4986
  def str
1320
4987
  StringExpr.new(self)
1321
4988
  end
1322
4989
 
4990
+ # Create an object namespace of all struct related methods.
4991
+ #
4992
+ # @return [StructExpr]
1323
4993
  def struct
1324
4994
  StructExpr.new(self)
1325
4995
  end
@@ -1337,5 +5007,51 @@ module Polars
1337
5007
  def _to_expr(other)
1338
5008
  other.is_a?(Expr) ? other : Utils.lit(other)
1339
5009
  end
5010
+
5011
+ def _prepare_alpha(com, span, half_life, alpha)
5012
+ if [com, span, half_life, alpha].count { |v| !v.nil? } > 1
5013
+ raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
5014
+ end
5015
+
5016
+ if !com.nil?
5017
+ if com < 0.0
5018
+ raise ArgumentError, "Require 'com' >= 0 (found #{com})"
5019
+ end
5020
+ alpha = 1.0 / (1.0 + com)
5021
+
5022
+ elsif !span.nil?
5023
+ if span < 1.0
5024
+ raise ArgumentError, "Require 'span' >= 1 (found #{span})"
5025
+ end
5026
+ alpha = 2.0 / (span + 1.0)
5027
+
5028
+ elsif !half_life.nil?
5029
+ if half_life <= 0.0
5030
+ raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})"
5031
+ end
5032
+ alpha = 1.0 - Math.exp(-Math.log(2.0) / half_life)
5033
+
5034
+ elsif alpha.nil?
5035
+ raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
5036
+
5037
+ elsif alpha <= 0 || alpha > 1
5038
+ raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
5039
+ end
5040
+
5041
+ alpha
5042
+ end
5043
+
5044
+ def _prepare_rolling_window_args(window_size, min_periods)
5045
+ if window_size.is_a?(Integer)
5046
+ if min_periods.nil?
5047
+ min_periods = window_size
5048
+ end
5049
+ window_size = "#{window_size}i"
5050
+ end
5051
+ if min_periods.nil?
5052
+ min_periods = 1
5053
+ end
5054
+ [window_size, min_periods]
5055
+ end
1340
5056
  end
1341
5057
  end