polars-df 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/expr.rb CHANGED
@@ -138,8 +138,45 @@ module Polars
138
138
  Utils.lit(0) - self
139
139
  end
140
140
 
141
- # def to_physical
142
- # end
141
+ # Cast to physical representation of the logical dtype.
142
+ #
143
+ # - `:date` -> `:i32`
144
+ # - `:datetime` -> `:i64`
145
+ # - `:time` -> `:i64`
146
+ # - `:duration` -> `:i64`
147
+ # - `:cat` -> `:u32`
148
+ # - Other data types will be left unchanged.
149
+ #
150
+ # @return [Expr]
151
+ #
152
+ # @example
153
+ # Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
154
+ # [
155
+ # Polars.col("vals").cast(:cat),
156
+ # Polars.col("vals")
157
+ # .cast(:cat)
158
+ # .to_physical
159
+ # .alias("vals_physical")
160
+ # ]
161
+ # )
162
+ # # =>
163
+ # # shape: (4, 2)
164
+ # # ┌──────┬───────────────┐
165
+ # # │ vals ┆ vals_physical │
166
+ # # │ --- ┆ --- │
167
+ # # │ cat ┆ u32 │
168
+ # # ╞══════╪═══════════════╡
169
+ # # │ a ┆ 0 │
170
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
171
+ # # │ x ┆ 1 │
172
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
173
+ # # │ null ┆ null │
174
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
175
+ # # │ a ┆ 0 │
176
+ # # └──────┴───────────────┘
177
+ def to_physical
178
+ wrap_expr(_rbexpr.to_physical)
179
+ end
143
180
 
144
181
  # Check if any boolean value in a Boolean column is `true`.
145
182
  #
@@ -258,13 +295,82 @@ module Polars
258
295
  wrap_expr(_rbexpr.exp)
259
296
  end
260
297
 
298
+ # Rename the output of an expression.
299
+ #
300
+ # @param name [String]
301
+ # New name.
302
+ #
303
+ # @return [Expr]
304
+ #
305
+ # @example
306
+ # df = Polars::DataFrame.new(
307
+ # {
308
+ # "a" => [1, 2, 3],
309
+ # "b" => ["a", "b", nil]
310
+ # }
311
+ # )
312
+ # df.select(
313
+ # [
314
+ # Polars.col("a").alias("bar"),
315
+ # Polars.col("b").alias("foo")
316
+ # ]
317
+ # )
318
+ # # =>
319
+ # # shape: (3, 2)
320
+ # # ┌─────┬──────┐
321
+ # # │ bar ┆ foo │
322
+ # # │ --- ┆ --- │
323
+ # # │ i64 ┆ str │
324
+ # # ╞═════╪══════╡
325
+ # # │ 1 ┆ a │
326
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
327
+ # # │ 2 ┆ b │
328
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
329
+ # # │ 3 ┆ null │
330
+ # # └─────┴──────┘
261
331
  def alias(name)
262
332
  wrap_expr(_rbexpr._alias(name))
263
333
  end
264
334
 
265
335
  # TODO support symbols for exclude
266
336
 
337
+ # Exclude certain columns from a wildcard/regex selection.
338
+ #
339
+ # You may also use regexes in the exclude list. They must start with `^` and end
340
+ # with `$`.
341
+ #
342
+ # @param columns [Object]
343
+ # Column(s) to exclude from selection.
344
+ # This can be:
267
345
  #
346
+ # - a column name, or multiple column names
347
+ # - a regular expression starting with `^` and ending with `$`
348
+ # - a dtype or multiple dtypes
349
+ #
350
+ # @return [Expr]
351
+ #
352
+ # @example
353
+ # df = Polars::DataFrame.new(
354
+ # {
355
+ # "aa" => [1, 2, 3],
356
+ # "ba" => ["a", "b", nil],
357
+ # "cc" => [nil, 2.5, 1.5]
358
+ # }
359
+ # )
360
+ # df.select(Polars.all.exclude("ba"))
361
+ # # =>
362
+ # # shape: (3, 2)
363
+ # # ┌─────┬──────┐
364
+ # # │ aa ┆ cc │
365
+ # # │ --- ┆ --- │
366
+ # # │ i64 ┆ f64 │
367
+ # # ╞═════╪══════╡
368
+ # # │ 1 ┆ null │
369
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
370
+ # # │ 2 ┆ 2.5 │
371
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
372
+ # # │ 3 ┆ 1.5 │
373
+ # # └─────┴──────┘
268
374
  def exclude(columns)
269
375
  if columns.is_a?(String)
270
376
  columns = [columns]
@@ -285,14 +391,43 @@ module Polars
285
391
  end
286
392
  end
287
393
 
394
+ # Keep the original root name of the expression.
395
+ #
396
+ # @return [Expr]
397
+ #
398
+ # @example
399
+ # df = Polars::DataFrame.new(
400
+ # {
401
+ # "a" => [1, 2],
402
+ # "b" => [3, 4]
403
+ # }
404
+ # )
405
+ # df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
406
+ # # =>
407
+ # # shape: (2, 2)
408
+ # # ┌─────┬─────┐
409
+ # # │ a ┆ b │
410
+ # # │ --- ┆ --- │
411
+ # # │ i64 ┆ i64 │
412
+ # # ╞═════╪═════╡
413
+ # # │ 9 ┆ 3 │
414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
415
+ # # │ 18 ┆ 4 │
416
+ # # └─────┴─────┘
288
417
  def keep_name
289
418
  wrap_expr(_rbexpr.keep_name)
290
419
  end
291
420
 
421
+ # Add a prefix to the root column name of the expression.
422
+ #
423
+ # @return [Expr]
292
424
  def prefix(prefix)
293
425
  wrap_expr(_rbexpr.prefix(prefix))
294
426
  end
295
427
 
428
+ # Add a suffix to the root column name of the expression.
429
+ #
430
+ # @return [Expr]
296
431
  def suffix(suffix)
297
432
  wrap_expr(_rbexpr.suffix(suffix))
298
433
  end
@@ -464,14 +599,112 @@ module Polars
464
599
  wrap_expr(_rbexpr.is_infinite)
465
600
  end
466
601
 
602
+ # Returns a boolean Series indicating which values are NaN.
603
+ #
604
+ # @note
605
+ # Floating point `NaN` (Not A Number) should not be confused
606
+ # with missing data represented as `nil`.
607
+ #
608
+ # @return [Expr]
609
+ #
610
+ # @example
611
+ # df = Polars::DataFrame.new(
612
+ # {
613
+ # "a" => [1, 2, nil, 1, 5],
614
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
615
+ # }
616
+ # )
617
+ # df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
618
+ # # =>
619
+ # # shape: (5, 3)
620
+ # # ┌──────┬─────┬─────────┐
621
+ # # │ a ┆ b ┆ b_isnan │
622
+ # # │ --- ┆ --- ┆ --- │
623
+ # # │ i64 ┆ f64 ┆ bool │
624
+ # # ╞══════╪═════╪═════════╡
625
+ # # │ 1 ┆ 1.0 ┆ false │
626
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
627
+ # # │ 2 ┆ 2.0 ┆ false │
628
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
629
+ # # │ null ┆ NaN ┆ true │
630
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
631
+ # # │ 1 ┆ 1.0 ┆ false │
632
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
633
+ # # │ 5 ┆ 5.0 ┆ false │
634
+ # # └──────┴─────┴─────────┘
467
635
  def is_nan
468
636
  wrap_expr(_rbexpr.is_nan)
469
637
  end
470
638
 
639
+ # Returns a boolean Series indicating which values are not NaN.
640
+ #
641
+ # @note
642
+ # Floating point `NaN` (Not A Number) should not be confused
643
+ # with missing data represented as `nil`.
644
+ #
645
+ # @return [Expr]
646
+ #
647
+ # @example
648
+ # df = Polars::DataFrame.new(
649
+ # {
650
+ # "a" => [1, 2, nil, 1, 5],
651
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
652
+ # }
653
+ # )
654
+ # df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
655
+ # # =>
656
+ # # shape: (5, 3)
657
+ # # ┌──────┬─────┬──────────────┐
658
+ # # │ a ┆ b ┆ b_is_not_nan │
659
+ # # │ --- ┆ --- ┆ --- │
660
+ # # │ i64 ┆ f64 ┆ bool │
661
+ # # ╞══════╪═════╪══════════════╡
662
+ # # │ 1 ┆ 1.0 ┆ true │
663
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
664
+ # # │ 2 ┆ 2.0 ┆ true │
665
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
666
+ # # │ null ┆ NaN ┆ false │
667
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
668
+ # # │ 1 ┆ 1.0 ┆ true │
669
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
670
+ # # │ 5 ┆ 5.0 ┆ true │
671
+ # # └──────┴─────┴──────────────┘
471
672
  def is_not_nan
472
673
  wrap_expr(_rbexpr.is_not_nan)
473
674
  end
474
675
 
676
+ # Get the group indexes of the group by operation.
677
+ #
678
+ # Should be used in aggregation context only.
679
+ #
680
+ # @return [Expr]
681
+ #
682
+ # @example
683
+ # df = Polars::DataFrame.new(
684
+ # {
685
+ # "group" => [
686
+ # "one",
687
+ # "one",
688
+ # "one",
689
+ # "two",
690
+ # "two",
691
+ # "two"
692
+ # ],
693
+ # "value" => [94, 95, 96, 97, 97, 99]
694
+ # }
695
+ # )
696
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
697
+ # # =>
698
+ # # shape: (2, 2)
699
+ # # ┌───────┬───────────┐
700
+ # # │ group ┆ value │
701
+ # # │ --- ┆ --- │
702
+ # # │ str ┆ list[u32] │
703
+ # # ╞═══════╪═══════════╡
704
+ # # │ one ┆ [0, 1, 2] │
705
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
706
+ # # │ two ┆ [3, 4, 5] │
707
+ # # └───────┴───────────┘
475
708
  def agg_groups
476
709
  wrap_expr(_rbexpr.agg_groups)
477
710
  end
@@ -557,6 +790,36 @@ module Polars
557
790
  wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
558
791
  end
559
792
 
793
+ # Append expressions.
794
+ #
795
+ # This is done by adding the chunks of `other` to this `Series`.
796
+ #
797
+ # @param other [Expr]
798
+ # Expression to append.
799
+ # @param upcast [Boolean]
800
+ # Cast both `Series` to the same supertype.
801
+ #
802
+ # @return [Expr]
803
+ #
804
+ # @example
805
+ # df = Polars::DataFrame.new(
806
+ # {
807
+ # "a" => [8, 9, 10],
808
+ # "b" => [nil, 4, 4]
809
+ # }
810
+ # )
811
+ # df.select(Polars.all.head(1).append(Polars.all.tail(1)))
812
+ # # =>
813
+ # # shape: (2, 2)
814
+ # # ┌─────┬──────┐
815
+ # # │ a ┆ b │
816
+ # # │ --- ┆ --- │
817
+ # # │ i64 ┆ i64 │
818
+ # # ╞═════╪══════╡
819
+ # # │ 8 ┆ null │
820
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
821
+ # # │ 10 ┆ 4 │
822
+ # # └─────┴──────┘
560
823
  def append(other, upcast: true)
561
824
  other = Utils.expr_to_lit_or_expr(other)
562
825
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
@@ -567,7 +830,7 @@ module Polars
567
830
  # @return [Expr]
568
831
  #
569
832
  # @example Create a Series with 3 nulls, append column a then rechunk
570
- # df = Polars::DataFrame.new({"a": [1, 1, 2]})
833
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
571
834
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
572
835
  # # =>
573
836
  # # shape: (6, 1)
@@ -650,22 +913,182 @@ module Polars
650
913
  wrap_expr(_rbexpr.drop_nans)
651
914
  end
652
915
 
916
+ # Get an array with the cumulative sum computed at every element.
917
+ #
918
+ # @param reverse [Boolean]
919
+ # Reverse the operation.
920
+ #
921
+ # @return [Expr]
922
+ #
923
+ # @note
924
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
925
+ # `:i64` before summing to prevent overflow issues.
926
+ #
927
+ # @example
928
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
929
+ # df.select(
930
+ # [
931
+ # Polars.col("a").cumsum,
932
+ # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
933
+ # ]
934
+ # )
935
+ # # =>
936
+ # # shape: (4, 2)
937
+ # # ┌─────┬───────────┐
938
+ # # │ a ┆ a_reverse │
939
+ # # │ --- ┆ --- │
940
+ # # │ i64 ┆ i64 │
941
+ # # ╞═════╪═══════════╡
942
+ # # │ 1 ┆ 10 │
943
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
944
+ # # │ 3 ┆ 9 │
945
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
946
+ # # │ 6 ┆ 7 │
947
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
948
+ # # │ 10 ┆ 4 │
949
+ # # └─────┴───────────┘
653
950
  def cumsum(reverse: false)
654
951
  wrap_expr(_rbexpr.cumsum(reverse))
655
952
  end
656
953
 
954
+ # Get an array with the cumulative product computed at every element.
955
+ #
956
+ # @param reverse [Boolean]
957
+ # Reverse the operation.
958
+ #
959
+ # @return [Expr]
960
+ #
961
+ # @note
962
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
963
+ # `:i64` before multiplying to prevent overflow issues.
964
+ #
965
+ # @example
966
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
967
+ # df.select(
968
+ # [
969
+ # Polars.col("a").cumprod,
970
+ # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
971
+ # ]
972
+ # )
973
+ # # =>
974
+ # # shape: (4, 2)
975
+ # # ┌─────┬───────────┐
976
+ # # │ a ┆ a_reverse │
977
+ # # │ --- ┆ --- │
978
+ # # │ i64 ┆ i64 │
979
+ # # ╞═════╪═══════════╡
980
+ # # │ 1 ┆ 24 │
981
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
982
+ # # │ 2 ┆ 24 │
983
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
984
+ # # │ 6 ┆ 12 │
985
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
986
+ # # │ 24 ┆ 4 │
987
+ # # └─────┴───────────┘
657
988
  def cumprod(reverse: false)
658
989
  wrap_expr(_rbexpr.cumprod(reverse))
659
990
  end
660
991
 
992
+ # Get an array with the cumulative min computed at every element.
993
+ #
994
+ # @param reverse [Boolean]
995
+ # Reverse the operation.
996
+ #
997
+ # @return [Expr]
998
+ #
999
+ # @example
1000
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1001
+ # df.select(
1002
+ # [
1003
+ # Polars.col("a").cummin,
1004
+ # Polars.col("a").cummin(reverse: true).alias("a_reverse")
1005
+ # ]
1006
+ # )
1007
+ # # =>
1008
+ # # shape: (4, 2)
1009
+ # # ┌─────┬───────────┐
1010
+ # # │ a ┆ a_reverse │
1011
+ # # │ --- ┆ --- │
1012
+ # # │ i64 ┆ i64 │
1013
+ # # ╞═════╪═══════════╡
1014
+ # # │ 1 ┆ 1 │
1015
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1016
+ # # │ 1 ┆ 2 │
1017
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1018
+ # # │ 1 ┆ 3 │
1019
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1020
+ # # │ 1 ┆ 4 │
1021
+ # # └─────┴───────────┘
661
1022
  def cummin(reverse: false)
662
1023
  wrap_expr(_rbexpr.cummin(reverse))
663
1024
  end
664
1025
 
1026
+ # Get an array with the cumulative max computed at every element.
1027
+ #
1028
+ # @param reverse [Boolean]
1029
+ # Reverse the operation.
1030
+ #
1031
+ # @return [Expr]
1032
+ #
1033
+ # @example
1034
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1035
+ # df.select(
1036
+ # [
1037
+ # Polars.col("a").cummax,
1038
+ # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1039
+ # ]
1040
+ # )
1041
+ # # =>
1042
+ # # shape: (4, 2)
1043
+ # # ┌─────┬───────────┐
1044
+ # # │ a ┆ a_reverse │
1045
+ # # │ --- ┆ --- │
1046
+ # # │ i64 ┆ i64 │
1047
+ # # ╞═════╪═══════════╡
1048
+ # # │ 1 ┆ 4 │
1049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1050
+ # # │ 2 ┆ 4 │
1051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1052
+ # # │ 3 ┆ 4 │
1053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1054
+ # # │ 4 ┆ 4 │
1055
+ # # └─────┴───────────┘
665
1056
  def cummax(reverse: false)
666
1057
  wrap_expr(_rbexpr.cummax(reverse))
667
1058
  end
668
1059
 
1060
+ # Get an array with the cumulative count computed at every element.
1061
+ #
1062
+ # Counts from 0 to `len - 1`.
1063
+ #
1064
+ # @param reverse [Boolean]
1065
+ # Reverse the operation.
1066
+ #
1067
+ # @return [Expr]
1068
+ #
1069
+ # @example
1070
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1071
+ # df.select(
1072
+ # [
1073
+ # Polars.col("a").cumcount,
1074
+ # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1075
+ # ]
1076
+ # )
1077
+ # # =>
1078
+ # # shape: (4, 2)
1079
+ # # ┌─────┬───────────┐
1080
+ # # │ a ┆ a_reverse │
1081
+ # # │ --- ┆ --- │
1082
+ # # │ u32 ┆ u32 │
1083
+ # # ╞═════╪═══════════╡
1084
+ # # │ 0 ┆ 3 │
1085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1086
+ # # │ 1 ┆ 2 │
1087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1088
+ # # │ 2 ┆ 1 │
1089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1090
+ # # │ 3 ┆ 0 │
1091
+ # # └─────┴───────────┘
669
1092
  def cumcount(reverse: false)
670
1093
  wrap_expr(_rbexpr.cumcount(reverse))
671
1094
  end
@@ -755,6 +1178,30 @@ module Polars
755
1178
  wrap_expr(_rbexpr.round(decimals))
756
1179
  end
757
1180
 
1181
+ # Compute the dot/inner product between two Expressions.
1182
+ #
1183
+ # @param other [Expr]
1184
+ # Expression to compute dot product with.
1185
+ #
1186
+ # @return [Expr]
1187
+ #
1188
+ # @example
1189
+ # df = Polars::DataFrame.new(
1190
+ # {
1191
+ # "a" => [1, 3, 5],
1192
+ # "b" => [2, 4, 6]
1193
+ # }
1194
+ # )
1195
+ # df.select(Polars.col("a").dot(Polars.col("b")))
1196
+ # # =>
1197
+ # # shape: (1, 1)
1198
+ # # ┌─────┐
1199
+ # # │ a │
1200
+ # # │ --- │
1201
+ # # │ i64 │
1202
+ # # ╞═════╡
1203
+ # # │ 44 │
1204
+ # # └─────┘
758
1205
  def dot(other)
759
1206
  other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
760
1207
  wrap_expr(_rbexpr.dot(other._rbexpr))
@@ -789,19 +1236,206 @@ module Polars
789
1236
  wrap_expr(_rbexpr.mode)
790
1237
  end
791
1238
 
1239
+ # Cast between data types.
1240
+ #
1241
+ # @param dtype [Symbol]
1242
+ # DataType to cast to.
1243
+ # @param strict [Boolean]
1244
+ # Throw an error if a cast could not be done.
1245
+ # For instance, due to an overflow.
1246
+ #
1247
+ # @return [Expr]
1248
+ #
1249
+ # @example
1250
+ # df = Polars::DataFrame.new(
1251
+ # {
1252
+ # "a" => [1, 2, 3],
1253
+ # "b" => ["4", "5", "6"]
1254
+ # }
1255
+ # )
1256
+ # df.with_columns(
1257
+ # [
1258
+ # Polars.col("a").cast(:f64),
1259
+ # Polars.col("b").cast(:i32)
1260
+ # ]
1261
+ # )
1262
+ # # =>
1263
+ # # shape: (3, 2)
1264
+ # # ┌─────┬─────┐
1265
+ # # │ a ┆ b │
1266
+ # # │ --- ┆ --- │
1267
+ # # │ f64 ┆ i32 │
1268
+ # # ╞═════╪═════╡
1269
+ # # │ 1.0 ┆ 4 │
1270
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1271
+ # # │ 2.0 ┆ 5 │
1272
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1273
+ # # │ 3.0 ┆ 6 │
1274
+ # # └─────┴─────┘
792
1275
  def cast(dtype, strict: true)
793
1276
  dtype = Utils.rb_type_to_dtype(dtype)
794
1277
  wrap_expr(_rbexpr.cast(dtype, strict))
795
1278
  end
796
1279
 
1280
+ # Sort this column. In projection/selection context, the whole column is sorted.
1281
+ #
1282
+ # If used in a groupby context, the groups are sorted.
1283
+ #
1284
+ # @param reverse [Boolean]
1285
+ # false -> order from small to large.
1286
+ # true -> order from large to small.
1287
+ # @param nulls_last [Boolean]
1288
+ # If true, nulls are considered to be larger than any valid value.
1289
+ #
1290
+ # @return [Expr]
1291
+ #
1292
+ # @example
1293
+ # df = Polars::DataFrame.new(
1294
+ # {
1295
+ # "group" => [
1296
+ # "one",
1297
+ # "one",
1298
+ # "one",
1299
+ # "two",
1300
+ # "two",
1301
+ # "two"
1302
+ # ],
1303
+ # "value" => [1, 98, 2, 3, 99, 4]
1304
+ # }
1305
+ # )
1306
+ # df.select(Polars.col("value").sort)
1307
+ # # =>
1308
+ # # shape: (6, 1)
1309
+ # # ┌───────┐
1310
+ # # │ value │
1311
+ # # │ --- │
1312
+ # # │ i64 │
1313
+ # # ╞═══════╡
1314
+ # # │ 1 │
1315
+ # # ├╌╌╌╌╌╌╌┤
1316
+ # # │ 2 │
1317
+ # # ├╌╌╌╌╌╌╌┤
1318
+ # # │ 3 │
1319
+ # # ├╌╌╌╌╌╌╌┤
1320
+ # # │ 4 │
1321
+ # # ├╌╌╌╌╌╌╌┤
1322
+ # # │ 98 │
1323
+ # # ├╌╌╌╌╌╌╌┤
1324
+ # # │ 99 │
1325
+ # # └───────┘
1326
+ #
1327
+ # @example
1328
+ # df.select(Polars.col("value").sort)
1329
+ # # =>
1330
+ # # shape: (6, 1)
1331
+ # # ┌───────┐
1332
+ # # │ value │
1333
+ # # │ --- │
1334
+ # # │ i64 │
1335
+ # # ╞═══════╡
1336
+ # # │ 1 │
1337
+ # # ├╌╌╌╌╌╌╌┤
1338
+ # # │ 2 │
1339
+ # # ├╌╌╌╌╌╌╌┤
1340
+ # # │ 3 │
1341
+ # # ├╌╌╌╌╌╌╌┤
1342
+ # # │ 4 │
1343
+ # # ├╌╌╌╌╌╌╌┤
1344
+ # # │ 98 │
1345
+ # # ├╌╌╌╌╌╌╌┤
1346
+ # # │ 99 │
1347
+ # # └───────┘
1348
+ #
1349
+ # @example
1350
+ # df.groupby("group").agg(Polars.col("value").sort)
1351
+ # # =>
1352
+ # # shape: (2, 2)
1353
+ # # ┌───────┬────────────┐
1354
+ # # │ group ┆ value │
1355
+ # # │ --- ┆ --- │
1356
+ # # │ str ┆ list[i64] │
1357
+ # # ╞═══════╪════════════╡
1358
+ # # │ two ┆ [3, 4, 99] │
1359
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1360
+ # # │ one ┆ [1, 2, 98] │
1361
+ # # └───────┴────────────┘
797
1362
  def sort(reverse: false, nulls_last: false)
798
1363
  wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
799
1364
  end
800
1365
 
1366
+ # Return the `k` largest elements.
1367
+ #
1368
+ # If `reverse: true`, the smallest elements will be given.
1369
+ #
1370
+ # @param k [Integer]
1371
+ # Number of elements to return.
1372
+ # @param reverse [Boolean]
1373
+ # Return the smallest elements.
1374
+ #
1375
+ # @return [Expr]
1376
+ #
1377
+ # @example
1378
+ # df = Polars::DataFrame.new(
1379
+ # {
1380
+ # "value" => [1, 98, 2, 3, 99, 4]
1381
+ # }
1382
+ # )
1383
+ # df.select(
1384
+ # [
1385
+ # Polars.col("value").top_k.alias("top_k"),
1386
+ # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1387
+ # ]
1388
+ # )
1389
+ # # =>
1390
+ # # shape: (5, 2)
1391
+ # # ┌───────┬──────────┐
1392
+ # # │ top_k ┆ bottom_k │
1393
+ # # │ --- ┆ --- │
1394
+ # # │ i64 ┆ i64 │
1395
+ # # ╞═══════╪══════════╡
1396
+ # # │ 99 ┆ 1 │
1397
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1398
+ # # │ 98 ┆ 2 │
1399
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1400
+ # # │ 4 ┆ 3 │
1401
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1402
+ # # │ 3 ┆ 4 │
1403
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1404
+ # # │ 2 ┆ 98 │
1405
+ # # └───────┴──────────┘
801
1406
  def top_k(k: 5, reverse: false)
802
1407
  wrap_expr(_rbexpr.top_k(k, reverse))
803
1408
  end
804
1409
 
1410
+ # Get the index values that would sort this column.
1411
+ #
1412
+ # @param reverse [Boolean]
1413
+ # Sort in reverse (descending) order.
1414
+ # @param nulls_last [Boolean]
1415
+ # Place null values last instead of first.
1416
+ #
1417
+ # @return [Expr]
1418
+ #
1419
+ # @example
1420
+ # df = Polars::DataFrame.new(
1421
+ # {
1422
+ # "a" => [20, 10, 30]
1423
+ # }
1424
+ # )
1425
+ # df.select(Polars.col("a").arg_sort)
1426
+ # # =>
1427
+ # # shape: (3, 1)
1428
+ # # ┌─────┐
1429
+ # # │ a │
1430
+ # # │ --- │
1431
+ # # │ u32 │
1432
+ # # ╞═════╡
1433
+ # # │ 1 │
1434
+ # # ├╌╌╌╌╌┤
1435
+ # # │ 0 │
1436
+ # # ├╌╌╌╌╌┤
1437
+ # # │ 2 │
1438
+ # # └─────┘
805
1439
  def arg_sort(reverse: false, nulls_last: false)
806
1440
  wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
807
1441
  end
@@ -854,15 +1488,91 @@ module Polars
854
1488
  wrap_expr(_rbexpr.arg_min)
855
1489
  end
856
1490
 
857
- def search_sorted(element)
858
- element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
859
- wrap_expr(_rbexpr.search_sorted(element._rbexpr))
860
- end
861
-
862
- def sort_by(by, reverse: false)
863
- if !by.is_a?(Array)
864
- by = [by]
865
- end
1491
+ # Find indices where elements should be inserted to maintain order.
1492
+ #
1493
+ # @param element [Object]
1494
+ # Expression or scalar value.
1495
+ #
1496
+ # @return [Expr]
1497
+ #
1498
+ # @example
1499
+ # df = Polars::DataFrame.new(
1500
+ # {
1501
+ # "values" => [1, 2, 3, 5]
1502
+ # }
1503
+ # )
1504
+ # df.select(
1505
+ # [
1506
+ # Polars.col("values").search_sorted(0).alias("zero"),
1507
+ # Polars.col("values").search_sorted(3).alias("three"),
1508
+ # Polars.col("values").search_sorted(6).alias("six")
1509
+ # ]
1510
+ # )
1511
+ # # =>
1512
+ # # shape: (1, 3)
1513
+ # # ┌──────┬───────┬─────┐
1514
+ # # │ zero ┆ three ┆ six │
1515
+ # # │ --- ┆ --- ┆ --- │
1516
+ # # │ u32 ┆ u32 ┆ u32 │
1517
+ # # ╞══════╪═══════╪═════╡
1518
+ # # │ 0 ┆ 2 ┆ 4 │
1519
+ # # └──────┴───────┴─────┘
1520
+ def search_sorted(element)
1521
+ element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
1522
+ wrap_expr(_rbexpr.search_sorted(element._rbexpr))
1523
+ end
1524
+
1525
+ # Sort this column by the ordering of another column, or multiple other columns.
1526
+ #
1527
+ # In projection/selection context, the whole column is sorted.
1528
+ # If used in a groupby context, the groups are sorted.
1529
+ #
1530
+ # @param by [Object]
1531
+ # The column(s) used for sorting.
1532
+ # @param reverse [Boolean]
1533
+ # false -> order from small to large.
1534
+ # true -> order from large to small.
1535
+ #
1536
+ # @return [Expr]
1537
+ #
1538
+ # @example
1539
+ # df = Polars::DataFrame.new(
1540
+ # {
1541
+ # "group" => [
1542
+ # "one",
1543
+ # "one",
1544
+ # "one",
1545
+ # "two",
1546
+ # "two",
1547
+ # "two"
1548
+ # ],
1549
+ # "value" => [1, 98, 2, 3, 99, 4]
1550
+ # }
1551
+ # )
1552
+ # df.select(Polars.col("group").sort_by("value"))
1553
+ # # =>
1554
+ # # shape: (6, 1)
1555
+ # # ┌───────┐
1556
+ # # │ group │
1557
+ # # │ --- │
1558
+ # # │ str │
1559
+ # # ╞═══════╡
1560
+ # # │ one │
1561
+ # # ├╌╌╌╌╌╌╌┤
1562
+ # # │ one │
1563
+ # # ├╌╌╌╌╌╌╌┤
1564
+ # # │ two │
1565
+ # # ├╌╌╌╌╌╌╌┤
1566
+ # # │ two │
1567
+ # # ├╌╌╌╌╌╌╌┤
1568
+ # # │ one │
1569
+ # # ├╌╌╌╌╌╌╌┤
1570
+ # # │ two │
1571
+ # # └───────┘
1572
+ def sort_by(by, reverse: false)
1573
+ if !by.is_a?(Array)
1574
+ by = [by]
1575
+ end
866
1576
  if !reverse.is_a?(Array)
867
1577
  reverse = [reverse]
868
1578
  end
@@ -871,6 +1581,39 @@ module Polars
871
1581
  wrap_expr(_rbexpr.sort_by(by, reverse))
872
1582
  end
873
1583
 
1584
+ # Take values by index.
1585
+ #
1586
+ # @param indices [Expr]
1587
+ # An expression that leads to a `:u32` dtyped Series.
1588
+ #
1589
+ # @return [Expr]
1590
+ #
1591
+ # @example
1592
+ # df = Polars::DataFrame.new(
1593
+ # {
1594
+ # "group" => [
1595
+ # "one",
1596
+ # "one",
1597
+ # "one",
1598
+ # "two",
1599
+ # "two",
1600
+ # "two"
1601
+ # ],
1602
+ # "value" => [1, 98, 2, 3, 99, 4]
1603
+ # }
1604
+ # )
1605
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1606
+ # # =>
1607
+ # # shape: (2, 2)
1608
+ # # ┌───────┬───────┐
1609
+ # # │ group ┆ value │
1610
+ # # │ --- ┆ --- │
1611
+ # # │ str ┆ i64 │
1612
+ # # ╞═══════╪═══════╡
1613
+ # # │ one ┆ 98 │
1614
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1615
+ # # │ two ┆ 99 │
1616
+ # # └───────┴───────┘
874
1617
  def take(indices)
875
1618
  if indices.is_a?(Array)
876
1619
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
@@ -909,11 +1652,105 @@ module Polars
909
1652
  wrap_expr(_rbexpr.shift(periods))
910
1653
  end
911
1654
 
1655
+ # Shift the values by a given period and fill the resulting null values.
1656
+ #
1657
+ # @param periods [Integer]
1658
+ # Number of places to shift (may be negative).
1659
+ # @param fill_value [Object]
1660
+ # Fill nil values with the result of this expression.
1661
+ #
1662
+ # @return [Expr]
1663
+ #
1664
+ # @example
1665
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
1666
+ # df.select(Polars.col("foo").shift_and_fill(1, "a"))
1667
+ # # =>
1668
+ # # shape: (4, 1)
1669
+ # # ┌─────┐
1670
+ # # │ foo │
1671
+ # # │ --- │
1672
+ # # │ str │
1673
+ # # ╞═════╡
1674
+ # # │ a │
1675
+ # # ├╌╌╌╌╌┤
1676
+ # # │ 1 │
1677
+ # # ├╌╌╌╌╌┤
1678
+ # # │ 2 │
1679
+ # # ├╌╌╌╌╌┤
1680
+ # # │ 3 │
1681
+ # # └─────┘
912
1682
  def shift_and_fill(periods, fill_value)
913
1683
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
914
1684
  wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
915
1685
  end
916
1686
 
1687
+ # Fill null values using the specified value or strategy.
1688
+ #
1689
+ # To interpolate over null values, see `interpolate`.
1690
+ #
1691
+ # @param value [Object]
1692
+ # Value used to fill null values.
1693
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1694
+ # Strategy used to fill null values.
1695
+ # @param limit [Integer]
1696
+ # Number of consecutive null values to fill when using the 'forward' or
1697
+ # 'backward' strategy.
1698
+ #
1699
+ # @return [Expr]
1700
+ #
1701
+ # @example
1702
+ # df = Polars::DataFrame.new(
1703
+ # {
1704
+ # "a" => [1, 2, nil],
1705
+ # "b" => [4, nil, 6]
1706
+ # }
1707
+ # )
1708
+ # df.fill_null(strategy: "zero")
1709
+ # # =>
1710
+ # # shape: (3, 2)
1711
+ # # ┌─────┬─────┐
1712
+ # # │ a ┆ b │
1713
+ # # │ --- ┆ --- │
1714
+ # # │ i64 ┆ i64 │
1715
+ # # ╞═════╪═════╡
1716
+ # # │ 1 ┆ 4 │
1717
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1718
+ # # │ 2 ┆ 0 │
1719
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1720
+ # # │ 0 ┆ 6 │
1721
+ # # └─────┴─────┘
1722
+ #
1723
+ # @example
1724
+ # df.fill_null(99)
1725
+ # # =>
1726
+ # # shape: (3, 2)
1727
+ # # ┌─────┬─────┐
1728
+ # # │ a ┆ b │
1729
+ # # │ --- ┆ --- │
1730
+ # # │ i64 ┆ i64 │
1731
+ # # ╞═════╪═════╡
1732
+ # # │ 1 ┆ 4 │
1733
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1734
+ # # │ 2 ┆ 99 │
1735
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1736
+ # # │ 99 ┆ 6 │
1737
+ # # └─────┴─────┘
1738
+ #
1739
+ # @example
1740
+ # df.fill_null(strategy: "forward")
1741
+ # # =>
1742
+ # # shape: (3, 2)
1743
+ # # ┌─────┬─────┐
1744
+ # # │ a ┆ b │
1745
+ # # │ --- ┆ --- │
1746
+ # # │ i64 ┆ i64 │
1747
+ # # ╞═════╪═════╡
1748
+ # # │ 1 ┆ 4 │
1749
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1750
+ # # │ 2 ┆ 4 │
1751
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1752
+ # # │ 2 ┆ 6 │
1753
+ # # └─────┴─────┘
917
1754
  def fill_null(value = nil, strategy: nil, limit: nil)
918
1755
  if !value.nil? && !strategy.nil?
919
1756
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -931,75 +1768,426 @@ module Polars
931
1768
  end
932
1769
  end
933
1770
 
1771
+ # Fill floating point NaN value with a fill value.
1772
+ #
1773
+ # @return [Expr]
1774
+ #
1775
+ # @example
1776
+ # df = Polars::DataFrame.new(
1777
+ # {
1778
+ # "a" => [1.0, nil, Float::NAN],
1779
+ # "b" => [4.0, Float::NAN, 6]
1780
+ # }
1781
+ # )
1782
+ # df.fill_nan("zero")
1783
+ # # =>
1784
+ # # shape: (3, 2)
1785
+ # # ┌──────┬──────┐
1786
+ # # │ a ┆ b │
1787
+ # # │ --- ┆ --- │
1788
+ # # │ str ┆ str │
1789
+ # # ╞══════╪══════╡
1790
+ # # │ 1.0 ┆ 4.0 │
1791
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1792
+ # # │ null ┆ zero │
1793
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1794
+ # # │ zero ┆ 6.0 │
1795
+ # # └──────┴──────┘
934
1796
  def fill_nan(fill_value)
935
1797
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
936
1798
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
937
1799
  end
938
1800
 
1801
+ # Fill missing values with the latest seen values.
1802
+ #
1803
+ # @param limit [Integer]
1804
+ # The number of consecutive null values to forward fill.
1805
+ #
1806
+ # @return [Expr]
1807
+ #
1808
+ # @example
1809
+ # df = Polars::DataFrame.new(
1810
+ # {
1811
+ # "a" => [1, 2, nil],
1812
+ # "b" => [4, nil, 6]
1813
+ # }
1814
+ # )
1815
+ # df.select(Polars.all.forward_fill)
1816
+ # # =>
1817
+ # # shape: (3, 2)
1818
+ # # ┌─────┬─────┐
1819
+ # # │ a ┆ b │
1820
+ # # │ --- ┆ --- │
1821
+ # # │ i64 ┆ i64 │
1822
+ # # ╞═════╪═════╡
1823
+ # # │ 1 ┆ 4 │
1824
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1825
+ # # │ 2 ┆ 4 │
1826
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1827
+ # # │ 2 ┆ 6 │
1828
+ # # └─────┴─────┘
939
1829
  def forward_fill(limit: nil)
940
1830
  wrap_expr(_rbexpr.forward_fill(limit))
941
1831
  end
942
1832
 
1833
+ # Fill missing values with the next to be seen values.
1834
+ #
1835
+ # @param limit [Integer]
1836
+ # The number of consecutive null values to backward fill.
1837
+ #
1838
+ # @return [Expr]
1839
+ #
1840
+ # @example
1841
+ # df = Polars::DataFrame.new(
1842
+ # {
1843
+ # "a" => [1, 2, nil],
1844
+ # "b" => [4, nil, 6]
1845
+ # }
1846
+ # )
1847
+ # df.select(Polars.all.backward_fill)
1848
+ # # =>
1849
+ # # shape: (3, 2)
1850
+ # # ┌──────┬─────┐
1851
+ # # │ a ┆ b │
1852
+ # # │ --- ┆ --- │
1853
+ # # │ i64 ┆ i64 │
1854
+ # # ╞══════╪═════╡
1855
+ # # │ 1 ┆ 4 │
1856
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1857
+ # # │ 2 ┆ 6 │
1858
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1859
+ # # │ null ┆ 6 │
1860
+ # # └──────┴─────┘
943
1861
  def backward_fill(limit: nil)
944
1862
  wrap_expr(_rbexpr.backward_fill(limit))
945
1863
  end
946
1864
 
1865
+ # Reverse the selection.
1866
+ #
1867
+ # @return [Expr]
947
1868
  def reverse
948
1869
  wrap_expr(_rbexpr.reverse)
949
1870
  end
950
1871
 
1872
+ # Get standard deviation.
1873
+ #
1874
+ # @param ddof [Integer]
1875
+ # Degrees of freedom.
1876
+ #
1877
+ # @return [Expr]
1878
+ #
1879
+ # @example
1880
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1881
+ # df.select(Polars.col("a").std)
1882
+ # # =>
1883
+ # # shape: (1, 1)
1884
+ # # ┌─────┐
1885
+ # # │ a │
1886
+ # # │ --- │
1887
+ # # │ f64 │
1888
+ # # ╞═════╡
1889
+ # # │ 1.0 │
1890
+ # # └─────┘
951
1891
  def std(ddof: 1)
952
1892
  wrap_expr(_rbexpr.std(ddof))
953
1893
  end
954
1894
 
1895
+ # Get variance.
1896
+ #
1897
+ # @param ddof [Integer]
1898
+ # Degrees of freedom.
1899
+ #
1900
+ # @return [Expr]
1901
+ #
1902
+ # @example
1903
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1904
+ # df.select(Polars.col("a").var)
1905
+ # # =>
1906
+ # # shape: (1, 1)
1907
+ # # ┌─────┐
1908
+ # # │ a │
1909
+ # # │ --- │
1910
+ # # │ f64 │
1911
+ # # ╞═════╡
1912
+ # # │ 1.0 │
1913
+ # # └─────┘
955
1914
  def var(ddof: 1)
956
1915
  wrap_expr(_rbexpr.var(ddof))
957
1916
  end
958
1917
 
1918
+ # Get maximum value.
1919
+ #
1920
+ # @return [Expr]
1921
+ #
1922
+ # @example
1923
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1924
+ # df.select(Polars.col("a").max)
1925
+ # # =>
1926
+ # # shape: (1, 1)
1927
+ # # ┌─────┐
1928
+ # # │ a │
1929
+ # # │ --- │
1930
+ # # │ f64 │
1931
+ # # ╞═════╡
1932
+ # # │ 1.0 │
1933
+ # # └─────┘
959
1934
  def max
960
1935
  wrap_expr(_rbexpr.max)
961
1936
  end
962
1937
 
1938
+ # Get minimum value.
1939
+ #
1940
+ # @return [Expr]
1941
+ #
1942
+ # @example
1943
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1944
+ # df.select(Polars.col("a").min)
1945
+ # # =>
1946
+ # # shape: (1, 1)
1947
+ # # ┌──────┐
1948
+ # # │ a │
1949
+ # # │ --- │
1950
+ # # │ f64 │
1951
+ # # ╞══════╡
1952
+ # # │ -1.0 │
1953
+ # # └──────┘
963
1954
  def min
964
1955
  wrap_expr(_rbexpr.min)
965
1956
  end
966
1957
 
1958
+ # Get maximum value, but propagate/poison encountered NaN values.
1959
+ #
1960
+ # @return [Expr]
1961
+ #
1962
+ # @example
1963
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1964
+ # df.select(Polars.col("a").nan_max)
1965
+ # # =>
1966
+ # # shape: (1, 1)
1967
+ # # ┌─────┐
1968
+ # # │ a │
1969
+ # # │ --- │
1970
+ # # │ f64 │
1971
+ # # ╞═════╡
1972
+ # # │ NaN │
1973
+ # # └─────┘
967
1974
  def nan_max
968
1975
  wrap_expr(_rbexpr.nan_max)
969
1976
  end
970
1977
 
1978
+ # Get minimum value, but propagate/poison encountered NaN values.
1979
+ #
1980
+ # @return [Expr]
1981
+ #
1982
+ # @example
1983
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1984
+ # df.select(Polars.col("a").nan_min)
1985
+ # # =>
1986
+ # # shape: (1, 1)
1987
+ # # ┌─────┐
1988
+ # # │ a │
1989
+ # # │ --- │
1990
+ # # │ f64 │
1991
+ # # ╞═════╡
1992
+ # # │ NaN │
1993
+ # # └─────┘
971
1994
  def nan_min
972
1995
  wrap_expr(_rbexpr.nan_min)
973
1996
  end
974
1997
 
1998
+ # Get sum value.
1999
+ #
2000
+ # @return [Expr]
2001
+ #
2002
+ # @note
2003
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
2004
+ # `:i64` before summing to prevent overflow issues.
2005
+ #
2006
+ # @example
2007
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2008
+ # df.select(Polars.col("a").sum)
2009
+ # # =>
2010
+ # # shape: (1, 1)
2011
+ # # ┌─────┐
2012
+ # # │ a │
2013
+ # # │ --- │
2014
+ # # │ i64 │
2015
+ # # ╞═════╡
2016
+ # # │ 0 │
2017
+ # # └─────┘
975
2018
  def sum
976
2019
  wrap_expr(_rbexpr.sum)
977
2020
  end
978
2021
 
979
- def mean
980
- wrap_expr(_rbexpr.mean)
2022
+ # Get mean value.
2023
+ #
2024
+ # @return [Expr]
2025
+ #
2026
+ # @example
2027
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2028
+ # df.select(Polars.col("a").mean)
2029
+ # # =>
2030
+ # # shape: (1, 1)
2031
+ # # ┌─────┐
2032
+ # # │ a │
2033
+ # # │ --- │
2034
+ # # │ f64 │
2035
+ # # ╞═════╡
2036
+ # # │ 0.0 │
2037
+ # # └─────┘
2038
+ def mean
2039
+ wrap_expr(_rbexpr.mean)
981
2040
  end
982
2041
 
2042
+ # Get median value using linear interpolation.
2043
+ #
2044
+ # @return [Expr]
2045
+ #
2046
+ # @example
2047
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2048
+ # df.select(Polars.col("a").median)
2049
+ # # =>
2050
+ # # shape: (1, 1)
2051
+ # # ┌─────┐
2052
+ # # │ a │
2053
+ # # │ --- │
2054
+ # # │ f64 │
2055
+ # # ╞═════╡
2056
+ # # │ 0.0 │
2057
+ # # └─────┘
983
2058
  def median
984
2059
  wrap_expr(_rbexpr.median)
985
2060
  end
986
2061
 
2062
+ # Compute the product of an expression.
2063
+ #
2064
+ # @return [Expr]
2065
+ #
2066
+ # @example
2067
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
2068
+ # df.select(Polars.col("a").product)
2069
+ # # =>
2070
+ # # shape: (1, 1)
2071
+ # # ┌─────┐
2072
+ # # │ a │
2073
+ # # │ --- │
2074
+ # # │ i64 │
2075
+ # # ╞═════╡
2076
+ # # │ 6 │
2077
+ # # └─────┘
987
2078
  def product
988
2079
  wrap_expr(_rbexpr.product)
989
2080
  end
990
2081
 
2082
+ # Count unique values.
2083
+ #
2084
+ # @return [Expr]
2085
+ #
2086
+ # @example
2087
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2088
+ # df.select(Polars.col("a").n_unique)
2089
+ # # =>
2090
+ # # shape: (1, 1)
2091
+ # # ┌─────┐
2092
+ # # │ a │
2093
+ # # │ --- │
2094
+ # # │ u32 │
2095
+ # # ╞═════╡
2096
+ # # │ 2 │
2097
+ # # └─────┘
991
2098
  def n_unique
992
2099
  wrap_expr(_rbexpr.n_unique)
993
2100
  end
994
2101
 
2102
+ # Count null values.
2103
+ #
2104
+ # @return [Expr]
2105
+ #
2106
+ # @example
2107
+ # df = Polars::DataFrame.new(
2108
+ # {
2109
+ # "a" => [nil, 1, nil],
2110
+ # "b" => [1, 2, 3]
2111
+ # }
2112
+ # )
2113
+ # df.select(Polars.all.null_count)
2114
+ # # =>
2115
+ # # shape: (1, 2)
2116
+ # # ┌─────┬─────┐
2117
+ # # │ a ┆ b │
2118
+ # # │ --- ┆ --- │
2119
+ # # │ u32 ┆ u32 │
2120
+ # # ╞═════╪═════╡
2121
+ # # │ 2 ┆ 0 │
2122
+ # # └─────┴─────┘
995
2123
  def null_count
996
2124
  wrap_expr(_rbexpr.null_count)
997
2125
  end
998
2126
 
2127
+ # Get index of first unique value.
2128
+ #
2129
+ # @return [Expr]
2130
+ #
2131
+ # @example
2132
+ # df = Polars::DataFrame.new(
2133
+ # {
2134
+ # "a" => [8, 9, 10],
2135
+ # "b" => [nil, 4, 4]
2136
+ # }
2137
+ # )
2138
+ # df.select(Polars.col("a").arg_unique)
2139
+ # # =>
2140
+ # # shape: (3, 1)
2141
+ # # ┌─────┐
2142
+ # # │ a │
2143
+ # # │ --- │
2144
+ # # │ u32 │
2145
+ # # ╞═════╡
2146
+ # # │ 0 │
2147
+ # # ├╌╌╌╌╌┤
2148
+ # # │ 1 │
2149
+ # # ├╌╌╌╌╌┤
2150
+ # # │ 2 │
2151
+ # # └─────┘
2152
+ #
2153
+ # @example
2154
+ # df.select(Polars.col("b").arg_unique)
2155
+ # # =>
2156
+ # # shape: (2, 1)
2157
+ # # ┌─────┐
2158
+ # # │ b │
2159
+ # # │ --- │
2160
+ # # │ u32 │
2161
+ # # ╞═════╡
2162
+ # # │ 0 │
2163
+ # # ├╌╌╌╌╌┤
2164
+ # # │ 1 │
2165
+ # # └─────┘
999
2166
  def arg_unique
1000
2167
  wrap_expr(_rbexpr.arg_unique)
1001
2168
  end
1002
2169
 
2170
+ # Get unique values of this expression.
2171
+ #
2172
+ # @param maintain_order [Boolean]
2173
+ # Maintain order of data. This requires more work.
2174
+ #
2175
+ # @return [Expr]
2176
+ #
2177
+ # @example
2178
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2179
+ # df.select(Polars.col("a").unique(maintain_order: true))
2180
+ # # =>
2181
+ # # shape: (2, 1)
2182
+ # # ┌─────┐
2183
+ # # │ a │
2184
+ # # │ --- │
2185
+ # # │ i64 │
2186
+ # # ╞═════╡
2187
+ # # │ 1 │
2188
+ # # ├╌╌╌╌╌┤
2189
+ # # │ 2 │
2190
+ # # └─────┘
1003
2191
  def unique(maintain_order: false)
1004
2192
  if maintain_order
1005
2193
  wrap_expr(_rbexpr.unique_stable)
@@ -1008,95 +2196,743 @@ module Polars
1008
2196
  end
1009
2197
  end
1010
2198
 
2199
+ # Get the first value.
2200
+ #
2201
+ # @return [Expr]
2202
+ #
2203
+ # @example
2204
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2205
+ # df.select(Polars.col("a").first)
2206
+ # # =>
2207
+ # # shape: (1, 1)
2208
+ # # ┌─────┐
2209
+ # # │ a │
2210
+ # # │ --- │
2211
+ # # │ i64 │
2212
+ # # ╞═════╡
2213
+ # # │ 1 │
2214
+ # # └─────┘
1011
2215
  def first
1012
2216
  wrap_expr(_rbexpr.first)
1013
2217
  end
1014
2218
 
2219
+ # Get the last value.
2220
+ #
2221
+ # @return [Expr]
2222
+ #
2223
+ # @example
2224
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2225
+ # df.select(Polars.col("a").last)
2226
+ # # =>
2227
+ # # shape: (1, 1)
2228
+ # # ┌─────┐
2229
+ # # │ a │
2230
+ # # │ --- │
2231
+ # # │ i64 │
2232
+ # # ╞═════╡
2233
+ # # │ 2 │
2234
+ # # └─────┘
1015
2235
  def last
1016
2236
  wrap_expr(_rbexpr.last)
1017
2237
  end
1018
2238
 
2239
+ # Apply window function over a subgroup.
2240
+ #
2241
+ # This is similar to a groupby + aggregation + self join.
2242
+ # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2243
+ #
2244
+ # @param expr [Object]
2245
+ # Column(s) to group by.
2246
+ #
2247
+ # @return [Expr]
2248
+ #
2249
+ # @example
2250
+ # df = Polars::DataFrame.new(
2251
+ # {
2252
+ # "groups" => ["g1", "g1", "g2"],
2253
+ # "values" => [1, 2, 3]
2254
+ # }
2255
+ # )
2256
+ # df.with_column(
2257
+ # Polars.col("values").max.over("groups").alias("max_by_group")
2258
+ # )
2259
+ # # =>
2260
+ # # shape: (3, 3)
2261
+ # # ┌────────┬────────┬──────────────┐
2262
+ # # │ groups ┆ values ┆ max_by_group │
2263
+ # # │ --- ┆ --- ┆ --- │
2264
+ # # │ str ┆ i64 ┆ i64 │
2265
+ # # ╞════════╪════════╪══════════════╡
2266
+ # # │ g1 ┆ 1 ┆ 2 │
2267
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2268
+ # # │ g1 ┆ 2 ┆ 2 │
2269
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2270
+ # # │ g2 ┆ 3 ┆ 3 │
2271
+ # # └────────┴────────┴──────────────┘
2272
+ #
2273
+ # @example
2274
+ # df = Polars::DataFrame.new(
2275
+ # {
2276
+ # "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
2277
+ # "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
2278
+ # }
2279
+ # )
2280
+ # df.lazy
2281
+ # .select([Polars.col("groups").sum.over("groups")])
2282
+ # .collect
2283
+ # # =>
2284
+ # # shape: (9, 1)
2285
+ # # ┌────────┐
2286
+ # # │ groups │
2287
+ # # │ --- │
2288
+ # # │ i64 │
2289
+ # # ╞════════╡
2290
+ # # │ 4 │
2291
+ # # ├╌╌╌╌╌╌╌╌┤
2292
+ # # │ 4 │
2293
+ # # ├╌╌╌╌╌╌╌╌┤
2294
+ # # │ 6 │
2295
+ # # ├╌╌╌╌╌╌╌╌┤
2296
+ # # │ 6 │
2297
+ # # ├╌╌╌╌╌╌╌╌┤
2298
+ # # │ ... │
2299
+ # # ├╌╌╌╌╌╌╌╌┤
2300
+ # # │ 6 │
2301
+ # # ├╌╌╌╌╌╌╌╌┤
2302
+ # # │ 6 │
2303
+ # # ├╌╌╌╌╌╌╌╌┤
2304
+ # # │ 6 │
2305
+ # # ├╌╌╌╌╌╌╌╌┤
2306
+ # # │ 4 │
2307
+ # # └────────┘
1019
2308
  def over(expr)
1020
2309
  rbexprs = Utils.selection_to_rbexpr_list(expr)
1021
2310
  wrap_expr(_rbexpr.over(rbexprs))
1022
2311
  end
1023
2312
 
2313
+ # Get mask of unique values.
2314
+ #
2315
+ # @return [Expr]
2316
+ #
2317
+ # @example
2318
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2319
+ # df.select(Polars.col("a").is_unique)
2320
+ # # =>
2321
+ # # shape: (3, 1)
2322
+ # # ┌───────┐
2323
+ # # │ a │
2324
+ # # │ --- │
2325
+ # # │ bool │
2326
+ # # ╞═══════╡
2327
+ # # │ false │
2328
+ # # ├╌╌╌╌╌╌╌┤
2329
+ # # │ false │
2330
+ # # ├╌╌╌╌╌╌╌┤
2331
+ # # │ true │
2332
+ # # └───────┘
1024
2333
  def is_unique
1025
2334
  wrap_expr(_rbexpr.is_unique)
1026
2335
  end
1027
2336
 
2337
+ # Get a mask of the first unique value.
2338
+ #
2339
+ # @return [Expr]
2340
+ #
2341
+ # @example
2342
+ # df = Polars::DataFrame.new(
2343
+ # {
2344
+ # "num" => [1, 2, 3, 1, 5]
2345
+ # }
2346
+ # )
2347
+ # df.with_column(Polars.col("num").is_first.alias("is_first"))
2348
+ # # =>
2349
+ # # shape: (5, 2)
2350
+ # # ┌─────┬──────────┐
2351
+ # # │ num ┆ is_first │
2352
+ # # │ --- ┆ --- │
2353
+ # # │ i64 ┆ bool │
2354
+ # # ╞═════╪══════════╡
2355
+ # # │ 1 ┆ true │
2356
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2357
+ # # │ 2 ┆ true │
2358
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2359
+ # # │ 3 ┆ true │
2360
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2361
+ # # │ 1 ┆ false │
2362
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2363
+ # # │ 5 ┆ true │
2364
+ # # └─────┴──────────┘
1028
2365
  def is_first
1029
2366
  wrap_expr(_rbexpr.is_first)
1030
2367
  end
1031
2368
 
2369
+ # Get mask of duplicated values.
2370
+ #
2371
+ # @return [Expr]
2372
+ #
2373
+ # @example
2374
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2375
+ # df.select(Polars.col("a").is_duplicated)
2376
+ # # =>
2377
+ # # shape: (3, 1)
2378
+ # # ┌───────┐
2379
+ # # │ a │
2380
+ # # │ --- │
2381
+ # # │ bool │
2382
+ # # ╞═══════╡
2383
+ # # │ true │
2384
+ # # ├╌╌╌╌╌╌╌┤
2385
+ # # │ true │
2386
+ # # ├╌╌╌╌╌╌╌┤
2387
+ # # │ false │
2388
+ # # └───────┘
1032
2389
  def is_duplicated
1033
2390
  wrap_expr(_rbexpr.is_duplicated)
1034
2391
  end
1035
2392
 
2393
+ # Get quantile value.
2394
+ #
2395
+ # @param quantile [Float]
2396
+ # Quantile between 0.0 and 1.0.
2397
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2398
+ # Interpolation method.
2399
+ #
2400
+ # @return [Expr]
2401
+ #
2402
+ # @example
2403
+ # df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
2404
+ # df.select(Polars.col("a").quantile(0.3))
2405
+ # # =>
2406
+ # # shape: (1, 1)
2407
+ # # ┌─────┐
2408
+ # # │ a │
2409
+ # # │ --- │
2410
+ # # │ f64 │
2411
+ # # ╞═════╡
2412
+ # # │ 1.0 │
2413
+ # # └─────┘
2414
+ #
2415
+ # @example
2416
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
2417
+ # # =>
2418
+ # # shape: (1, 1)
2419
+ # # ┌─────┐
2420
+ # # │ a │
2421
+ # # │ --- │
2422
+ # # │ f64 │
2423
+ # # ╞═════╡
2424
+ # # │ 2.0 │
2425
+ # # └─────┘
2426
+ #
2427
+ # @example
2428
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
2429
+ # # =>
2430
+ # # shape: (1, 1)
2431
+ # # ┌─────┐
2432
+ # # │ a │
2433
+ # # │ --- │
2434
+ # # │ f64 │
2435
+ # # ╞═════╡
2436
+ # # │ 1.0 │
2437
+ # # └─────┘
2438
+ #
2439
+ # @example
2440
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
2441
+ # # =>
2442
+ # # shape: (1, 1)
2443
+ # # ┌─────┐
2444
+ # # │ a │
2445
+ # # │ --- │
2446
+ # # │ f64 │
2447
+ # # ╞═════╡
2448
+ # # │ 1.5 │
2449
+ # # └─────┘
2450
+ #
2451
+ # @example
2452
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
2453
+ # # =>
2454
+ # # shape: (1, 1)
2455
+ # # ┌─────┐
2456
+ # # │ a │
2457
+ # # │ --- │
2458
+ # # │ f64 │
2459
+ # # ╞═════╡
2460
+ # # │ 1.5 │
2461
+ # # └─────┘
1036
2462
  def quantile(quantile, interpolation: "nearest")
1037
2463
  wrap_expr(_rbexpr.quantile(quantile, interpolation))
1038
2464
  end
1039
2465
 
1040
- def filter(predicate)
2466
+ # Filter a single column.
2467
+ #
2468
+ # Mostly useful in an aggregation context. If you want to filter on a DataFrame
2469
+ # level, use `LazyFrame#filter`.
2470
+ #
2471
+ # @param predicate [Expr]
2472
+ # Boolean expression.
2473
+ #
2474
+ # @return [Expr]
2475
+ #
2476
+ # @example
2477
+ # df = Polars::DataFrame.new(
2478
+ # {
2479
+ # "group_col" => ["g1", "g1", "g2"],
2480
+ # "b" => [1, 2, 3]
2481
+ # }
2482
+ # )
2483
+ # (
2484
+ # df.groupby("group_col").agg(
2485
+ # [
2486
+ # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2487
+ # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
2488
+ # ]
2489
+ # )
2490
+ # ).sort("group_col")
2491
+ # # =>
2492
+ # # shape: (2, 3)
2493
+ # # ┌───────────┬──────┬─────┐
2494
+ # # │ group_col ┆ lt ┆ gte │
2495
+ # # │ --- ┆ --- ┆ --- │
2496
+ # # │ str ┆ i64 ┆ i64 │
2497
+ # # ╞═══════════╪══════╪═════╡
2498
+ # # │ g1 ┆ 1 ┆ 2 │
2499
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2500
+ # # │ g2 ┆ null ┆ 3 │
2501
+ # # └───────────┴──────┴─────┘
2502
+ def filter(predicate)
1041
2503
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
1042
2504
  end
1043
2505
 
2506
+ # Filter a single column.
2507
+ #
2508
+ # Alias for {#filter}.
2509
+ #
2510
+ # @param predicate [Expr]
2511
+ # Boolean expression.
2512
+ #
2513
+ # @return [Expr]
2514
+ #
2515
+ # @example
2516
+ # df = Polars::DataFrame.new(
2517
+ # {
2518
+ # "group_col" => ["g1", "g1", "g2"],
2519
+ # "b" => [1, 2, 3]
2520
+ # }
2521
+ # )
2522
+ # (
2523
+ # df.groupby("group_col").agg(
2524
+ # [
2525
+ # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2526
+ # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
2527
+ # ]
2528
+ # )
2529
+ # ).sort("group_col")
2530
+ # # =>
2531
+ # # shape: (2, 3)
2532
+ # # ┌───────────┬──────┬─────┐
2533
+ # # │ group_col ┆ lt ┆ gte │
2534
+ # # │ --- ┆ --- ┆ --- │
2535
+ # # │ str ┆ i64 ┆ i64 │
2536
+ # # ╞═══════════╪══════╪═════╡
2537
+ # # │ g1 ┆ 1 ┆ 2 │
2538
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2539
+ # # │ g2 ┆ null ┆ 3 │
2540
+ # # └───────────┴──────┴─────┘
1044
2541
  def where(predicate)
1045
2542
  filter(predicate)
1046
2543
  end
1047
2544
 
1048
- # def map
2545
+ # Apply a custom Ruby function to a Series or sequence of Series.
2546
+ #
2547
+ # The output of this custom function must be a Series.
2548
+ # If you want to apply a custom function elementwise over single values, see
2549
+ # {#apply}. A use case for `map` is when you want to transform an
2550
+ # expression with a third-party library.
2551
+ #
2552
+ # Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
2553
+ #
2554
+ # @param return_dtype [Symbol]
2555
+ # Dtype of the output Series.
2556
+ # @param agg_list [Boolean]
2557
+ # Aggregate list.
2558
+ #
2559
+ # @return [Expr]
2560
+ #
2561
+ # @example
2562
+ # df = Polars::DataFrame.new(
2563
+ # {
2564
+ # "sine" => [0.0, 1.0, 0.0, -1.0],
2565
+ # "cosine" => [1.0, 0.0, -1.0, 0.0]
2566
+ # }
2567
+ # )
2568
+ # df.select(Polars.all.map { |x| x.to_numpy.argmax })
2569
+ # # =>
2570
+ # # shape: (1, 2)
2571
+ # # ┌──────┬────────┐
2572
+ # # │ sine ┆ cosine │
2573
+ # # │ --- ┆ --- │
2574
+ # # │ i64 ┆ i64 │
2575
+ # # ╞══════╪════════╡
2576
+ # # │ 1 ┆ 0 │
2577
+ # # └──────┴────────┘
2578
+ # def map(return_dtype: nil, agg_list: false, &block)
2579
+ # if !return_dtype.nil?
2580
+ # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2581
+ # end
2582
+ # wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
1049
2583
  # end
1050
2584
 
1051
2585
  # def apply
1052
2586
  # end
1053
2587
 
2588
+ # Explode a list or utf8 Series. This means that every item is expanded to a new
2589
+ # row.
2590
+ #
2591
+ # Alias for {#explode}.
2592
+ #
2593
+ # @return [Expr]
1054
2594
  #
2595
+ # @example
2596
+ # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2597
+ # df.select(Polars.col("foo").flatten)
2598
+ # # =>
2599
+ # # shape: (10, 1)
2600
+ # # ┌─────┐
2601
+ # # │ foo │
2602
+ # # │ --- │
2603
+ # # │ str │
2604
+ # # ╞═════╡
2605
+ # # │ h │
2606
+ # # ├╌╌╌╌╌┤
2607
+ # # │ e │
2608
+ # # ├╌╌╌╌╌┤
2609
+ # # │ l │
2610
+ # # ├╌╌╌╌╌┤
2611
+ # # │ l │
2612
+ # # ├╌╌╌╌╌┤
2613
+ # # │ ... │
2614
+ # # ├╌╌╌╌╌┤
2615
+ # # │ o │
2616
+ # # ├╌╌╌╌╌┤
2617
+ # # │ r │
2618
+ # # ├╌╌╌╌╌┤
2619
+ # # │ l │
2620
+ # # ├╌╌╌╌╌┤
2621
+ # # │ d │
2622
+ # # └─────┘
1055
2623
  def flatten
1056
2624
  wrap_expr(_rbexpr.explode)
1057
2625
  end
1058
2626
 
2627
+ # Explode a list or utf8 Series.
2628
+ #
2629
+ # This means that every item is expanded to a new row.
2630
+ #
2631
+ # @return [Expr]
2632
+ #
2633
+ # @example
2634
+ # df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
2635
+ # df.select(Polars.col("b").explode)
2636
+ # # =>
2637
+ # # shape: (6, 1)
2638
+ # # ┌─────┐
2639
+ # # │ b │
2640
+ # # │ --- │
2641
+ # # │ i64 │
2642
+ # # ╞═════╡
2643
+ # # │ 1 │
2644
+ # # ├╌╌╌╌╌┤
2645
+ # # │ 2 │
2646
+ # # ├╌╌╌╌╌┤
2647
+ # # │ 3 │
2648
+ # # ├╌╌╌╌╌┤
2649
+ # # │ 4 │
2650
+ # # ├╌╌╌╌╌┤
2651
+ # # │ 5 │
2652
+ # # ├╌╌╌╌╌┤
2653
+ # # │ 6 │
2654
+ # # └─────┘
1059
2655
  def explode
1060
2656
  wrap_expr(_rbexpr.explode)
1061
2657
  end
1062
2658
 
2659
+ # Take every nth value in the Series and return as a new Series.
2660
+ #
2661
+ # @return [Expr]
2662
+ #
2663
+ # @example
2664
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2665
+ # df.select(Polars.col("foo").take_every(3))
2666
+ # # =>
2667
+ # # shape: (3, 1)
2668
+ # # ┌─────┐
2669
+ # # │ foo │
2670
+ # # │ --- │
2671
+ # # │ i64 │
2672
+ # # ╞═════╡
2673
+ # # │ 1 │
2674
+ # # ├╌╌╌╌╌┤
2675
+ # # │ 4 │
2676
+ # # ├╌╌╌╌╌┤
2677
+ # # │ 7 │
2678
+ # # └─────┘
1063
2679
  def take_every(n)
1064
2680
  wrap_expr(_rbexpr.take_every(n))
1065
2681
  end
1066
2682
 
2683
+ # Get the first `n` rows.
2684
+ #
2685
+ # @param n [Integer]
2686
+ # Number of rows to return.
2687
+ #
2688
+ # @return [Expr]
2689
+ #
2690
+ # @example
2691
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2692
+ # df.head(3)
2693
+ # # =>
2694
+ # # shape: (3, 1)
2695
+ # # ┌─────┐
2696
+ # # │ foo │
2697
+ # # │ --- │
2698
+ # # │ i64 │
2699
+ # # ╞═════╡
2700
+ # # │ 1 │
2701
+ # # ├╌╌╌╌╌┤
2702
+ # # │ 2 │
2703
+ # # ├╌╌╌╌╌┤
2704
+ # # │ 3 │
2705
+ # # └─────┘
1067
2706
  def head(n = 10)
1068
2707
  wrap_expr(_rbexpr.head(n))
1069
2708
  end
1070
2709
 
2710
+ # Get the last `n` rows.
2711
+ #
2712
+ # @param n [Integer]
2713
+ # Number of rows to return.
2714
+ #
2715
+ # @return [Expr]
2716
+ #
2717
+ # @example
2718
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2719
+ # df.tail(3)
2720
+ # # =>
2721
+ # # shape: (3, 1)
2722
+ # # ┌─────┐
2723
+ # # │ foo │
2724
+ # # │ --- │
2725
+ # # │ i64 │
2726
+ # # ╞═════╡
2727
+ # # │ 5 │
2728
+ # # ├╌╌╌╌╌┤
2729
+ # # │ 6 │
2730
+ # # ├╌╌╌╌╌┤
2731
+ # # │ 7 │
2732
+ # # └─────┘
1071
2733
  def tail(n = 10)
1072
2734
  wrap_expr(_rbexpr.tail(n))
1073
2735
  end
1074
2736
 
2737
+ # Get the first `n` rows.
2738
+ #
2739
+ # Alias for {#head}.
2740
+ #
2741
+ # @param n [Integer]
2742
+ # Number of rows to return.
2743
+ #
2744
+ # @return [Expr]
1075
2745
  def limit(n = 10)
1076
2746
  head(n)
1077
2747
  end
1078
2748
 
2749
+ # Raise expression to the power of exponent.
2750
+ #
2751
+ # @return [Expr]
2752
+ #
2753
+ # @example
2754
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
2755
+ # df.select(Polars.col("foo").pow(3))
2756
+ # # =>
2757
+ # # shape: (4, 1)
2758
+ # # ┌──────┐
2759
+ # # │ foo │
2760
+ # # │ --- │
2761
+ # # │ f64 │
2762
+ # # ╞══════╡
2763
+ # # │ 1.0 │
2764
+ # # ├╌╌╌╌╌╌┤
2765
+ # # │ 8.0 │
2766
+ # # ├╌╌╌╌╌╌┤
2767
+ # # │ 27.0 │
2768
+ # # ├╌╌╌╌╌╌┤
2769
+ # # │ 64.0 │
2770
+ # # └──────┘
1079
2771
  def pow(exponent)
1080
2772
  exponent = Utils.expr_to_lit_or_expr(exponent)
1081
2773
  wrap_expr(_rbexpr.pow(exponent._rbexpr))
1082
2774
  end
1083
2775
 
1084
- # def is_in
1085
- # end
2776
+ # Check if elements of this expression are present in the other Series.
2777
+ #
2778
+ # @param other [Object]
2779
+ # Series or sequence of primitive type.
2780
+ #
2781
+ # @return [Expr]
2782
+ #
2783
+ # @example
2784
+ # df = Polars::DataFrame.new(
2785
+ # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
2786
+ # )
2787
+ # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
2788
+ # # =>
2789
+ # # shape: (3, 1)
2790
+ # # ┌──────────┐
2791
+ # # │ contains │
2792
+ # # │ --- │
2793
+ # # │ bool │
2794
+ # # ╞══════════╡
2795
+ # # │ true │
2796
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2797
+ # # │ true │
2798
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2799
+ # # │ false │
2800
+ # # └──────────┘
2801
+ def is_in(other)
2802
+ if other.is_a?(Array)
2803
+ if other.length == 0
2804
+ other = Polars.lit(nil)
2805
+ else
2806
+ other = Polars.lit(Series.new(other))
2807
+ end
2808
+ else
2809
+ other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
2810
+ end
2811
+ wrap_expr(_rbexpr.is_in(other._rbexpr))
2812
+ end
1086
2813
 
2814
+ # Repeat the elements in this Series as specified in the given expression.
2815
+ #
2816
+ # The repeated elements are expanded into a `List`.
2817
+ #
2818
+ # @param by [Object]
2819
+ # Numeric column that determines how often the values will be repeated.
2820
+ # The column will be coerced to UInt32. Give this dtype to make the coercion a
2821
+ # no-op.
1087
2822
  #
2823
+ # @return [Expr]
2824
+ #
2825
+ # @example
2826
+ # df = Polars::DataFrame.new(
2827
+ # {
2828
+ # "a" => ["x", "y", "z"],
2829
+ # "n" => [1, 2, 3]
2830
+ # }
2831
+ # )
2832
+ # df.select(Polars.col("a").repeat_by("n"))
2833
+ # # =>
2834
+ # # shape: (3, 1)
2835
+ # # ┌─────────────────┐
2836
+ # # │ a │
2837
+ # # │ --- │
2838
+ # # │ list[str] │
2839
+ # # ╞═════════════════╡
2840
+ # # │ ["x"] │
2841
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2842
+ # # │ ["y", "y"] │
2843
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2844
+ # # │ ["z", "z", "z"] │
2845
+ # # └─────────────────┘
1088
2846
  def repeat_by(by)
1089
- by = Utils.expr_to_lit_or_expr(by, false)
2847
+ by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
1090
2848
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
1091
2849
  end
1092
2850
 
1093
- # def is_between
1094
- # end
2851
+ # Check if this expression is between start and end.
2852
+ #
2853
+ # @param start [Object]
2854
+ # Lower bound as primitive type or datetime.
2855
+ # @param _end [Object]
2856
+ # Upper bound as primitive type or datetime.
2857
+ # @param include_bounds [Boolean, Array<Boolean>]
2858
+ # `false`: Exclude both start and end (default).
2859
+ # `true`: Include both start and end.
2860
+ # `[false, false]`: Exclude start and exclude end.
2861
+ # `[true, true]`: Include start and include end.
2862
+ # `[false, true]`: Exclude start and include end.
2863
+ # `[true, false]`: Include start and exclude end.
2864
+ #
2865
+ # @return [Expr]
2866
+ #
2867
+ # @example
2868
+ # df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
2869
+ # df.with_column(Polars.col("num").is_between(2, 4))
2870
+ # # =>
2871
+ # # shape: (5, 2)
2872
+ # # ┌─────┬────────────┐
2873
+ # # │ num ┆ is_between │
2874
+ # # │ --- ┆ --- │
2875
+ # # │ i64 ┆ bool │
2876
+ # # ╞═════╪════════════╡
2877
+ # # │ 1 ┆ false │
2878
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2879
+ # # │ 2 ┆ false │
2880
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2881
+ # # │ 3 ┆ true │
2882
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2883
+ # # │ 4 ┆ false │
2884
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2885
+ # # │ 5 ┆ false │
2886
+ # # └─────┴────────────┘
2887
+ def is_between(start, _end, include_bounds: false)
2888
+ if include_bounds == false || include_bounds == [false, false]
2889
+ ((self > start) & (self < _end)).alias("is_between")
2890
+ elsif include_bounds == true || include_bounds == [true, true]
2891
+ ((self >= start) & (self <= _end)).alias("is_between")
2892
+ elsif include_bounds == [false, true]
2893
+ ((self > start) & (self <= _end)).alias("is_between")
2894
+ elsif include_bounds == [true, false]
2895
+ ((self >= start) & (self < _end)).alias("is_between")
2896
+ else
2897
+ raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
2898
+ end
2899
+ end
1095
2900
 
1096
2901
  # def _hash
1097
2902
  # end
1098
2903
 
2904
+ # Reinterpret the underlying bits as a signed/unsigned integer.
2905
+ #
2906
+ # This operation is only allowed for 64-bit integers. For integers with fewer bits,
2907
+ # you can safely use the cast operation.
2908
+ #
2909
+ # @param signed [Boolean]
2910
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
2911
+ #
2912
+ # @return [Expr]
1099
2913
  #
2914
+ # @example
2915
+ # s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
2916
+ # df = Polars::DataFrame.new([s])
2917
+ # df.select(
2918
+ # [
2919
+ # Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
2920
+ # Polars.col("a").alias("original")
2921
+ # ]
2922
+ # )
2923
+ # # =>
2924
+ # # shape: (3, 2)
2925
+ # # ┌───────────────┬──────────┐
2926
+ # # │ reinterpreted ┆ original │
2927
+ # # │ --- ┆ --- │
2928
+ # # │ i64 ┆ u64 │
2929
+ # # ╞═══════════════╪══════════╡
2930
+ # # │ 1 ┆ 1 │
2931
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2932
+ # # │ 1 ┆ 1 │
2933
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2934
+ # # │ 2 ┆ 2 │
2935
+ # # └───────────────┴──────────┘
1100
2936
  def reinterpret(signed: false)
1101
2937
  wrap_expr(_rbexpr.reinterpret(signed))
1102
2938
  end
@@ -1104,147 +2940,1541 @@ module Polars
1104
2940
  # def _inspect
1105
2941
  # end
1106
2942
 
2943
+ # Fill nulls with linear interpolation over missing values.
2944
+ #
2945
+ # Can also be used to regrid data to a new grid - see examples below.
2946
+ #
2947
+ # @return [Expr]
1107
2948
  #
2949
+ # @example Fill nulls with linear interpolation
2950
+ # df = Polars::DataFrame.new(
2951
+ # {
2952
+ # "a" => [1, nil, 3],
2953
+ # "b" => [1.0, Float::NAN, 3.0]
2954
+ # }
2955
+ # )
2956
+ # df.select(Polars.all.interpolate)
2957
+ # # =>
2958
+ # # shape: (3, 2)
2959
+ # # ┌─────┬─────┐
2960
+ # # │ a ┆ b │
2961
+ # # │ --- ┆ --- │
2962
+ # # │ i64 ┆ f64 │
2963
+ # # ╞═════╪═════╡
2964
+ # # │ 1 ┆ 1.0 │
2965
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2966
+ # # │ 2 ┆ NaN │
2967
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2968
+ # # │ 3 ┆ 3.0 │
2969
+ # # └─────┴─────┘
1108
2970
  def interpolate
1109
2971
  wrap_expr(_rbexpr.interpolate)
1110
2972
  end
1111
2973
 
1112
- # def rolling_min
1113
- # end
1114
-
1115
- # def rolling_max
1116
- # end
1117
-
1118
- # def rolling_mean
1119
- # end
1120
-
1121
- # def rolling_sum
1122
- # end
1123
-
1124
- # def rolling_std
1125
- # end
1126
-
1127
- # def rolling_var
1128
- # end
1129
-
1130
- # def rolling_median
1131
- # end
1132
-
1133
- # def rolling_quantile
1134
- # end
2974
+ # Apply a rolling min (moving min) over the values in this array.
2975
+ #
2976
+ # A window of length `window_size` will traverse the array. The values that fill
2977
+ # this window will (optionally) be multiplied with the weights given by the
2978
+ # `weights` vector. The resulting values will be aggregated to their minimum.
2979
+ #
2980
+ # @param window_size [Integer]
2981
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
2982
+ # size indicated by a timedelta or the following string language:
2983
+ #
2984
+ # - 1ns (1 nanosecond)
2985
+ # - 1us (1 microsecond)
2986
+ # - 1ms (1 millisecond)
2987
+ # - 1s (1 second)
2988
+ # - 1m (1 minute)
2989
+ # - 1h (1 hour)
2990
+ # - 1d (1 day)
2991
+ # - 1w (1 week)
2992
+ # - 1mo (1 calendar month)
2993
+ # - 1y (1 calendar year)
2994
+ # - 1i (1 index count)
2995
+ #
2996
+ # If a timedelta or the dynamic string language is used, the `by`
2997
+ # and `closed` arguments must also be set.
2998
+ # @param weights [Array]
2999
+ # An optional slice with the same length as the window that will be multiplied
3000
+ # elementwise with the values in the window.
3001
+ # @param min_periods [Integer]
3002
+ # The number of values in the window that should be non-null before computing
3003
+ # a result. If nil, it will be set equal to window size.
3004
+ # @param center [Boolean]
3005
+ # Set the labels at the center of the window
3006
+ # @param by [String]
3007
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3008
+ # set the column that will be used to determine the windows. This column must
3009
+ # be of dtype `{Date, Datetime}`
3010
+ # @param closed ["left", "right", "both", "none"]
3011
+ # Define whether the temporal window interval is closed or not.
3012
+ #
3013
+ # @note
3014
+ # This functionality is experimental and may change without it being considered a
3015
+ # breaking change.
3016
+ #
3017
+ # @note
3018
+ # If you want to compute multiple aggregation statistics over the same dynamic
3019
+ # window, consider using `groupby_rolling` this method can cache the window size
3020
+ # computation.
3021
+ #
3022
+ # @return [Expr]
3023
+ #
3024
+ # @example
3025
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3026
+ # df.select(
3027
+ # [
3028
+ # Polars.col("A").rolling_min(2)
3029
+ # ]
3030
+ # )
3031
+ # # =>
3032
+ # # shape: (6, 1)
3033
+ # # ┌──────┐
3034
+ # # │ A │
3035
+ # # │ --- │
3036
+ # # │ f64 │
3037
+ # # ╞══════╡
3038
+ # # │ null │
3039
+ # # ├╌╌╌╌╌╌┤
3040
+ # # │ 1.0 │
3041
+ # # ├╌╌╌╌╌╌┤
3042
+ # # │ 2.0 │
3043
+ # # ├╌╌╌╌╌╌┤
3044
+ # # │ 3.0 │
3045
+ # # ├╌╌╌╌╌╌┤
3046
+ # # │ 4.0 │
3047
+ # # ├╌╌╌╌╌╌┤
3048
+ # # │ 5.0 │
3049
+ # # └──────┘
3050
def rolling_min(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_min(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3067
+
3068
+ # Apply a rolling max (moving max) over the values in this array.
3069
+ #
3070
+ # A window of length `window_size` will traverse the array. The values that fill
3071
+ # this window will (optionally) be multiplied with the weights given by the
3072
+ # `weights` vector. The resulting values will be aggregated to their maximum.
3073
+ #
3074
+ # @param window_size [Integer]
3075
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3076
+ # size indicated by a timedelta or the following string language:
3077
+ #
3078
+ # - 1ns (1 nanosecond)
3079
+ # - 1us (1 microsecond)
3080
+ # - 1ms (1 millisecond)
3081
+ # - 1s (1 second)
3082
+ # - 1m (1 minute)
3083
+ # - 1h (1 hour)
3084
+ # - 1d (1 day)
3085
+ # - 1w (1 week)
3086
+ # - 1mo (1 calendar month)
3087
+ # - 1y (1 calendar year)
3088
+ # - 1i (1 index count)
3089
+ #
3090
+ # If a timedelta or the dynamic string language is used, the `by`
3091
+ # and `closed` arguments must also be set.
3092
+ # @param weights [Array]
3093
+ # An optional slice with the same length as the window that will be multiplied
3094
+ # elementwise with the values in the window.
3095
+ # @param min_periods [Integer]
3096
+ # The number of values in the window that should be non-null before computing
3097
+ # a result. If nil, it will be set equal to window size.
3098
+ # @param center [Boolean]
3099
+ # Set the labels at the center of the window
3100
+ # @param by [String]
3101
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3102
+ # set the column that will be used to determine the windows. This column must
3103
+ # be of dtype `{Date, Datetime}`
3104
+ # @param closed ["left", "right", "both", "none"]
3105
+ # Define whether the temporal window interval is closed or not.
3106
+ #
3107
+ # @note
3108
+ # This functionality is experimental and may change without it being considered a
3109
+ # breaking change.
3110
+ #
3111
+ # @note
3112
+ # If you want to compute multiple aggregation statistics over the same dynamic
3113
+ # window, consider using `groupby_rolling` this method can cache the window size
3114
+ # computation.
3115
+ #
3116
+ # @return [Expr]
3117
+ #
3118
+ # @example
3119
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3120
+ # df.select(
3121
+ # [
3122
+ # Polars.col("A").rolling_max(2)
3123
+ # ]
3124
+ # )
3125
+ # # =>
3126
+ # # shape: (6, 1)
3127
+ # # ┌──────┐
3128
+ # # │ A │
3129
+ # # │ --- │
3130
+ # # │ f64 │
3131
+ # # ╞══════╡
3132
+ # # │ null │
3133
+ # # ├╌╌╌╌╌╌┤
3134
+ # # │ 2.0 │
3135
+ # # ├╌╌╌╌╌╌┤
3136
+ # # │ 3.0 │
3137
+ # # ├╌╌╌╌╌╌┤
3138
+ # # │ 4.0 │
3139
+ # # ├╌╌╌╌╌╌┤
3140
+ # # │ 5.0 │
3141
+ # # ├╌╌╌╌╌╌┤
3142
+ # # │ 6.0 │
3143
+ # # └──────┘
3144
def rolling_max(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_max(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3161
+
3162
+ # Apply a rolling mean (moving mean) over the values in this array.
3163
+ #
3164
+ # A window of length `window_size` will traverse the array. The values that fill
3165
+ # this window will (optionally) be multiplied with the weights given by the
3166
+ # `weights` vector. The resulting values will be aggregated to their mean.
3167
+ #
3168
+ # @param window_size [Integer]
3169
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3170
+ # size indicated by a timedelta or the following string language:
3171
+ #
3172
+ # - 1ns (1 nanosecond)
3173
+ # - 1us (1 microsecond)
3174
+ # - 1ms (1 millisecond)
3175
+ # - 1s (1 second)
3176
+ # - 1m (1 minute)
3177
+ # - 1h (1 hour)
3178
+ # - 1d (1 day)
3179
+ # - 1w (1 week)
3180
+ # - 1mo (1 calendar month)
3181
+ # - 1y (1 calendar year)
3182
+ # - 1i (1 index count)
3183
+ #
3184
+ # If a timedelta or the dynamic string language is used, the `by`
3185
+ # and `closed` arguments must also be set.
3186
+ # @param weights [Array]
3187
+ # An optional slice with the same length as the window that will be multiplied
3188
+ # elementwise with the values in the window.
3189
+ # @param min_periods [Integer]
3190
+ # The number of values in the window that should be non-null before computing
3191
+ # a result. If nil, it will be set equal to window size.
3192
+ # @param center [Boolean]
3193
+ # Set the labels at the center of the window
3194
+ # @param by [String]
3195
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3196
+ # set the column that will be used to determine the windows. This column must
3197
+ # be of dtype `{Date, Datetime}`
3198
+ # @param closed ["left", "right", "both", "none"]
3199
+ # Define whether the temporal window interval is closed or not.
3200
+ #
3201
+ # @note
3202
+ # This functionality is experimental and may change without it being considered a
3203
+ # breaking change.
3204
+ #
3205
+ # @note
3206
+ # If you want to compute multiple aggregation statistics over the same dynamic
3207
+ # window, consider using `groupby_rolling` this method can cache the window size
3208
+ # computation.
3209
+ #
3210
+ # @return [Expr]
3211
+ #
3212
+ # @example
3213
+ # df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
3214
+ # df.select(
3215
+ # [
3216
+ # Polars.col("A").rolling_mean(2)
3217
+ # ]
3218
+ # )
3219
+ # # =>
3220
+ # # shape: (6, 1)
3221
+ # # ┌──────┐
3222
+ # # │ A │
3223
+ # # │ --- │
3224
+ # # │ f64 │
3225
+ # # ╞══════╡
3226
+ # # │ null │
3227
+ # # ├╌╌╌╌╌╌┤
3228
+ # # │ 4.5 │
3229
+ # # ├╌╌╌╌╌╌┤
3230
+ # # │ 7.0 │
3231
+ # # ├╌╌╌╌╌╌┤
3232
+ # # │ 4.0 │
3233
+ # # ├╌╌╌╌╌╌┤
3234
+ # # │ 9.0 │
3235
+ # # ├╌╌╌╌╌╌┤
3236
+ # # │ 13.0 │
3237
+ # # └──────┘
3238
def rolling_mean(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_mean(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3255
+
3256
+ # Apply a rolling sum (moving sum) over the values in this array.
3257
+ #
3258
+ # A window of length `window_size` will traverse the array. The values that fill
3259
+ # this window will (optionally) be multiplied with the weights given by the
3260
+ # `weights` vector. The resulting values will be aggregated to their sum.
3261
+ #
3262
+ # @param window_size [Integer]
3263
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3264
+ # size indicated by a timedelta or the following string language:
3265
+ #
3266
+ # - 1ns (1 nanosecond)
3267
+ # - 1us (1 microsecond)
3268
+ # - 1ms (1 millisecond)
3269
+ # - 1s (1 second)
3270
+ # - 1m (1 minute)
3271
+ # - 1h (1 hour)
3272
+ # - 1d (1 day)
3273
+ # - 1w (1 week)
3274
+ # - 1mo (1 calendar month)
3275
+ # - 1y (1 calendar year)
3276
+ # - 1i (1 index count)
3277
+ #
3278
+ # If a timedelta or the dynamic string language is used, the `by`
3279
+ # and `closed` arguments must also be set.
3280
+ # @param weights [Array]
3281
+ # An optional slice with the same length as the window that will be multiplied
3282
+ # elementwise with the values in the window.
3283
+ # @param min_periods [Integer]
3284
+ # The number of values in the window that should be non-null before computing
3285
+ # a result. If nil, it will be set equal to window size.
3286
+ # @param center [Boolean]
3287
+ # Set the labels at the center of the window
3288
+ # @param by [String]
3289
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3290
+ # set the column that will be used to determine the windows. This column must
3291
+ # be of dtype `{Date, Datetime}`
3292
+ # @param closed ["left", "right", "both", "none"]
3293
+ # Define whether the temporal window interval is closed or not.
3294
+ #
3295
+ # @note
3296
+ # This functionality is experimental and may change without it being considered a
3297
+ # breaking change.
3298
+ #
3299
+ # @note
3300
+ # If you want to compute multiple aggregation statistics over the same dynamic
3301
+ # window, consider using `groupby_rolling` this method can cache the window size
3302
+ # computation.
3303
+ #
3304
+ # @return [Expr]
3305
+ #
3306
+ # @example
3307
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3308
+ # df.select(
3309
+ # [
3310
+ # Polars.col("A").rolling_sum(2)
3311
+ # ]
3312
+ # )
3313
+ # # =>
3314
+ # # shape: (6, 1)
3315
+ # # ┌──────┐
3316
+ # # │ A │
3317
+ # # │ --- │
3318
+ # # │ f64 │
3319
+ # # ╞══════╡
3320
+ # # │ null │
3321
+ # # ├╌╌╌╌╌╌┤
3322
+ # # │ 3.0 │
3323
+ # # ├╌╌╌╌╌╌┤
3324
+ # # │ 5.0 │
3325
+ # # ├╌╌╌╌╌╌┤
3326
+ # # │ 7.0 │
3327
+ # # ├╌╌╌╌╌╌┤
3328
+ # # │ 9.0 │
3329
+ # # ├╌╌╌╌╌╌┤
3330
+ # # │ 11.0 │
3331
+ # # └──────┘
3332
def rolling_sum(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_sum(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3349
+
3350
+ # Compute a rolling standard deviation.
3351
+ #
3352
+ # A window of length `window_size` will traverse the array. The values that fill
3353
+ # this window will (optionally) be multiplied with the weights given by the
3354
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3355
+ #
3356
+ # @param window_size [Integer]
3357
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3358
+ # size indicated by a timedelta or the following string language:
3359
+ #
3360
+ # - 1ns (1 nanosecond)
3361
+ # - 1us (1 microsecond)
3362
+ # - 1ms (1 millisecond)
3363
+ # - 1s (1 second)
3364
+ # - 1m (1 minute)
3365
+ # - 1h (1 hour)
3366
+ # - 1d (1 day)
3367
+ # - 1w (1 week)
3368
+ # - 1mo (1 calendar month)
3369
+ # - 1y (1 calendar year)
3370
+ # - 1i (1 index count)
3371
+ #
3372
+ # If a timedelta or the dynamic string language is used, the `by`
3373
+ # and `closed` arguments must also be set.
3374
+ # @param weights [Array]
3375
+ # An optional slice with the same length as the window that will be multiplied
3376
+ # elementwise with the values in the window.
3377
+ # @param min_periods [Integer]
3378
+ # The number of values in the window that should be non-null before computing
3379
+ # a result. If nil, it will be set equal to window size.
3380
+ # @param center [Boolean]
3381
+ # Set the labels at the center of the window
3382
+ # @param by [String]
3383
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3384
+ # set the column that will be used to determine the windows. This column must
3385
+ # be of dtype `{Date, Datetime}`
3386
+ # @param closed ["left", "right", "both", "none"]
3387
+ # Define whether the temporal window interval is closed or not.
3388
+ #
3389
+ # @note
3390
+ # This functionality is experimental and may change without it being considered a
3391
+ # breaking change.
3392
+ #
3393
+ # @note
3394
+ # If you want to compute multiple aggregation statistics over the same dynamic
3395
+ # window, consider using `groupby_rolling` this method can cache the window size
3396
+ # computation.
3397
+ #
3398
+ # @return [Expr]
3399
+ #
3400
+ # @example
3401
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3402
+ # df.select(
3403
+ # [
3404
+ # Polars.col("A").rolling_std(3)
3405
+ # ]
3406
+ # )
3407
+ # # =>
3408
+ # # shape: (6, 1)
3409
+ # # ┌──────────┐
3410
+ # # │ A │
3411
+ # # │ --- │
3412
+ # # │ f64 │
3413
+ # # ╞══════════╡
3414
+ # # │ null │
3415
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3416
+ # # │ null │
3417
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3418
+ # # │ 1.0 │
3419
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3420
+ # # │ 1.0 │
3421
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3422
+ # # │ 1.527525 │
3423
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3424
+ # # │ 2.0 │
3425
+ # # └──────────┘
3426
def rolling_std(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_std(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3443
+
3444
+ # Compute a rolling variance.
3445
+ #
3446
+ # A window of length `window_size` will traverse the array. The values that fill
3447
+ # this window will (optionally) be multiplied with the weights given by the
3448
+ # `weights` vector. The resulting values will be aggregated to their variance.
3449
+ #
3450
+ # @param window_size [Integer]
3451
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3452
+ # size indicated by a timedelta or the following string language:
3453
+ #
3454
+ # - 1ns (1 nanosecond)
3455
+ # - 1us (1 microsecond)
3456
+ # - 1ms (1 millisecond)
3457
+ # - 1s (1 second)
3458
+ # - 1m (1 minute)
3459
+ # - 1h (1 hour)
3460
+ # - 1d (1 day)
3461
+ # - 1w (1 week)
3462
+ # - 1mo (1 calendar month)
3463
+ # - 1y (1 calendar year)
3464
+ # - 1i (1 index count)
3465
+ #
3466
+ # If a timedelta or the dynamic string language is used, the `by`
3467
+ # and `closed` arguments must also be set.
3468
+ # @param weights [Array]
3469
+ # An optional slice with the same length as the window that will be multiplied
3470
+ # elementwise with the values in the window.
3471
+ # @param min_periods [Integer]
3472
+ # The number of values in the window that should be non-null before computing
3473
+ # a result. If nil, it will be set equal to window size.
3474
+ # @param center [Boolean]
3475
+ # Set the labels at the center of the window
3476
+ # @param by [String]
3477
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3478
+ # set the column that will be used to determine the windows. This column must
3479
+ # be of dtype `{Date, Datetime}`
3480
+ # @param closed ["left", "right", "both", "none"]
3481
+ # Define whether the temporal window interval is closed or not.
3482
+ #
3483
+ # @note
3484
+ # This functionality is experimental and may change without it being considered a
3485
+ # breaking change.
3486
+ #
3487
+ # @note
3488
+ # If you want to compute multiple aggregation statistics over the same dynamic
3489
+ # window, consider using `groupby_rolling` this method can cache the window size
3490
+ # computation.
3491
+ #
3492
+ # @return [Expr]
3493
+ #
3494
+ # @example
3495
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3496
+ # df.select(
3497
+ # [
3498
+ # Polars.col("A").rolling_var(3)
3499
+ # ]
3500
+ # )
3501
+ # # =>
3502
+ # # shape: (6, 1)
3503
+ # # ┌──────────┐
3504
+ # # │ A │
3505
+ # # │ --- │
3506
+ # # │ f64 │
3507
+ # # ╞══════════╡
3508
+ # # │ null │
3509
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3510
+ # # │ null │
3511
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3512
+ # # │ 1.0 │
3513
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3514
+ # # │ 1.0 │
3515
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3516
+ # # │ 2.333333 │
3517
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3518
+ # # │ 4.0 │
3519
+ # # └──────────┘
3520
def rolling_var(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_var(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3537
+
3538
+ # Compute a rolling median.
3539
+ #
3540
+ # @param window_size [Integer]
3541
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3542
+ # size indicated by a timedelta or the following string language:
3543
+ #
3544
+ # - 1ns (1 nanosecond)
3545
+ # - 1us (1 microsecond)
3546
+ # - 1ms (1 millisecond)
3547
+ # - 1s (1 second)
3548
+ # - 1m (1 minute)
3549
+ # - 1h (1 hour)
3550
+ # - 1d (1 day)
3551
+ # - 1w (1 week)
3552
+ # - 1mo (1 calendar month)
3553
+ # - 1y (1 calendar year)
3554
+ # - 1i (1 index count)
3555
+ #
3556
+ # If a timedelta or the dynamic string language is used, the `by`
3557
+ # and `closed` arguments must also be set.
3558
+ # @param weights [Array]
3559
+ # An optional slice with the same length as the window that will be multiplied
3560
+ # elementwise with the values in the window.
3561
+ # @param min_periods [Integer]
3562
+ # The number of values in the window that should be non-null before computing
3563
+ # a result. If nil, it will be set equal to window size.
3564
+ # @param center [Boolean]
3565
+ # Set the labels at the center of the window
3566
+ # @param by [String]
3567
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3568
+ # set the column that will be used to determine the windows. This column must
3569
+ # be of dtype `{Date, Datetime}`
3570
+ # @param closed ["left", "right", "both", "none"]
3571
+ # Define whether the temporal window interval is closed or not.
3572
+ #
3573
+ # @note
3574
+ # This functionality is experimental and may change without it being considered a
3575
+ # breaking change.
3576
+ #
3577
+ # @note
3578
+ # If you want to compute multiple aggregation statistics over the same dynamic
3579
+ # window, consider using `groupby_rolling` this method can cache the window size
3580
+ # computation.
3581
+ #
3582
+ # @return [Expr]
3583
+ #
3584
+ # @example
3585
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3586
+ # df.select(
3587
+ # [
3588
+ # Polars.col("A").rolling_median(3)
3589
+ # ]
3590
+ # )
3591
+ # # =>
3592
+ # # shape: (6, 1)
3593
+ # # ┌──────┐
3594
+ # # │ A │
3595
+ # # │ --- │
3596
+ # # │ f64 │
3597
+ # # ╞══════╡
3598
+ # # │ null │
3599
+ # # ├╌╌╌╌╌╌┤
3600
+ # # │ null │
3601
+ # # ├╌╌╌╌╌╌┤
3602
+ # # │ 2.0 │
3603
+ # # ├╌╌╌╌╌╌┤
3604
+ # # │ 3.0 │
3605
+ # # ├╌╌╌╌╌╌┤
3606
+ # # │ 4.0 │
3607
+ # # ├╌╌╌╌╌╌┤
3608
+ # # │ 6.0 │
3609
+ # # └──────┘
3610
def rolling_median(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_median(window_size, weights, min_periods, center, by, closed)
  wrap_expr(rolled)
end
3627
+
3628
+ # Compute a rolling quantile.
3629
+ #
3630
+ # @param quantile [Float]
3631
+ # Quantile between 0.0 and 1.0.
3632
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3633
+ # Interpolation method.
3634
+ # @param window_size [Integer]
3635
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3636
+ # size indicated by a timedelta or the following string language:
3637
+ #
3638
+ # - 1ns (1 nanosecond)
3639
+ # - 1us (1 microsecond)
3640
+ # - 1ms (1 millisecond)
3641
+ # - 1s (1 second)
3642
+ # - 1m (1 minute)
3643
+ # - 1h (1 hour)
3644
+ # - 1d (1 day)
3645
+ # - 1w (1 week)
3646
+ # - 1mo (1 calendar month)
3647
+ # - 1y (1 calendar year)
3648
+ # - 1i (1 index count)
3649
+ #
3650
+ # If a timedelta or the dynamic string language is used, the `by`
3651
+ # and `closed` arguments must also be set.
3652
+ # @param weights [Array]
3653
+ # An optional slice with the same length as the window that will be multiplied
3654
+ # elementwise with the values in the window.
3655
+ # @param min_periods [Integer]
3656
+ # The number of values in the window that should be non-null before computing
3657
+ # a result. If nil, it will be set equal to window size.
3658
+ # @param center [Boolean]
3659
+ # Set the labels at the center of the window
3660
+ # @param by [String]
3661
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3662
+ # set the column that will be used to determine the windows. This column must
3663
+ # be of dtype `{Date, Datetime}`
3664
+ # @param closed ["left", "right", "both", "none"]
3665
+ # Define whether the temporal window interval is closed or not.
3666
+ #
3667
+ # @note
3668
+ # This functionality is experimental and may change without it being considered a
3669
+ # breaking change.
3670
+ #
3671
+ # @note
3672
+ # If you want to compute multiple aggregation statistics over the same dynamic
3673
+ # window, consider using `groupby_rolling` this method can cache the window size
3674
+ # computation.
3675
+ #
3676
+ # @return [Expr]
3677
+ #
3678
+ # @example
3679
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3680
+ # df.select(
3681
+ # [
3682
+ # Polars.col("A").rolling_quantile(0.33, window_size: 3)
3683
+ # ]
3684
+ # )
3685
+ # # =>
3686
+ # # shape: (6, 1)
3687
+ # # ┌──────┐
3688
+ # # │ A │
3689
+ # # │ --- │
3690
+ # # │ f64 │
3691
+ # # ╞══════╡
3692
+ # # │ null │
3693
+ # # ├╌╌╌╌╌╌┤
3694
+ # # │ null │
3695
+ # # ├╌╌╌╌╌╌┤
3696
+ # # │ 1.0 │
3697
+ # # ├╌╌╌╌╌╌┤
3698
+ # # │ 2.0 │
3699
+ # # ├╌╌╌╌╌╌┤
3700
+ # # │ 3.0 │
3701
+ # # ├╌╌╌╌╌╌┤
3702
+ # # │ 4.0 │
3703
+ # # └──────┘
3704
def rolling_quantile(
  quantile,
  interpolation: "nearest", window_size: 2, weights: nil,
  min_periods: nil, center: false, by: nil, closed: "left"
)
  # The helper returns the (window_size, min_periods) pair that is actually
  # passed on to `_rbexpr`; quantile and interpolation go through untouched.
  window_size, min_periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_quantile(
    quantile, interpolation, window_size, weights, min_periods, center, by, closed
  )
  wrap_expr(rolled)
end
1135
3723
 
1136
3724
  # def rolling_apply
1137
3725
  # end
1138
3726
 
3727
+ # Compute a rolling skew.
1139
3728
  #
3729
+ # @param window_size [Integer]
3730
+ # Integer size of the rolling window.
3731
+ # @param bias [Boolean]
3732
+ # If false, the calculations are corrected for statistical bias.
3733
+ #
3734
+ # @return [Expr]
1140
3735
  def rolling_skew(window_size, bias: true)
1141
3736
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
1142
3737
  end
1143
3738
 
3739
+ # Compute absolute values.
3740
+ #
3741
+ # @return [Expr]
3742
+ #
3743
+ # @example
3744
+ # df = Polars::DataFrame.new(
3745
+ # {
3746
+ # "A" => [-1.0, 0.0, 1.0, 2.0]
3747
+ # }
3748
+ # )
3749
+ # df.select(Polars.col("A").abs)
3750
+ # # =>
3751
+ # # shape: (4, 1)
3752
+ # # ┌─────┐
3753
+ # # │ A │
3754
+ # # │ --- │
3755
+ # # │ f64 │
3756
+ # # ╞═════╡
3757
+ # # │ 1.0 │
3758
+ # # ├╌╌╌╌╌┤
3759
+ # # │ 0.0 │
3760
+ # # ├╌╌╌╌╌┤
3761
+ # # │ 1.0 │
3762
+ # # ├╌╌╌╌╌┤
3763
+ # # │ 2.0 │
3764
+ # # └─────┘
1144
3765
  def abs
1145
3766
  wrap_expr(_rbexpr.abs)
1146
3767
  end
1147
3768
 
3769
+ # Get the index values that would sort this column.
3770
+ #
3771
+ # Alias for {#arg_sort}.
3772
+ #
3773
+ # @param reverse [Boolean]
3774
+ # Sort in reverse (descending) order.
3775
+ # @param nulls_last [Boolean]
3776
+ # Place null values last instead of first.
3777
+ #
3778
+ # @return [Expr]
3779
+ #
3780
+ # @example
3781
+ # df = Polars::DataFrame.new(
3782
+ # {
3783
+ # "a" => [20, 10, 30]
3784
+ # }
3785
+ # )
3786
+ # df.select(Polars.col("a").argsort)
3787
+ # # =>
3788
+ # # shape: (3, 1)
3789
+ # # ┌─────┐
3790
+ # # │ a │
3791
+ # # │ --- │
3792
+ # # │ u32 │
3793
+ # # ╞═════╡
3794
+ # # │ 1 │
3795
+ # # ├╌╌╌╌╌┤
3796
+ # # │ 0 │
3797
+ # # ├╌╌╌╌╌┤
3798
+ # # │ 2 │
3799
+ # # └─────┘
1148
3800
  def argsort(reverse: false, nulls_last: false)
1149
3801
  arg_sort(reverse: reverse, nulls_last: nulls_last)
1150
3802
  end
1151
3803
 
3804
+ # Assign ranks to data, dealing with ties appropriately.
3805
+ #
3806
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3807
+ # The method used to assign ranks to tied elements.
3808
+ # The following methods are available:
3809
+ #
3810
+ # - 'average' : The average of the ranks that would have been assigned to
3811
+ # all the tied values is assigned to each value.
3812
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3813
+ # the tied values is assigned to each value. (This is also referred to
3814
+ # as "competition" ranking.)
3815
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3816
+ # the tied values is assigned to each value.
3817
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3818
+ # assigned the rank immediately after those assigned to the tied
3819
+ # elements.
3820
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3821
+ # the order that the values occur in the Series.
3822
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3823
+ # on the order that the values occur in the Series.
3824
+ # @param reverse [Boolean]
3825
+ # Reverse the operation.
3826
+ #
3827
+ # @return [Expr]
3828
+ #
3829
+ # @example The 'average' method:
3830
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
3831
+ # df.select(Polars.col("a").rank)
3832
+ # # =>
3833
+ # # shape: (5, 1)
3834
+ # # ┌─────┐
3835
+ # # │ a │
3836
+ # # │ --- │
3837
+ # # │ f32 │
3838
+ # # ╞═════╡
3839
+ # # │ 3.0 │
3840
+ # # ├╌╌╌╌╌┤
3841
+ # # │ 4.5 │
3842
+ # # ├╌╌╌╌╌┤
3843
+ # # │ 1.5 │
3844
+ # # ├╌╌╌╌╌┤
3845
+ # # │ 1.5 │
3846
+ # # ├╌╌╌╌╌┤
3847
+ # # │ 4.5 │
3848
+ # # └─────┘
3849
+ #
3850
+ # @example The 'ordinal' method:
3851
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
3852
+ # df.select(Polars.col("a").rank(method: "ordinal"))
3853
+ # # =>
3854
+ # # shape: (5, 1)
3855
+ # # ┌─────┐
3856
+ # # │ a │
3857
+ # # │ --- │
3858
+ # # │ u32 │
3859
+ # # ╞═════╡
3860
+ # # │ 3 │
3861
+ # # ├╌╌╌╌╌┤
3862
+ # # │ 4 │
3863
+ # # ├╌╌╌╌╌┤
3864
+ # # │ 1 │
3865
+ # # ├╌╌╌╌╌┤
3866
+ # # │ 2 │
3867
+ # # ├╌╌╌╌╌┤
3868
+ # # │ 5 │
3869
+ # # └─────┘
1152
3870
  def rank(method: "average", reverse: false)
1153
3871
  wrap_expr(_rbexpr.rank(method, reverse))
1154
3872
  end
1155
3873
 
3874
+ # Calculate the n-th discrete difference.
3875
+ #
3876
+ # @param n [Integer]
3877
+ # Number of slots to shift.
3878
+ # @param null_behavior ["ignore", "drop"]
3879
+ # How to handle null values.
3880
+ #
3881
+ # @return [Expr]
3882
+ #
3883
+ # @example
3884
+ # df = Polars::DataFrame.new(
3885
+ # {
3886
+ # "a" => [20, 10, 30]
3887
+ # }
3888
+ # )
3889
+ # df.select(Polars.col("a").diff)
3890
+ # # =>
3891
+ # # shape: (3, 1)
3892
+ # # ┌──────┐
3893
+ # # │ a │
3894
+ # # │ --- │
3895
+ # # │ i64 │
3896
+ # # ╞══════╡
3897
+ # # │ null │
3898
+ # # ├╌╌╌╌╌╌┤
3899
+ # # │ -10 │
3900
+ # # ├╌╌╌╌╌╌┤
3901
+ # # │ 20 │
3902
+ # # └──────┘
1156
3903
  def diff(n: 1, null_behavior: "ignore")
1157
3904
  wrap_expr(_rbexpr.diff(n, null_behavior))
1158
3905
  end
1159
3906
 
3907
+ # Computes percentage change between values.
3908
+ #
3909
+ # Percentage change (as fraction) between current element and most-recent
3910
+ # non-null element at least `n` period(s) before the current element.
3911
+ #
3912
+ # Computes the change from the previous row by default.
3913
+ #
3914
+ # @param n [Integer]
3915
+ # Periods to shift for forming percent change.
3916
+ #
3917
+ # @return [Expr]
3918
+ #
3919
+ # @example
3920
+ # df = Polars::DataFrame.new(
3921
+ # {
3922
+ # "a" => [10, 11, 12, nil, 12]
3923
+ # }
3924
+ # )
3925
+ # df.with_column(Polars.col("a").pct_change.alias("pct_change"))
3926
+ # # =>
3927
+ # # shape: (5, 2)
3928
+ # # ┌──────┬────────────┐
3929
+ # # │ a ┆ pct_change │
3930
+ # # │ --- ┆ --- │
3931
+ # # │ i64 ┆ f64 │
3932
+ # # ╞══════╪════════════╡
3933
+ # # │ 10 ┆ null │
3934
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3935
+ # # │ 11 ┆ 0.1 │
3936
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3937
+ # # │ 12 ┆ 0.090909 │
3938
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3939
+ # # │ null ┆ 0.0 │
3940
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3941
+ # # │ 12 ┆ 0.0 │
3942
+ # # └──────┴────────────┘
1160
3943
  def pct_change(n: 1)
1161
3944
  wrap_expr(_rbexpr.pct_change(n))
1162
3945
  end
1163
3946
 
3947
+ # Compute the sample skewness of a data set.
3948
+ #
3949
+ # For normally distributed data, the skewness should be about zero. For
3950
+ # unimodal continuous distributions, a skewness value greater than zero means
3951
+ # that there is more weight in the right tail of the distribution. The
3952
+ # function `skewtest` can be used to determine if the skewness value
3953
+ # is close enough to zero, statistically speaking.
3954
+ #
3955
+ # @param bias [Boolean]
3956
+ # If false, the calculations are corrected for statistical bias.
3957
+ #
3958
+ # @return [Expr]
3959
+ #
3960
+ # @example
3961
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
3962
+ # df.select(Polars.col("a").skew)
3963
+ # # =>
3964
+ # # shape: (1, 1)
3965
+ # # ┌──────────┐
3966
+ # # │ a │
3967
+ # # │ --- │
3968
+ # # │ f64 │
3969
+ # # ╞══════════╡
3970
+ # # │ 0.343622 │
3971
+ # # └──────────┘
1164
3972
  def skew(bias: true)
1165
3973
  wrap_expr(_rbexpr.skew(bias))
1166
3974
  end
1167
3975
 
3976
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3977
+ #
3978
+ # Kurtosis is the fourth central moment divided by the square of the
3979
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3980
+ # the result to give 0.0 for a normal distribution.
3981
+ # If bias is false then the kurtosis is calculated using k statistics to
3982
+ # eliminate bias coming from biased moment estimators.
3983
+ #
3984
+ # @param fisher [Boolean]
3985
+ # If true, Fisher's definition is used (normal ==> 0.0). If false,
3986
+ # Pearson's definition is used (normal ==> 3.0).
3987
+ # @param bias [Boolean]
3988
+ # If false, the calculations are corrected for statistical bias.
3989
+ #
3990
+ # @return [Expr]
3991
+ #
3992
+ # @example
3993
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
3994
+ # df.select(Polars.col("a").kurtosis)
3995
+ # # =>
3996
+ # # shape: (1, 1)
3997
+ # # ┌───────────┐
3998
+ # # │ a │
3999
+ # # │ --- │
4000
+ # # │ f64 │
4001
+ # # ╞═══════════╡
4002
+ # # │ -1.153061 │
4003
+ # # └───────────┘
1168
4004
  def kurtosis(fisher: true, bias: true)
1169
4005
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
1170
4006
  end
1171
4007
 
4008
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
4009
+ #
4010
+ # Only works for numerical types.
4011
+ #
4012
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4013
+ # expression. See `when` for more information.
4014
+ #
4015
+ # @param min_val [Numeric]
4016
+ # Minimum value.
4017
+ # @param max_val [Numeric]
4018
+ # Maximum value.
4019
+ #
4020
+ # @return [Expr]
4021
+ #
4022
+ # @example
4023
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4024
+ # df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
4025
+ # # =>
4026
+ # # shape: (4, 2)
4027
+ # # ┌──────┬─────────────┐
4028
+ # # │ foo ┆ foo_clipped │
4029
+ # # │ --- ┆ --- │
4030
+ # # │ i64 ┆ i64 │
4031
+ # # ╞══════╪═════════════╡
4032
+ # # │ -50 ┆ 1 │
4033
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4034
+ # # │ 5 ┆ 5 │
4035
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4036
+ # # │ null ┆ null │
4037
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4038
+ # # │ 50 ┆ 10 │
4039
+ # # └──────┴─────────────┘
1172
4040
  def clip(min_val, max_val)
1173
4041
  wrap_expr(_rbexpr.clip(min_val, max_val))
1174
4042
  end
1175
4043
 
4044
+ # Clip (limit) the values in an array to a `min` boundary.
4045
+ #
4046
+ # Only works for numerical types.
4047
+ #
4048
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4049
+ # expression. See `when` for more information.
4050
+ #
4051
+ # @param min_val [Numeric]
4052
+ # Minimum value.
4053
+ #
4054
+ # @return [Expr]
4055
+ #
4056
+ # @example
4057
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4058
+ # df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
4059
+ # # =>
4060
+ # # shape: (4, 2)
4061
+ # # ┌──────┬─────────────┐
4062
+ # # │ foo ┆ foo_clipped │
4063
+ # # │ --- ┆ --- │
4064
+ # # │ i64 ┆ i64 │
4065
+ # # ╞══════╪═════════════╡
4066
+ # # │ -50 ┆ 0 │
4067
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4068
+ # # │ 5 ┆ 5 │
4069
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4070
+ # # │ null ┆ null │
4071
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4072
+ # # │ 50 ┆ 50 │
4073
+ # # └──────┴─────────────┘
1176
4074
  def clip_min(min_val)
1177
4075
  wrap_expr(_rbexpr.clip_min(min_val))
1178
4076
  end
1179
4077
 
4078
+ # Clip (limit) the values in an array to a `max` boundary.
4079
+ #
4080
+ # Only works for numerical types.
4081
+ #
4082
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4083
+ # expression. See `when` for more information.
4084
+ #
4085
+ # @param max_val [Numeric]
4086
+ # Maximum value.
4087
+ #
4088
+ # @return [Expr]
4089
+ #
4090
+ # @example
4091
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4092
+ # df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
4093
+ # # =>
4094
+ # # shape: (4, 2)
4095
+ # # ┌──────┬─────────────┐
4096
+ # # │ foo ┆ foo_clipped │
4097
+ # # │ --- ┆ --- │
4098
+ # # │ i64 ┆ i64 │
4099
+ # # ╞══════╪═════════════╡
4100
+ # # │ -50 ┆ -50 │
4101
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4102
+ # # │ 5 ┆ 0 │
4103
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4104
+ # # │ null ┆ null │
4105
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4106
+ # # │ 50 ┆ 0 │
4107
+ # # └──────┴─────────────┘
1180
4108
  def clip_max(max_val)
1181
4109
  wrap_expr(_rbexpr.clip_max(max_val))
1182
4110
  end
1183
4111
 
4112
+ # Calculate the lower bound.
4113
+ #
4114
+ # Returns a unit Series with the lowest value possible for the dtype of this
4115
+ # expression.
4116
+ #
4117
+ # @return [Expr]
4118
+ #
4119
+ # @example
4120
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4121
+ # df.select(Polars.col("a").lower_bound)
4122
+ # # =>
4123
+ # # shape: (1, 1)
4124
+ # # ┌──────────────────────┐
4125
+ # # │ a │
4126
+ # # │ --- │
4127
+ # # │ i64 │
4128
+ # # ╞══════════════════════╡
4129
+ # # │ -9223372036854775808 │
4130
+ # # └──────────────────────┘
1184
4131
  def lower_bound
1185
4132
  wrap_expr(_rbexpr.lower_bound)
1186
4133
  end
1187
4134
 
4135
+ # Calculate the upper bound.
4136
+ #
4137
+ # Returns a unit Series with the highest value possible for the dtype of this
4138
+ # expression.
4139
+ #
4140
+ # @return [Expr]
4141
+ #
4142
+ # @example
4143
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4144
+ # df.select(Polars.col("a").upper_bound)
4145
+ # # =>
4146
+ # # shape: (1, 1)
4147
+ # # ┌─────────────────────┐
4148
+ # # │ a │
4149
+ # # │ --- │
4150
+ # # │ i64 │
4151
+ # # ╞═════════════════════╡
4152
+ # # │ 9223372036854775807 │
4153
+ # # └─────────────────────┘
1188
4154
  def upper_bound
1189
4155
  wrap_expr(_rbexpr.upper_bound)
1190
4156
  end
1191
4157
 
4158
+ # Compute the element-wise indication of the sign.
4159
+ #
4160
+ # @return [Expr]
4161
+ #
4162
+ # @example
4163
+ # df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
4164
+ # df.select(Polars.col("a").sign)
4165
+ # # =>
4166
+ # # shape: (5, 1)
4167
+ # # ┌──────┐
4168
+ # # │ a │
4169
+ # # │ --- │
4170
+ # # │ i64 │
4171
+ # # ╞══════╡
4172
+ # # │ -1 │
4173
+ # # ├╌╌╌╌╌╌┤
4174
+ # # │ 0 │
4175
+ # # ├╌╌╌╌╌╌┤
4176
+ # # │ 0 │
4177
+ # # ├╌╌╌╌╌╌┤
4178
+ # # │ 1 │
4179
+ # # ├╌╌╌╌╌╌┤
4180
+ # # │ null │
4181
+ # # └──────┘
1192
4182
  def sign
1193
4183
  wrap_expr(_rbexpr.sign)
1194
4184
  end
1195
4185
 
4186
+ # Compute the element-wise value for the sine.
4187
+ #
4188
+ # @return [Expr]
4189
+ #
4190
+ # @example
4191
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4192
+ # df.select(Polars.col("a").sin)
4193
+ # # =>
4194
+ # # shape: (1, 1)
4195
+ # # ┌─────┐
4196
+ # # │ a │
4197
+ # # │ --- │
4198
+ # # │ f64 │
4199
+ # # ╞═════╡
4200
+ # # │ 0.0 │
4201
+ # # └─────┘
1196
4202
  def sin
1197
4203
  wrap_expr(_rbexpr.sin)
1198
4204
  end
1199
4205
 
4206
+ # Compute the element-wise value for the cosine.
4207
+ #
4208
+ # @return [Expr]
4209
+ #
4210
+ # @example
4211
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4212
+ # df.select(Polars.col("a").cos)
4213
+ # # =>
4214
+ # # shape: (1, 1)
4215
+ # # ┌─────┐
4216
+ # # │ a │
4217
+ # # │ --- │
4218
+ # # │ f64 │
4219
+ # # ╞═════╡
4220
+ # # │ 1.0 │
4221
+ # # └─────┘
1200
4222
  def cos
1201
4223
  wrap_expr(_rbexpr.cos)
1202
4224
  end
1203
4225
 
4226
+ # Compute the element-wise value for the tangent.
4227
+ #
4228
+ # @return [Expr]
4229
+ #
4230
+ # @example
4231
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4232
+ # df.select(Polars.col("a").tan)
4233
+ # # =>
4234
+ # # shape: (1, 1)
4235
+ # # ┌──────────┐
4236
+ # # │ a │
4237
+ # # │ --- │
4238
+ # # │ f64 │
4239
+ # # ╞══════════╡
4240
+ # # │ 1.557408 │
4241
+ # # └──────────┘
1204
4242
  def tan
1205
4243
  wrap_expr(_rbexpr.tan)
1206
4244
  end
1207
4245
 
4246
+ # Compute the element-wise value for the inverse sine.
4247
+ #
4248
+ # @return [Expr]
4249
+ #
4250
+ # @example
4251
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4252
+ # df.select(Polars.col("a").arcsin)
4253
+ # # =>
4254
+ # # shape: (1, 1)
4255
+ # # ┌──────────┐
4256
+ # # │ a │
4257
+ # # │ --- │
4258
+ # # │ f64 │
4259
+ # # ╞══════════╡
4260
+ # # │ 1.570796 │
4261
+ # # └──────────┘
1208
4262
  def arcsin
1209
4263
  wrap_expr(_rbexpr.arcsin)
1210
4264
  end
1211
4265
 
4266
+ # Compute the element-wise value for the inverse cosine.
4267
+ #
4268
+ # @return [Expr]
4269
+ #
4270
+ # @example
4271
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4272
+ # df.select(Polars.col("a").arccos)
4273
+ # # =>
4274
+ # # shape: (1, 1)
4275
+ # # ┌──────────┐
4276
+ # # │ a │
4277
+ # # │ --- │
4278
+ # # │ f64 │
4279
+ # # ╞══════════╡
4280
+ # # │ 1.570796 │
4281
+ # # └──────────┘
1212
4282
  def arccos
1213
4283
  wrap_expr(_rbexpr.arccos)
1214
4284
  end
1215
4285
 
4286
+ # Compute the element-wise value for the inverse tangent.
4287
+ #
4288
+ # @return [Expr]
4289
+ #
4290
+ # @example
4291
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4292
+ # df.select(Polars.col("a").arctan)
4293
+ # # =>
4294
+ # # shape: (1, 1)
4295
+ # # ┌──────────┐
4296
+ # # │ a │
4297
+ # # │ --- │
4298
+ # # │ f64 │
4299
+ # # ╞══════════╡
4300
+ # # │ 0.785398 │
4301
+ # # └──────────┘
1216
4302
  def arctan
1217
4303
  wrap_expr(_rbexpr.arctan)
1218
4304
  end
1219
4305
 
4306
+ # Compute the element-wise value for the hyperbolic sine.
4307
+ #
4308
+ # @return [Expr]
4309
+ #
4310
+ # @example
4311
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4312
+ # df.select(Polars.col("a").sinh)
4313
+ # # =>
4314
+ # # shape: (1, 1)
4315
+ # # ┌──────────┐
4316
+ # # │ a │
4317
+ # # │ --- │
4318
+ # # │ f64 │
4319
+ # # ╞══════════╡
4320
+ # # │ 1.175201 │
4321
+ # # └──────────┘
1220
4322
  def sinh
1221
4323
  wrap_expr(_rbexpr.sinh)
1222
4324
  end
1223
4325
 
4326
+ # Compute the element-wise value for the hyperbolic cosine.
4327
+ #
4328
+ # @return [Expr]
4329
+ #
4330
+ # @example
4331
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4332
+ # df.select(Polars.col("a").cosh)
4333
+ # # =>
4334
+ # # shape: (1, 1)
4335
+ # # ┌──────────┐
4336
+ # # │ a │
4337
+ # # │ --- │
4338
+ # # │ f64 │
4339
+ # # ╞══════════╡
4340
+ # # │ 1.543081 │
4341
+ # # └──────────┘
1224
4342
  def cosh
1225
4343
  wrap_expr(_rbexpr.cosh)
1226
4344
  end
1227
4345
 
4346
+ # Compute the element-wise value for the hyperbolic tangent.
4347
+ #
4348
+ # @return [Expr]
4349
+ #
4350
+ # @example
4351
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4352
+ # df.select(Polars.col("a").tanh)
4353
+ # # =>
4354
+ # # shape: (1, 1)
4355
+ # # ┌──────────┐
4356
+ # # │ a │
4357
+ # # │ --- │
4358
+ # # │ f64 │
4359
+ # # ╞══════════╡
4360
+ # # │ 0.761594 │
4361
+ # # └──────────┘
1228
4362
  def tanh
1229
4363
  wrap_expr(_rbexpr.tanh)
1230
4364
  end
1231
4365
 
4366
+ # Compute the element-wise value for the inverse hyperbolic sine.
4367
+ #
4368
+ # @return [Expr]
4369
+ #
4370
+ # @example
4371
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4372
+ # df.select(Polars.col("a").arcsinh)
4373
+ # # =>
4374
+ # # shape: (1, 1)
4375
+ # # ┌──────────┐
4376
+ # # │ a │
4377
+ # # │ --- │
4378
+ # # │ f64 │
4379
+ # # ╞══════════╡
4380
+ # # │ 0.881374 │
4381
+ # # └──────────┘
1232
4382
  def arcsinh
1233
4383
  wrap_expr(_rbexpr.arcsinh)
1234
4384
  end
1235
4385
 
4386
+ # Compute the element-wise value for the inverse hyperbolic cosine.
4387
+ #
4388
+ # @return [Expr]
4389
+ #
4390
+ # @example
4391
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4392
+ # df.select(Polars.col("a").arccosh)
4393
+ # # =>
4394
+ # # shape: (1, 1)
4395
+ # # ┌─────┐
4396
+ # # │ a │
4397
+ # # │ --- │
4398
+ # # │ f64 │
4399
+ # # ╞═════╡
4400
+ # # │ 0.0 │
4401
+ # # └─────┘
1236
4402
  def arccosh
1237
4403
  wrap_expr(_rbexpr.arccosh)
1238
4404
  end
1239
4405
 
4406
+ # Compute the element-wise value for the inverse hyperbolic tangent.
4407
+ #
4408
+ # @return [Expr]
4409
+ #
4410
+ # @example
4411
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4412
+ # df.select(Polars.col("a").arctanh)
4413
+ # # =>
4414
+ # # shape: (1, 1)
4415
+ # # ┌─────┐
4416
+ # # │ a │
4417
+ # # │ --- │
4418
+ # # │ f64 │
4419
+ # # ╞═════╡
4420
+ # # │ inf │
4421
+ # # └─────┘
1240
4422
  def arctanh
1241
4423
  wrap_expr(_rbexpr.arctanh)
1242
4424
  end
1243
4425
 
4426
+ # Reshape this Expr to a flat Series or a Series of Lists.
4427
+ #
4428
+ # @param dims [Array]
4429
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
4430
+ # dimension is inferred.
4431
+ #
4432
+ # @return [Expr]
4433
+ #
4434
+ # @example
4435
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
4436
+ # df.select(Polars.col("foo").reshape([3, 3]))
4437
+ # # =>
4438
+ # # shape: (3, 1)
4439
+ # # ┌───────────┐
4440
+ # # │ foo │
4441
+ # # │ --- │
4442
+ # # │ list[i64] │
4443
+ # # ╞═══════════╡
4444
+ # # │ [1, 2, 3] │
4445
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4446
+ # # │ [4, 5, 6] │
4447
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4448
+ # # │ [7, 8, 9] │
4449
+ # # └───────────┘
1244
4450
  def reshape(dims)
1245
4451
  wrap_expr(_rbexpr.reshape(dims))
1246
4452
  end
1247
4453
 
4454
+ # Shuffle the contents of this expr.
4455
+ #
4456
+ # @param seed [Integer]
4457
+ # Seed for the random number generator. If set to nil (default), a random
4458
+ # seed is generated using `rand`.
4459
+ #
4460
+ # @return [Expr]
4461
+ #
4462
+ # @example
4463
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4464
+ # df.select(Polars.col("a").shuffle(seed: 1))
4465
+ # # =>
4466
+ # # shape: (3, 1)
4467
+ # # ┌─────┐
4468
+ # # │ a │
4469
+ # # │ --- │
4470
+ # # │ i64 │
4471
+ # # ╞═════╡
4472
+ # # │ 2 │
4473
+ # # ├╌╌╌╌╌┤
4474
+ # # │ 1 │
4475
+ # # ├╌╌╌╌╌┤
4476
+ # # │ 3 │
4477
+ # # └─────┘
1248
4478
  def shuffle(seed: nil)
1249
4479
  if seed.nil?
1250
4480
  seed = rand(10000)
@@ -1252,74 +4482,514 @@ module Polars
1252
4482
  wrap_expr(_rbexpr.shuffle(seed))
1253
4483
  end
1254
4484
 
1255
- # def sample
1256
- # end
1257
-
1258
- # def ewm_mean
1259
- # end
4485
+ # Sample from this expression.
4486
+ #
4487
+ # @param frac [Float]
4488
+ # Fraction of items to return. Cannot be used with `n`.
4489
+ # @param with_replacement [Boolean]
4490
+ # Allow values to be sampled more than once.
4491
+ # @param shuffle [Boolean]
4492
+ # Shuffle the order of sampled data points.
4493
+ # @param seed [Integer]
4494
+ # Seed for the random number generator. If set to nil (default), a random
4495
+ # seed is used.
4496
+ # @param n [Integer]
4497
+ # Number of items to return. Cannot be used with `frac`.
4498
+ #
4499
+ # @return [Expr]
4500
+ #
4501
+ # @example
4502
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4503
+ # df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
4504
+ # # =>
4505
+ # # shape: (3, 1)
4506
+ # # ┌─────┐
4507
+ # # │ a │
4508
+ # # │ --- │
4509
+ # # │ i64 │
4510
+ # # ╞═════╡
4511
+ # # │ 3 │
4512
+ # # ├╌╌╌╌╌┤
4513
+ # # │ 1 │
4514
+ # # ├╌╌╌╌╌┤
4515
+ # # │ 1 │
4516
+ # # └─────┘
4517
+ def sample(
4518
+ frac: nil,
4519
+ with_replacement: true,
4520
+ shuffle: false,
4521
+ seed: nil,
4522
+ n: nil
4523
+ )
4524
+ if !n.nil? && !frac.nil?
4525
+ raise ArgumentError, "cannot specify both `n` and `frac`"
4526
+ end
1260
4527
 
1261
- # def ewm_std
1262
- # end
4528
+ if !n.nil? && frac.nil?
4529
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4530
+ end
1263
4531
 
1264
- # def ewm_var
1265
- # end
4532
+ if frac.nil?
4533
+ frac = 1.0
4534
+ end
4535
+ wrap_expr(
4536
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4537
+ )
4538
+ end
1266
4539
 
4540
+ # Exponentially-weighted moving average.
4541
+ #
4542
+ # @return [Expr]
4543
+ #
4544
+ # @example
4545
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4546
+ # df.select(Polars.col("a").ewm_mean(com: 1))
4547
+ # # =>
4548
+ # # shape: (3, 1)
4549
+ # # ┌──────────┐
4550
+ # # │ a │
4551
+ # # │ --- │
4552
+ # # │ f64 │
4553
+ # # ╞══════════╡
4554
+ # # │ 1.0 │
4555
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4556
+ # # │ 1.666667 │
4557
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4558
+ # # │ 2.428571 │
4559
+ # # └──────────┘
4560
+ def ewm_mean(
4561
+ com: nil,
4562
+ span: nil,
4563
+ half_life: nil,
4564
+ alpha: nil,
4565
+ adjust: true,
4566
+ min_periods: 1
4567
+ )
4568
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4569
+ wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
4570
+ end
4571
+
4572
+ # Exponentially-weighted moving standard deviation.
4573
+ #
4574
+ # @return [Expr]
4575
+ #
4576
+ # @example
4577
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4578
+ # df.select(Polars.col("a").ewm_std(com: 1))
4579
+ # # =>
4580
+ # # shape: (3, 1)
4581
+ # # ┌──────────┐
4582
+ # # │ a │
4583
+ # # │ --- │
4584
+ # # │ f64 │
4585
+ # # ╞══════════╡
4586
+ # # │ 0.0 │
4587
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4588
+ # # │ 0.707107 │
4589
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4590
+ # # │ 0.963624 │
4591
+ # # └──────────┘
4592
+ def ewm_std(
4593
+ com: nil,
4594
+ span: nil,
4595
+ half_life: nil,
4596
+ alpha: nil,
4597
+ adjust: true,
4598
+ bias: false,
4599
+ min_periods: 1
4600
+ )
4601
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4602
+ wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
4603
+ end
4604
+
4605
+ # Exponentially-weighted moving variance.
4606
+ #
4607
+ # @return [Expr]
4608
+ #
4609
+ # @example
4610
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4611
+ # df.select(Polars.col("a").ewm_var(com: 1))
4612
+ # # =>
4613
+ # # shape: (3, 1)
4614
+ # # ┌──────────┐
4615
+ # # │ a │
4616
+ # # │ --- │
4617
+ # # │ f64 │
4618
+ # # ╞══════════╡
4619
+ # # │ 0.0 │
4620
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4621
+ # # │ 0.5 │
4622
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4623
+ # # │ 0.928571 │
4624
+ # # └──────────┘
4625
+ def ewm_var(
4626
+ com: nil,
4627
+ span: nil,
4628
+ half_life: nil,
4629
+ alpha: nil,
4630
+ adjust: true,
4631
+ bias: false,
4632
+ min_periods: 1
4633
+ )
4634
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4635
+ wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
4636
+ end
4637
+
4638
+ # Extend the Series with given number of values.
4639
+ #
4640
+ # @param value [Object]
4641
+ # The value to extend the Series with. This value may be nil to fill with
4642
+ # nulls.
4643
+ # @param n [Integer]
4644
+ # The number of values to extend.
4645
+ #
4646
+ # @return [Expr]
1267
4647
  #
4648
+ # @example
4649
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4650
+ # df.select(Polars.col("values").extend_constant(99, 2))
4651
+ # # =>
4652
+ # # shape: (5, 1)
4653
+ # # ┌────────┐
4654
+ # # │ values │
4655
+ # # │ --- │
4656
+ # # │ i64 │
4657
+ # # ╞════════╡
4658
+ # # │ 1 │
4659
+ # # ├╌╌╌╌╌╌╌╌┤
4660
+ # # │ 2 │
4661
+ # # ├╌╌╌╌╌╌╌╌┤
4662
+ # # │ 3 │
4663
+ # # ├╌╌╌╌╌╌╌╌┤
4664
+ # # │ 99 │
4665
+ # # ├╌╌╌╌╌╌╌╌┤
4666
+ # # │ 99 │
4667
+ # # └────────┘
1268
4668
  def extend_constant(value, n)
1269
4669
  wrap_expr(_rbexpr.extend_constant(value, n))
1270
4670
  end
1271
4671
 
4672
+ # Count all unique values and create a struct mapping value to count.
4673
+ #
4674
+ # @param multithreaded [Boolean]
4675
+ # Better to turn this off in the aggregation context, as it can lead to
4676
+ # contention.
4677
+ # @param sort [Boolean]
4678
+ # Ensure the output is sorted from most values to least.
4679
+ #
4680
+ # @return [Expr]
4681
+ #
4682
+ # @example
4683
+ # df = Polars::DataFrame.new(
4684
+ # {
4685
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4686
+ # }
4687
+ # )
4688
+ # df.select(
4689
+ # [
4690
+ # Polars.col("id").value_counts(sort: true),
4691
+ # ]
4692
+ # )
4693
+ # # =>
4694
+ # # shape: (3, 1)
4695
+ # # ┌───────────┐
4696
+ # # │ id │
4697
+ # # │ --- │
4698
+ # # │ struct[2] │
4699
+ # # ╞═══════════╡
4700
+ # # │ {"c",3} │
4701
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4702
+ # # │ {"b",2} │
4703
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4704
+ # # │ {"a",1} │
4705
+ # # └───────────┘
1272
4706
  def value_counts(multithreaded: false, sort: false)
1273
4707
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
1274
4708
  end
1275
4709
 
4710
+ # Return a count of the unique values in the order of appearance.
4711
+ #
4712
+ # This method differs from `value_counts` in that it does not return the
4713
+ # values, only the counts, and might be faster.
4714
+ #
4715
+ # @return [Expr]
4716
+ #
4717
+ # @example
4718
+ # df = Polars::DataFrame.new(
4719
+ # {
4720
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4721
+ # }
4722
+ # )
4723
+ # df.select(
4724
+ # [
4725
+ # Polars.col("id").unique_counts
4726
+ # ]
4727
+ # )
4728
+ # # =>
4729
+ # # shape: (3, 1)
4730
+ # # ┌─────┐
4731
+ # # │ id │
4732
+ # # │ --- │
4733
+ # # │ u32 │
4734
+ # # ╞═════╡
4735
+ # # │ 1 │
4736
+ # # ├╌╌╌╌╌┤
4737
+ # # │ 2 │
4738
+ # # ├╌╌╌╌╌┤
4739
+ # # │ 3 │
4740
+ # # └─────┘
1276
4741
  def unique_counts
1277
4742
  wrap_expr(_rbexpr.unique_counts)
1278
4743
  end
1279
4744
 
4745
+ # Compute the logarithm to a given base.
4746
+ #
4747
+ # @param base [Float]
4748
+ # Given base, defaults to `e`.
4749
+ #
4750
+ # @return [Expr]
4751
+ #
4752
+ # @example
4753
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4754
+ # df.select(Polars.col("a").log(2))
4755
+ # # =>
4756
+ # # shape: (3, 1)
4757
+ # # ┌──────────┐
4758
+ # # │ a │
4759
+ # # │ --- │
4760
+ # # │ f64 │
4761
+ # # ╞══════════╡
4762
+ # # │ 0.0 │
4763
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4764
+ # # │ 1.0 │
4765
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4766
+ # # │ 1.584963 │
4767
+ # # └──────────┘
1280
4768
  def log(base = Math::E)
1281
4769
  wrap_expr(_rbexpr.log(base))
1282
4770
  end
1283
4771
 
1284
- def entropy(base: 2, normalize: false)
4772
+ # Computes the entropy.
4773
+ #
4774
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
4775
+ #
4776
+ # @param base [Float]
4777
+ # Given base, defaults to `2`.
4778
+ # @param normalize [Boolean]
4779
+ # Normalize pk if it doesn't sum to 1.
4780
+ #
4781
+ # @return [Expr]
4782
+ #
4783
+ # @example
4784
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4785
+ # df.select(Polars.col("a").entropy(base: 2))
4786
+ # # =>
4787
+ # # shape: (1, 1)
4788
+ # # ┌──────────┐
4789
+ # # │ a │
4790
+ # # │ --- │
4791
+ # # │ f64 │
4792
+ # # ╞══════════╡
4793
+ # # │ 1.459148 │
4794
+ # # └──────────┘
4795
+ #
4796
+ # @example
4797
+ # df.select(Polars.col("a").entropy(base: 2, normalize: false))
4798
+ # # =>
4799
+ # # shape: (1, 1)
4800
+ # # ┌───────────┐
4801
+ # # │ a │
4802
+ # # │ --- │
4803
+ # # │ f64 │
4804
+ # # ╞═══════════╡
4805
+ # # │ -6.754888 │
4806
+ # # └───────────┘
4807
+ def entropy(base: 2, normalize: true)
1285
4808
  wrap_expr(_rbexpr.entropy(base, normalize))
1286
4809
  end
1287
4810
 
1288
- # def cumulative_eval
1289
- # end
1290
-
1291
- # def set_sorted
4811
+ # Run an expression over a sliding window that increases `1` slot every iteration.
4812
+ #
4813
+ # @param expr [Expr]
4814
+ # Expression to evaluate
4815
+ # @param min_periods [Integer]
4816
+ # Number of valid values there should be in the window before the expression
4817
+ # is evaluated. valid values = `length - null_count`
4818
+ # @param parallel [Boolean]
4819
+ # Run in parallel. Don't do this in a groupby or another operation that
4820
+ # already has much parallelization.
4821
+ #
4822
+ # @return [Expr]
4823
+ #
4824
+ # @note
4825
+ # This functionality is experimental and may change without it being considered a
4826
+ # breaking change.
4827
+ #
4828
+ # @note
4829
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
4830
+ # for operations that visit all elements.
4831
+ #
4832
+ # @example
4833
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
4834
+ # df.select(
4835
+ # [
4836
+ # Polars.col("values").cumulative_eval(
4837
+ # Polars.element.first - Polars.element.last ** 2
4838
+ # )
4839
+ # ]
4840
+ # )
4841
+ # # =>
4842
+ # # shape: (5, 1)
4843
+ # # ┌────────┐
4844
+ # # │ values │
4845
+ # # │ --- │
4846
+ # # │ f64 │
4847
+ # # ╞════════╡
4848
+ # # │ 0.0 │
4849
+ # # ├╌╌╌╌╌╌╌╌┤
4850
+ # # │ -3.0 │
4851
+ # # ├╌╌╌╌╌╌╌╌┤
4852
+ # # │ -8.0 │
4853
+ # # ├╌╌╌╌╌╌╌╌┤
4854
+ # # │ -15.0 │
4855
+ # # ├╌╌╌╌╌╌╌╌┤
4856
+ # # │ -24.0 │
4857
+ # # └────────┘
4858
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
4859
+ wrap_expr(
4860
+ _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
4861
+ )
4862
+ end
4863
+
4864
+ # Flags the expression as 'sorted'.
4865
+ #
4866
+ # Enables downstream code to use fast paths for sorted arrays.
4867
+ #
4868
+ # @param reverse [Boolean]
4869
+ # If the `Series` order is reversed, e.g. descending.
4870
+ #
4871
+ # @return [Expr]
4872
+ #
4873
+ # @note
4874
+ # This can lead to incorrect results if this `Series` is not sorted!!
4875
+ # Use with care!
4876
+ #
4877
+ # @example
4878
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4879
+ # df.select(Polars.col("values").set_sorted.max)
4880
+ # # =>
4881
+ # # shape: (1, 1)
4882
+ # # ┌────────┐
4883
+ # # │ values │
4884
+ # # │ --- │
4885
+ # # │ i64 │
4886
+ # # ╞════════╡
4887
+ # # │ 3 │
4888
+ # # └────────┘
4889
+ # def set_sorted(reverse: false)
4890
+ # map { |s| s.set_sorted(reverse) }
1292
4891
  # end
1293
4892
 
4893
+ # Aggregate to list.
4894
+ #
4895
+ # @return [Expr]
1294
4896
  #
4897
+ # @example
4898
+ # df = Polars::DataFrame.new(
4899
+ # {
4900
+ # "a" => [1, 2, 3],
4901
+ # "b" => [4, 5, 6]
4902
+ # }
4903
+ # )
4904
+ # df.select(Polars.all.list)
4905
+ # # =>
4906
+ # # shape: (1, 2)
4907
+ # # ┌───────────┬───────────┐
4908
+ # # │ a ┆ b │
4909
+ # # │ --- ┆ --- │
4910
+ # # │ list[i64] ┆ list[i64] │
4911
+ # # ╞═══════════╪═══════════╡
4912
+ # # │ [1, 2, 3] ┆ [4, 5, 6] │
4913
+ # # └───────────┴───────────┘
1295
4914
  def list
1296
4915
  wrap_expr(_rbexpr.list)
1297
4916
  end
1298
4917
 
4918
+ # Shrink numeric columns to the minimal required datatype.
4919
+ #
4920
+ # Shrink to the dtype needed to fit the extrema of this `Series`.
4921
+ # This can be used to reduce memory pressure.
4922
+ #
4923
+ # @return [Expr]
4924
+ #
4925
+ # @example
4926
+ # Polars::DataFrame.new(
4927
+ # {
4928
+ # "a" => [1, 2, 3],
4929
+ # "b" => [1, 2, 2 << 32],
4930
+ # "c" => [-1, 2, 1 << 30],
4931
+ # "d" => [-112, 2, 112],
4932
+ # "e" => [-112, 2, 129],
4933
+ # "f" => ["a", "b", "c"],
4934
+ # "g" => [0.1, 1.32, 0.12],
4935
+ # "h" => [true, nil, false]
4936
+ # }
4937
+ # ).select(Polars.all.shrink_dtype)
4938
+ # # =>
4939
+ # # shape: (3, 8)
4940
+ # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
4941
+ # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
4942
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
4943
+ # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
4944
+ # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
4945
+ # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
4946
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4947
+ # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
4948
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4949
+ # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
4950
+ # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
1299
4951
  def shrink_dtype
1300
4952
  wrap_expr(_rbexpr.shrink_dtype)
1301
4953
  end
1302
4954
 
4955
+ # Create an object namespace of all list related methods.
4956
+ #
4957
+ # @return [ListExpr]
1303
4958
  def arr
1304
4959
  ListExpr.new(self)
1305
4960
  end
1306
4961
 
4962
+ # Create an object namespace of all categorical related methods.
4963
+ #
4964
+ # @return [CatExpr]
1307
4965
  def cat
1308
4966
  CatExpr.new(self)
1309
4967
  end
1310
4968
 
4969
+ # Create an object namespace of all datetime related methods.
4970
+ #
4971
+ # @return [DateTimeExpr]
1311
4972
  def dt
1312
4973
  DateTimeExpr.new(self)
1313
4974
  end
1314
4975
 
4976
+ # Create an object namespace of all meta related expression methods.
4977
+ #
4978
+ # @return [MetaExpr]
1315
4979
  def meta
1316
4980
  MetaExpr.new(self)
1317
4981
  end
1318
4982
 
4983
+ # Create an object namespace of all string related methods.
4984
+ #
4985
+ # @return [StringExpr]
1319
4986
  def str
1320
4987
  StringExpr.new(self)
1321
4988
  end
1322
4989
 
4990
+ # Create an object namespace of all struct related methods.
4991
+ #
4992
+ # @return [StructExpr]
1323
4993
  def struct
1324
4994
  StructExpr.new(self)
1325
4995
  end
@@ -1337,5 +5007,51 @@ module Polars
1337
5007
  def _to_expr(other)
1338
5008
  other.is_a?(Expr) ? other : Utils.lit(other)
1339
5009
  end
5010
+
5011
+ def _prepare_alpha(com, span, half_life, alpha)
5012
+ if [com, span, half_life, alpha].count { |v| !v.nil? } > 1
5013
+ raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
5014
+ end
5015
+
5016
+ if !com.nil?
5017
+ if com < 0.0
5018
+ raise ArgumentError, "Require 'com' >= 0 (found #{com})"
5019
+ end
5020
+ alpha = 1.0 / (1.0 + com)
5021
+
5022
+ elsif !span.nil?
5023
+ if span < 1.0
5024
+ raise ArgumentError, "Require 'span' >= 1 (found #{span})"
5025
+ end
5026
+ alpha = 2.0 / (span + 1.0)
5027
+
5028
+ elsif !half_life.nil?
5029
+ if half_life <= 0.0
5030
+ raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})"
5031
+ end
5032
+ alpha = 1.0 - Math.exp(-Math.log(2.0) / half_life)
5033
+
5034
+ elsif alpha.nil?
5035
+ raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
5036
+
5037
+ elsif alpha <= 0 || alpha > 1
5038
+ raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
5039
+ end
5040
+
5041
+ alpha
5042
+ end
5043
+
5044
+ def _prepare_rolling_window_args(window_size, min_periods)
5045
+ if window_size.is_a?(Integer)
5046
+ if min_periods.nil?
5047
+ min_periods = window_size
5048
+ end
5049
+ window_size = "#{window_size}i"
5050
+ end
5051
+ if min_periods.nil?
5052
+ min_periods = 1
5053
+ end
5054
+ [window_size, min_periods]
5055
+ end
1340
5056
  end
1341
5057
  end