polars-df 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
data/lib/polars/expr.rb CHANGED
@@ -138,8 +138,45 @@ module Polars
138
138
  Utils.lit(0) - self
139
139
  end
140
140
 
141
- # def to_physical
142
- # end
141
+ # Cast to physical representation of the logical dtype.
142
+ #
143
+ # - `:date` -> `:i32`
144
+ # - `:datetime` -> `:i64`
145
+ # - `:time` -> `:i64`
146
+ # - `:duration` -> `:i64`
147
+ # - `:cat` -> `:u32`
148
+ # - Other data types will be left unchanged.
149
+ #
150
+ # @return [Expr]
151
+ #
152
+ # @example
153
+ # Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
154
+ # [
155
+ # Polars.col("vals").cast(:cat),
156
+ # Polars.col("vals")
157
+ # .cast(:cat)
158
+ # .to_physical
159
+ # .alias("vals_physical")
160
+ # ]
161
+ # )
162
+ # # =>
163
+ # # shape: (4, 2)
164
+ # # ┌──────┬───────────────┐
165
+ # # │ vals ┆ vals_physical │
166
+ # # │ --- ┆ --- │
167
+ # # │ cat ┆ u32 │
168
+ # # ╞══════╪═══════════════╡
169
+ # # │ a ┆ 0 │
170
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
171
+ # # │ x ┆ 1 │
172
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
173
+ # # │ null ┆ null │
174
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
175
+ # # │ a ┆ 0 │
176
+ # # └──────┴───────────────┘
177
+ def to_physical
178
+ wrap_expr(_rbexpr.to_physical)
179
+ end
143
180
 
144
181
  # Check if any boolean value in a Boolean column is `true`.
145
182
  #
@@ -258,13 +295,82 @@ module Polars
258
295
  wrap_expr(_rbexpr.exp)
259
296
  end
260
297
 
298
+ # Rename the output of an expression.
299
+ #
300
+ # @param name [String]
301
+ # New name.
302
+ #
303
+ # @return [Expr]
304
+ #
305
+ # @example
306
+ # df = Polars::DataFrame.new(
307
+ # {
308
+ # "a" => [1, 2, 3],
309
+ # "b" => ["a", "b", nil]
310
+ # }
311
+ # )
312
+ # df.select(
313
+ # [
314
+ # Polars.col("a").alias("bar"),
315
+ # Polars.col("b").alias("foo")
316
+ # ]
317
+ # )
318
+ # # =>
319
+ # # shape: (3, 2)
320
+ # # ┌─────┬──────┐
321
+ # # │ bar ┆ foo │
322
+ # # │ --- ┆ --- │
323
+ # # │ i64 ┆ str │
324
+ # # ╞═════╪══════╡
325
+ # # │ 1 ┆ a │
326
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
327
+ # # │ 2 ┆ b │
328
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
329
+ # # │ 3 ┆ null │
330
+ # # └─────┴──────┘
261
331
  def alias(name)
262
332
  wrap_expr(_rbexpr._alias(name))
263
333
  end
264
334
 
265
335
  # TODO support symbols for exclude
266
336
 
337
+ # Exclude certain columns from a wildcard/regex selection.
338
+ #
339
+ # You may also use regexes in the exclude list. They must start with `^` and end
340
+ # with `$`.
341
+ #
342
+ # @param columns [Object]
343
+ # Column(s) to exclude from selection.
344
+ # This can be:
267
345
  #
346
+ # - a column name, or multiple column names
347
+ # - a regular expression starting with `^` and ending with `$`
348
+ # - a dtype or multiple dtypes
349
+ #
350
+ # @return [Expr]
351
+ #
352
+ # @example
353
+ # df = Polars::DataFrame.new(
354
+ # {
355
+ # "aa" => [1, 2, 3],
356
+ # "ba" => ["a", "b", nil],
357
+ # "cc" => [nil, 2.5, 1.5]
358
+ # }
359
+ # )
360
+ # df.select(Polars.all.exclude("ba"))
361
+ # # =>
362
+ # # shape: (3, 2)
363
+ # # ┌─────┬──────┐
364
+ # # │ aa ┆ cc │
365
+ # # │ --- ┆ --- │
366
+ # # │ i64 ┆ f64 │
367
+ # # ╞═════╪══════╡
368
+ # # │ 1 ┆ null │
369
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
370
+ # # │ 2 ┆ 2.5 │
371
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
372
+ # # │ 3 ┆ 1.5 │
373
+ # # └─────┴──────┘
268
374
  def exclude(columns)
269
375
  if columns.is_a?(String)
270
376
  columns = [columns]
@@ -285,20 +391,75 @@ module Polars
285
391
  end
286
392
  end
287
393
 
394
+ # Keep the original root name of the expression.
395
+ #
396
+ # @return [Expr]
397
+ #
398
+ # @example
399
+ # df = Polars::DataFrame.new(
400
+ # {
401
+ # "a" => [1, 2],
402
+ # "b" => [3, 4]
403
+ # }
404
+ # )
405
+ # df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
406
+ # # =>
407
+ # # shape: (2, 2)
408
+ # # ┌─────┬─────┐
409
+ # # │ a ┆ b │
410
+ # # │ --- ┆ --- │
411
+ # # │ i64 ┆ i64 │
412
+ # # ╞═════╪═════╡
413
+ # # │ 9 ┆ 3 │
414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
415
+ # # │ 18 ┆ 4 │
416
+ # # └─────┴─────┘
288
417
  def keep_name
289
418
  wrap_expr(_rbexpr.keep_name)
290
419
  end
291
420
 
421
+ # Add a prefix to the root column name of the expression.
422
+ #
423
+ # @return [Expr]
292
424
  def prefix(prefix)
293
425
  wrap_expr(_rbexpr.prefix(prefix))
294
426
  end
295
427
 
428
+ # Add a suffix to the root column name of the expression.
429
+ #
430
+ # @return [Expr]
296
431
  def suffix(suffix)
297
432
  wrap_expr(_rbexpr.suffix(suffix))
298
433
  end
299
434
 
300
- # def map_alias
301
- # end
435
+ # Rename the output of an expression by mapping a function over the root name.
436
+ #
437
+ # @return [Expr]
438
+ #
439
+ # @example
440
+ # df = Polars::DataFrame.new(
441
+ # {
442
+ # "A" => [1, 2],
443
+ # "B" => [3, 4]
444
+ # }
445
+ # )
446
+ # df.select(
447
+ # Polars.all.reverse.map_alias { |col_name| col_name + "_reverse" }
448
+ # )
449
+ # # =>
450
+ # # shape: (2, 2)
451
+ # # ┌───────────┬───────────┐
452
+ # # │ A_reverse ┆ B_reverse │
453
+ # # │ --- ┆ --- │
454
+ # # │ i64 ┆ i64 │
455
+ # # ╞═══════════╪═══════════╡
456
+ # # │ 2 ┆ 4 │
457
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
458
+ # # │ 1 ┆ 3 │
459
+ # # └───────────┴───────────┘
460
+ def map_alias(&f)
461
+ Utils.wrap_expr(_rbexpr.map_alias(f))
462
+ end
302
463
 
303
464
  # Negate a boolean expression.
304
465
  #
@@ -464,14 +625,112 @@ module Polars
464
625
  wrap_expr(_rbexpr.is_infinite)
465
626
  end
466
627
 
628
+ # Returns a boolean Series indicating which values are NaN.
629
+ #
630
+ # @note
631
+ # Floating point `NaN` (Not A Number) should not be confused
632
+ # with missing data represented as `nil`.
633
+ #
634
+ # @return [Expr]
635
+ #
636
+ # @example
637
+ # df = Polars::DataFrame.new(
638
+ # {
639
+ # "a" => [1, 2, nil, 1, 5],
640
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
641
+ # }
642
+ # )
643
+ # df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
644
+ # # =>
645
+ # # shape: (5, 3)
646
+ # # ┌──────┬─────┬─────────┐
647
+ # # │ a ┆ b ┆ b_isnan │
648
+ # # │ --- ┆ --- ┆ --- │
649
+ # # │ i64 ┆ f64 ┆ bool │
650
+ # # ╞══════╪═════╪═════════╡
651
+ # # │ 1 ┆ 1.0 ┆ false │
652
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
653
+ # # │ 2 ┆ 2.0 ┆ false │
654
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
655
+ # # │ null ┆ NaN ┆ true │
656
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
657
+ # # │ 1 ┆ 1.0 ┆ false │
658
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
659
+ # # │ 5 ┆ 5.0 ┆ false │
660
+ # # └──────┴─────┴─────────┘
467
661
  def is_nan
468
662
  wrap_expr(_rbexpr.is_nan)
469
663
  end
470
664
 
665
+ # Returns a boolean Series indicating which values are not NaN.
666
+ #
667
+ # @note
668
+ # Floating point `NaN` (Not A Number) should not be confused
669
+ # with missing data represented as `nil`.
670
+ #
671
+ # @return [Expr]
672
+ #
673
+ # @example
674
+ # df = Polars::DataFrame.new(
675
+ # {
676
+ # "a" => [1, 2, nil, 1, 5],
677
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
678
+ # }
679
+ # )
680
+ # df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
681
+ # # =>
682
+ # # shape: (5, 3)
683
+ # # ┌──────┬─────┬──────────────┐
684
+ # # │ a ┆ b ┆ b_is_not_nan │
685
+ # # │ --- ┆ --- ┆ --- │
686
+ # # │ i64 ┆ f64 ┆ bool │
687
+ # # ╞══════╪═════╪══════════════╡
688
+ # # │ 1 ┆ 1.0 ┆ true │
689
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
690
+ # # │ 2 ┆ 2.0 ┆ true │
691
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
692
+ # # │ null ┆ NaN ┆ false │
693
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
694
+ # # │ 1 ┆ 1.0 ┆ true │
695
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
696
+ # # │ 5 ┆ 5.0 ┆ true │
697
+ # # └──────┴─────┴──────────────┘
471
698
  def is_not_nan
472
699
  wrap_expr(_rbexpr.is_not_nan)
473
700
  end
474
701
 
702
+ # Get the group indexes of the group by operation.
703
+ #
704
+ # Should be used in aggregation context only.
705
+ #
706
+ # @return [Expr]
707
+ #
708
+ # @example
709
+ # df = Polars::DataFrame.new(
710
+ # {
711
+ # "group" => [
712
+ # "one",
713
+ # "one",
714
+ # "one",
715
+ # "two",
716
+ # "two",
717
+ # "two"
718
+ # ],
719
+ # "value" => [94, 95, 96, 97, 97, 99]
720
+ # }
721
+ # )
722
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
723
+ # # =>
724
+ # # shape: (2, 2)
725
+ # # ┌───────┬───────────┐
726
+ # # │ group ┆ value │
727
+ # # │ --- ┆ --- │
728
+ # # │ str ┆ list[u32] │
729
+ # # ╞═══════╪═══════════╡
730
+ # # │ one ┆ [0, 1, 2] │
731
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
732
+ # # │ two ┆ [3, 4, 5] │
733
+ # # └───────┴───────────┘
475
734
  def agg_groups
476
735
  wrap_expr(_rbexpr.agg_groups)
477
736
  end
@@ -557,6 +816,36 @@ module Polars
557
816
  wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
558
817
  end
559
818
 
819
+ # Append expressions.
820
+ #
821
+ # This is done by adding the chunks of `other` to this `Series`.
822
+ #
823
+ # @param other [Expr]
824
+ # Expression to append.
825
+ # @param upcast [Boolean]
826
+ # Cast both `Series` to the same supertype.
827
+ #
828
+ # @return [Expr]
829
+ #
830
+ # @example
831
+ # df = Polars::DataFrame.new(
832
+ # {
833
+ # "a" => [8, 9, 10],
834
+ # "b" => [nil, 4, 4]
835
+ # }
836
+ # )
837
+ # df.select(Polars.all.head(1).append(Polars.all.tail(1)))
838
+ # # =>
839
+ # # shape: (2, 2)
840
+ # # ┌─────┬──────┐
841
+ # # │ a ┆ b │
842
+ # # │ --- ┆ --- │
843
+ # # │ i64 ┆ i64 │
844
+ # # ╞═════╪══════╡
845
+ # # │ 8 ┆ null │
846
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
847
+ # # │ 10 ┆ 4 │
848
+ # # └─────┴──────┘
560
849
  def append(other, upcast: true)
561
850
  other = Utils.expr_to_lit_or_expr(other)
562
851
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
@@ -567,7 +856,7 @@ module Polars
567
856
  # @return [Expr]
568
857
  #
569
858
  # @example Create a Series with 3 nulls, append column a then rechunk
570
- # df = Polars::DataFrame.new({"a": [1, 1, 2]})
859
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
571
860
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
572
861
  # # =>
573
862
  # # shape: (6, 1)
@@ -650,22 +939,182 @@ module Polars
650
939
  wrap_expr(_rbexpr.drop_nans)
651
940
  end
652
941
 
942
+ # Get an array with the cumulative sum computed at every element.
943
+ #
944
+ # @param reverse [Boolean]
945
+ # Reverse the operation.
946
+ #
947
+ # @return [Expr]
948
+ #
949
+ # @note
950
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
951
+ # `:i64` before summing to prevent overflow issues.
952
+ #
953
+ # @example
954
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
955
+ # df.select(
956
+ # [
957
+ # Polars.col("a").cumsum,
958
+ # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
959
+ # ]
960
+ # )
961
+ # # =>
962
+ # # shape: (4, 2)
963
+ # # ┌─────┬───────────┐
964
+ # # │ a ┆ a_reverse │
965
+ # # │ --- ┆ --- │
966
+ # # │ i64 ┆ i64 │
967
+ # # ╞═════╪═══════════╡
968
+ # # │ 1 ┆ 10 │
969
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
970
+ # # │ 3 ┆ 9 │
971
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
972
+ # # │ 6 ┆ 7 │
973
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
974
+ # # │ 10 ┆ 4 │
975
+ # # └─────┴───────────┘
653
976
  def cumsum(reverse: false)
654
977
  wrap_expr(_rbexpr.cumsum(reverse))
655
978
  end
656
979
 
980
+ # Get an array with the cumulative product computed at every element.
981
+ #
982
+ # @param reverse [Boolean]
983
+ # Reverse the operation.
984
+ #
985
+ # @return [Expr]
986
+ #
987
+ # @note
988
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
989
+ # `:i64` before multiplying to prevent overflow issues.
990
+ #
991
+ # @example
992
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
993
+ # df.select(
994
+ # [
995
+ # Polars.col("a").cumprod,
996
+ # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
997
+ # ]
998
+ # )
999
+ # # =>
1000
+ # # shape: (4, 2)
1001
+ # # ┌─────┬───────────┐
1002
+ # # │ a ┆ a_reverse │
1003
+ # # │ --- ┆ --- │
1004
+ # # │ i64 ┆ i64 │
1005
+ # # ╞═════╪═══════════╡
1006
+ # # │ 1 ┆ 24 │
1007
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1008
+ # # │ 2 ┆ 24 │
1009
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1010
+ # # │ 6 ┆ 12 │
1011
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1012
+ # # │ 24 ┆ 4 │
1013
+ # # └─────┴───────────┘
657
1014
  def cumprod(reverse: false)
658
1015
  wrap_expr(_rbexpr.cumprod(reverse))
659
1016
  end
660
1017
 
1018
+ # Get an array with the cumulative min computed at every element.
1019
+ #
1020
+ # @param reverse [Boolean]
1021
+ # Reverse the operation.
1022
+ #
1023
+ # @return [Expr]
1024
+ #
1025
+ # @example
1026
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1027
+ # df.select(
1028
+ # [
1029
+ # Polars.col("a").cummin,
1030
+ # Polars.col("a").cummin(reverse: true).alias("a_reverse")
1031
+ # ]
1032
+ # )
1033
+ # # =>
1034
+ # # shape: (4, 2)
1035
+ # # ┌─────┬───────────┐
1036
+ # # │ a ┆ a_reverse │
1037
+ # # │ --- ┆ --- │
1038
+ # # │ i64 ┆ i64 │
1039
+ # # ╞═════╪═══════════╡
1040
+ # # │ 1 ┆ 1 │
1041
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1042
+ # # │ 1 ┆ 2 │
1043
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1044
+ # # │ 1 ┆ 3 │
1045
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1046
+ # # │ 1 ┆ 4 │
1047
+ # # └─────┴───────────┘
661
1048
  def cummin(reverse: false)
662
1049
  wrap_expr(_rbexpr.cummin(reverse))
663
1050
  end
664
1051
 
1052
+ # Get an array with the cumulative max computed at every element.
1053
+ #
1054
+ # @param reverse [Boolean]
1055
+ # Reverse the operation.
1056
+ #
1057
+ # @return [Expr]
1058
+ #
1059
+ # @example
1060
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1061
+ # df.select(
1062
+ # [
1063
+ # Polars.col("a").cummax,
1064
+ # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1065
+ # ]
1066
+ # )
1067
+ # # =>
1068
+ # # shape: (4, 2)
1069
+ # # ┌─────┬───────────┐
1070
+ # # │ a ┆ a_reverse │
1071
+ # # │ --- ┆ --- │
1072
+ # # │ i64 ┆ i64 │
1073
+ # # ╞═════╪═══════════╡
1074
+ # # │ 1 ┆ 4 │
1075
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1076
+ # # │ 2 ┆ 4 │
1077
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1078
+ # # │ 3 ┆ 4 │
1079
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1080
+ # # │ 4 ┆ 4 │
1081
+ # # └─────┴───────────┘
665
1082
  def cummax(reverse: false)
666
1083
  wrap_expr(_rbexpr.cummax(reverse))
667
1084
  end
668
1085
 
1086
+ # Get an array with the cumulative count computed at every element.
1087
+ #
1088
+ # Counting from 0 to `len - 1`.
1089
+ #
1090
+ # @param reverse [Boolean]
1091
+ # Reverse the operation.
1092
+ #
1093
+ # @return [Expr]
1094
+ #
1095
+ # @example
1096
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1097
+ # df.select(
1098
+ # [
1099
+ # Polars.col("a").cumcount,
1100
+ # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1101
+ # ]
1102
+ # )
1103
+ # # =>
1104
+ # # shape: (4, 2)
1105
+ # # ┌─────┬───────────┐
1106
+ # # │ a ┆ a_reverse │
1107
+ # # │ --- ┆ --- │
1108
+ # # │ u32 ┆ u32 │
1109
+ # # ╞═════╪═══════════╡
1110
+ # # │ 0 ┆ 3 │
1111
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1112
+ # # │ 1 ┆ 2 │
1113
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1114
+ # # │ 2 ┆ 1 │
1115
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1116
+ # # │ 3 ┆ 0 │
1117
+ # # └─────┴───────────┘
669
1118
  def cumcount(reverse: false)
670
1119
  wrap_expr(_rbexpr.cumcount(reverse))
671
1120
  end
@@ -755,6 +1204,30 @@ module Polars
755
1204
  wrap_expr(_rbexpr.round(decimals))
756
1205
  end
757
1206
 
1207
+ # Compute the dot/inner product between two Expressions.
1208
+ #
1209
+ # @param other [Expr]
1210
+ # Expression to compute dot product with.
1211
+ #
1212
+ # @return [Expr]
1213
+ #
1214
+ # @example
1215
+ # df = Polars::DataFrame.new(
1216
+ # {
1217
+ # "a" => [1, 3, 5],
1218
+ # "b" => [2, 4, 6]
1219
+ # }
1220
+ # )
1221
+ # df.select(Polars.col("a").dot(Polars.col("b")))
1222
+ # # =>
1223
+ # # shape: (1, 1)
1224
+ # # ┌─────┐
1225
+ # # │ a │
1226
+ # # │ --- │
1227
+ # # │ i64 │
1228
+ # # ╞═════╡
1229
+ # # │ 44 │
1230
+ # # └─────┘
758
1231
  def dot(other)
759
1232
  other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
760
1233
  wrap_expr(_rbexpr.dot(other._rbexpr))
@@ -789,24 +1262,183 @@ module Polars
789
1262
  wrap_expr(_rbexpr.mode)
790
1263
  end
791
1264
 
1265
+ # Cast between data types.
1266
+ #
1267
+ # @param dtype [Symbol]
1268
+ # DataType to cast to.
1269
+ # @param strict [Boolean]
1270
+ # Throw an error if a cast could not be done.
1271
+ # For instance, due to an overflow.
1272
+ #
1273
+ # @return [Expr]
1274
+ #
1275
+ # @example
1276
+ # df = Polars::DataFrame.new(
1277
+ # {
1278
+ # "a" => [1, 2, 3],
1279
+ # "b" => ["4", "5", "6"]
1280
+ # }
1281
+ # )
1282
+ # df.with_columns(
1283
+ # [
1284
+ # Polars.col("a").cast(:f64),
1285
+ # Polars.col("b").cast(:i32)
1286
+ # ]
1287
+ # )
1288
+ # # =>
1289
+ # # shape: (3, 2)
1290
+ # # ┌─────┬─────┐
1291
+ # # │ a ┆ b │
1292
+ # # │ --- ┆ --- │
1293
+ # # │ f64 ┆ i32 │
1294
+ # # ╞═════╪═════╡
1295
+ # # │ 1.0 ┆ 4 │
1296
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1297
+ # # │ 2.0 ┆ 5 │
1298
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1299
+ # # │ 3.0 ┆ 6 │
1300
+ # # └─────┴─────┘
792
1301
  def cast(dtype, strict: true)
793
1302
  dtype = Utils.rb_type_to_dtype(dtype)
794
1303
  wrap_expr(_rbexpr.cast(dtype, strict))
795
1304
  end
796
1305
 
1306
+ # Sort this column. In projection/selection context the whole column is sorted.
1307
+ #
1308
+ # If used in a groupby context, the groups are sorted.
1309
+ #
1310
+ # @param reverse [Boolean]
1311
+ # false -> order from small to large.
1312
+ # true -> order from large to small.
1313
+ # @param nulls_last [Boolean]
1314
+ # If true nulls are considered to be larger than any valid value.
1315
+ #
1316
+ # @return [Expr]
1317
+ #
1318
+ # @example
1319
+ # df = Polars::DataFrame.new(
1320
+ # {
1321
+ # "group" => [
1322
+ # "one",
1323
+ # "one",
1324
+ # "one",
1325
+ # "two",
1326
+ # "two",
1327
+ # "two"
1328
+ # ],
1329
+ # "value" => [1, 98, 2, 3, 99, 4]
1330
+ # }
1331
+ # )
1332
+ # df.select(Polars.col("value").sort)
1333
+ # # =>
1334
+ # # shape: (6, 1)
1335
+ # # ┌───────┐
1336
+ # # │ value │
1337
+ # # │ --- │
1338
+ # # │ i64 │
1339
+ # # ╞═══════╡
1340
+ # # │ 1 │
1341
+ # # ├╌╌╌╌╌╌╌┤
1342
+ # # │ 2 │
1343
+ # # ├╌╌╌╌╌╌╌┤
1344
+ # # │ 3 │
1345
+ # # ├╌╌╌╌╌╌╌┤
1346
+ # # │ 4 │
1347
+ # # ├╌╌╌╌╌╌╌┤
1348
+ # # │ 98 │
1349
+ # # ├╌╌╌╌╌╌╌┤
1350
+ # # │ 99 │
1351
+ # # └───────┘
1352
+ #
1353
+ # @example
1354
+ # df.select(Polars.col("value").sort(reverse: true))
1355
+ # # =>
1356
+ # # shape: (6, 1)
1357
+ # # ┌───────┐
1358
+ # # │ value │
1359
+ # # │ --- │
1360
+ # # │ i64 │
1361
+ # # ╞═══════╡
1362
+ # # │ 99 │
1363
+ # # ├╌╌╌╌╌╌╌┤
1364
+ # # │ 98 │
1365
+ # # ├╌╌╌╌╌╌╌┤
1366
+ # # │ 4 │
1367
+ # # ├╌╌╌╌╌╌╌┤
1368
+ # # │ 3 │
1369
+ # # ├╌╌╌╌╌╌╌┤
1370
+ # # │ 2 │
1371
+ # # ├╌╌╌╌╌╌╌┤
1372
+ # # │ 1 │
1373
+ # # └───────┘
1374
+ #
1375
+ # @example
1376
+ # df.groupby("group").agg(Polars.col("value").sort)
1377
+ # # =>
1378
+ # # shape: (2, 2)
1379
+ # # ┌───────┬────────────┐
1380
+ # # │ group ┆ value │
1381
+ # # │ --- ┆ --- │
1382
+ # # │ str ┆ list[i64] │
1383
+ # # ╞═══════╪════════════╡
1384
+ # # │ two ┆ [3, 4, 99] │
1385
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1386
+ # # │ one ┆ [1, 2, 98] │
1387
+ # # └───────┴────────────┘
797
1388
  def sort(reverse: false, nulls_last: false)
798
1389
  wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
799
1390
  end
800
1391
 
1392
+ # Return the `k` largest elements.
1393
+ #
1394
+ # If `reverse: true`, the smallest elements will be given.
1395
+ #
1396
+ # @param k [Integer]
1397
+ # Number of elements to return.
1398
+ # @param reverse [Boolean]
1399
+ # Return the smallest elements.
1400
+ #
1401
+ # @return [Expr]
1402
+ #
1403
+ # @example
1404
+ # df = Polars::DataFrame.new(
1405
+ # {
1406
+ # "value" => [1, 98, 2, 3, 99, 4]
1407
+ # }
1408
+ # )
1409
+ # df.select(
1410
+ # [
1411
+ # Polars.col("value").top_k.alias("top_k"),
1412
+ # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1413
+ # ]
1414
+ # )
1415
+ # # =>
1416
+ # # shape: (5, 2)
1417
+ # # ┌───────┬──────────┐
1418
+ # # │ top_k ┆ bottom_k │
1419
+ # # │ --- ┆ --- │
1420
+ # # │ i64 ┆ i64 │
1421
+ # # ╞═══════╪══════════╡
1422
+ # # │ 99 ┆ 1 │
1423
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1424
+ # # │ 98 ┆ 2 │
1425
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1426
+ # # │ 4 ┆ 3 │
1427
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1428
+ # # │ 3 ┆ 4 │
1429
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1430
+ # # │ 2 ┆ 98 │
1431
+ # # └───────┴──────────┘
801
1432
  def top_k(k: 5, reverse: false)
802
1433
  wrap_expr(_rbexpr.top_k(k, reverse))
803
1434
  end
804
1435
 
805
- def arg_sort(reverse: false, nulls_last: false)
806
- wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
807
- end
808
-
809
- # Get the index of the maximal value.
1436
+ # Get the index values that would sort this column.
1437
+ #
1438
+ # @param reverse [Boolean]
1439
+ # Sort in reverse (descending) order.
1440
+ # @param nulls_last [Boolean]
1441
+ # Place null values last instead of first.
810
1442
  #
811
1443
  # @return [Expr]
812
1444
  #
@@ -816,21 +1448,49 @@ module Polars
816
1448
  # "a" => [20, 10, 30]
817
1449
  # }
818
1450
  # )
819
- # df.select(Polars.col("a").arg_max)
1451
+ # df.select(Polars.col("a").arg_sort)
820
1452
  # # =>
821
- # # shape: (1, 1)
1453
+ # # shape: (3, 1)
822
1454
  # # ┌─────┐
823
1455
  # # │ a │
824
1456
  # # │ --- │
825
1457
  # # │ u32 │
826
1458
  # # ╞═════╡
1459
+ # # │ 1 │
1460
+ # # ├╌╌╌╌╌┤
1461
+ # # │ 0 │
1462
+ # # ├╌╌╌╌╌┤
827
1463
  # # │ 2 │
828
1464
  # # └─────┘
829
- def arg_max
830
- wrap_expr(_rbexpr.arg_max)
1465
+ def arg_sort(reverse: false, nulls_last: false)
1466
+ wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
831
1467
  end
832
1468
 
833
- # Get the index of the minimal value.
1469
+ # Get the index of the maximal value.
1470
+ #
1471
+ # @return [Expr]
1472
+ #
1473
+ # @example
1474
+ # df = Polars::DataFrame.new(
1475
+ # {
1476
+ # "a" => [20, 10, 30]
1477
+ # }
1478
+ # )
1479
+ # df.select(Polars.col("a").arg_max)
1480
+ # # =>
1481
+ # # shape: (1, 1)
1482
+ # # ┌─────┐
1483
+ # # │ a │
1484
+ # # │ --- │
1485
+ # # │ u32 │
1486
+ # # ╞═════╡
1487
+ # # │ 2 │
1488
+ # # └─────┘
1489
+ def arg_max
1490
+ wrap_expr(_rbexpr.arg_max)
1491
+ end
1492
+
1493
+ # Get the index of the minimal value.
834
1494
  #
835
1495
  # @return [Expr]
836
1496
  #
@@ -854,11 +1514,87 @@ module Polars
854
1514
  wrap_expr(_rbexpr.arg_min)
855
1515
  end
856
1516
 
1517
+ # Find indices where elements should be inserted to maintain order.
1518
+ #
1519
+ # @param element [Object]
1520
+ # Expression or scalar value.
1521
+ #
1522
+ # @return [Expr]
1523
+ #
1524
+ # @example
1525
+ # df = Polars::DataFrame.new(
1526
+ # {
1527
+ # "values" => [1, 2, 3, 5]
1528
+ # }
1529
+ # )
1530
+ # df.select(
1531
+ # [
1532
+ # Polars.col("values").search_sorted(0).alias("zero"),
1533
+ # Polars.col("values").search_sorted(3).alias("three"),
1534
+ # Polars.col("values").search_sorted(6).alias("six")
1535
+ # ]
1536
+ # )
1537
+ # # =>
1538
+ # # shape: (1, 3)
1539
+ # # ┌──────┬───────┬─────┐
1540
+ # # │ zero ┆ three ┆ six │
1541
+ # # │ --- ┆ --- ┆ --- │
1542
+ # # │ u32 ┆ u32 ┆ u32 │
1543
+ # # ╞══════╪═══════╪═════╡
1544
+ # # │ 0 ┆ 2 ┆ 4 │
1545
+ # # └──────┴───────┴─────┘
857
1546
  def search_sorted(element)
858
1547
  element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
859
1548
  wrap_expr(_rbexpr.search_sorted(element._rbexpr))
860
1549
  end
861
1550
 
1551
+ # Sort this column by the ordering of another column, or multiple other columns.
1552
+ #
1553
+ # In projection/selection context the whole column is sorted.
1554
+ # If used in a groupby context, the groups are sorted.
1555
+ #
1556
+ # @param by [Object]
1557
+ # The column(s) used for sorting.
1558
+ # @param reverse [Boolean]
1559
+ # false -> order from small to large.
1560
+ # true -> order from large to small.
1561
+ #
1562
+ # @return [Expr]
1563
+ #
1564
+ # @example
1565
+ # df = Polars::DataFrame.new(
1566
+ # {
1567
+ # "group" => [
1568
+ # "one",
1569
+ # "one",
1570
+ # "one",
1571
+ # "two",
1572
+ # "two",
1573
+ # "two"
1574
+ # ],
1575
+ # "value" => [1, 98, 2, 3, 99, 4]
1576
+ # }
1577
+ # )
1578
+ # df.select(Polars.col("group").sort_by("value"))
1579
+ # # =>
1580
+ # # shape: (6, 1)
1581
+ # # ┌───────┐
1582
+ # # │ group │
1583
+ # # │ --- │
1584
+ # # │ str │
1585
+ # # ╞═══════╡
1586
+ # # │ one │
1587
+ # # ├╌╌╌╌╌╌╌┤
1588
+ # # │ one │
1589
+ # # ├╌╌╌╌╌╌╌┤
1590
+ # # │ two │
1591
+ # # ├╌╌╌╌╌╌╌┤
1592
+ # # │ two │
1593
+ # # ├╌╌╌╌╌╌╌┤
1594
+ # # │ one │
1595
+ # # ├╌╌╌╌╌╌╌┤
1596
+ # # │ two │
1597
+ # # └───────┘
862
1598
  def sort_by(by, reverse: false)
863
1599
  if !by.is_a?(Array)
864
1600
  by = [by]
@@ -871,6 +1607,39 @@ module Polars
871
1607
  wrap_expr(_rbexpr.sort_by(by, reverse))
872
1608
  end
873
1609
 
1610
+ # Take values by index.
1611
+ #
1612
+ # @param indices [Expr]
1613
+ # An expression that leads to a `:u32` dtyped Series.
1614
+ #
1615
+ # @return [Expr]
1616
+ #
1617
+ # @example
1618
+ # df = Polars::DataFrame.new(
1619
+ # {
1620
+ # "group" => [
1621
+ # "one",
1622
+ # "one",
1623
+ # "one",
1624
+ # "two",
1625
+ # "two",
1626
+ # "two"
1627
+ # ],
1628
+ # "value" => [1, 98, 2, 3, 99, 4]
1629
+ # }
1630
+ # )
1631
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1632
+ # # =>
1633
+ # # shape: (2, 2)
1634
+ # # ┌───────┬───────┐
1635
+ # # │ group ┆ value │
1636
+ # # │ --- ┆ --- │
1637
+ # # │ str ┆ i64 │
1638
+ # # ╞═══════╪═══════╡
1639
+ # # │ one ┆ 98 │
1640
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1641
+ # # │ two ┆ 99 │
1642
+ # # └───────┴───────┘
874
1643
  def take(indices)
875
1644
  if indices.is_a?(Array)
876
1645
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
@@ -909,11 +1678,105 @@ module Polars
909
1678
  wrap_expr(_rbexpr.shift(periods))
910
1679
  end
911
1680
 
1681
+ # Shift the values by a given period and fill the resulting null values.
1682
+ #
1683
+ # @param periods [Integer]
1684
+ # Number of places to shift (may be negative).
1685
+ # @param fill_value [Object]
1686
+ # Fill nil values with the result of this expression.
1687
+ #
1688
+ # @return [Expr]
1689
+ #
1690
+ # @example
1691
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
1692
+ # df.select(Polars.col("foo").shift_and_fill(1, "a"))
1693
+ # # =>
1694
+ # # shape: (4, 1)
1695
+ # # ┌─────┐
1696
+ # # │ foo │
1697
+ # # │ --- │
1698
+ # # │ str │
1699
+ # # ╞═════╡
1700
+ # # │ a │
1701
+ # # ├╌╌╌╌╌┤
1702
+ # # │ 1 │
1703
+ # # ├╌╌╌╌╌┤
1704
+ # # │ 2 │
1705
+ # # ├╌╌╌╌╌┤
1706
+ # # │ 3 │
1707
+ # # └─────┘
912
1708
  def shift_and_fill(periods, fill_value)
913
1709
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
914
1710
  wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
915
1711
  end
916
1712
 
1713
+ # Fill null values using the specified value or strategy.
1714
+ #
1715
+ # To interpolate over null values, see `interpolate`.
1716
+ #
1717
+ # @param value [Object]
1718
+ # Value used to fill null values.
1719
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1720
+ # Strategy used to fill null values.
1721
+ # @param limit [Integer]
1722
+ # Number of consecutive null values to fill when using the 'forward' or
1723
+ # 'backward' strategy.
1724
+ #
1725
+ # @return [Expr]
1726
+ #
1727
+ # @example
1728
+ # df = Polars::DataFrame.new(
1729
+ # {
1730
+ # "a" => [1, 2, nil],
1731
+ # "b" => [4, nil, 6]
1732
+ # }
1733
+ # )
1734
+ # df.fill_null(strategy: "zero")
1735
+ # # =>
1736
+ # # shape: (3, 2)
1737
+ # # ┌─────┬─────┐
1738
+ # # │ a ┆ b │
1739
+ # # │ --- ┆ --- │
1740
+ # # │ i64 ┆ i64 │
1741
+ # # ╞═════╪═════╡
1742
+ # # │ 1 ┆ 4 │
1743
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1744
+ # # │ 2 ┆ 0 │
1745
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1746
+ # # │ 0 ┆ 6 │
1747
+ # # └─────┴─────┘
1748
+ #
1749
+ # @example
1750
+ # df.fill_null(99)
1751
+ # # =>
1752
+ # # shape: (3, 2)
1753
+ # # ┌─────┬─────┐
1754
+ # # │ a ┆ b │
1755
+ # # │ --- ┆ --- │
1756
+ # # │ i64 ┆ i64 │
1757
+ # # ╞═════╪═════╡
1758
+ # # │ 1 ┆ 4 │
1759
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1760
+ # # │ 2 ┆ 99 │
1761
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1762
+ # # │ 99 ┆ 6 │
1763
+ # # └─────┴─────┘
1764
+ #
1765
+ # @example
1766
+ # df.fill_null(strategy: "forward")
1767
+ # # =>
1768
+ # # shape: (3, 2)
1769
+ # # ┌─────┬─────┐
1770
+ # # │ a ┆ b │
1771
+ # # │ --- ┆ --- │
1772
+ # # │ i64 ┆ i64 │
1773
+ # # ╞═════╪═════╡
1774
+ # # │ 1 ┆ 4 │
1775
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1776
+ # # │ 2 ┆ 4 │
1777
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1778
+ # # │ 2 ┆ 6 │
1779
+ # # └─────┴─────┘
917
1780
  def fill_null(value = nil, strategy: nil, limit: nil)
918
1781
  if !value.nil? && !strategy.nil?
919
1782
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -931,75 +1794,426 @@ module Polars
931
1794
  end
932
1795
  end
933
1796
 
1797
+ # Fill floating point NaN value with a fill value.
1798
+ #
1799
+ # @return [Expr]
1800
+ #
1801
+ # @example
1802
+ # df = Polars::DataFrame.new(
1803
+ # {
1804
+ # "a" => [1.0, nil, Float::NAN],
1805
+ # "b" => [4.0, Float::NAN, 6]
1806
+ # }
1807
+ # )
1808
+ # df.fill_nan("zero")
1809
+ # # =>
1810
+ # # shape: (3, 2)
1811
+ # # ┌──────┬──────┐
1812
+ # # │ a ┆ b │
1813
+ # # │ --- ┆ --- │
1814
+ # # │ str ┆ str │
1815
+ # # ╞══════╪══════╡
1816
+ # # │ 1.0 ┆ 4.0 │
1817
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1818
+ # # │ null ┆ zero │
1819
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1820
+ # # │ zero ┆ 6.0 │
1821
+ # # └──────┴──────┘
934
1822
  def fill_nan(fill_value)
935
1823
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
936
1824
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
937
1825
  end
938
1826
 
1827
+ # Fill missing values with the latest seen values.
1828
+ #
1829
+ # @param limit [Integer]
1830
+ # The number of consecutive null values to forward fill.
1831
+ #
1832
+ # @return [Expr]
1833
+ #
1834
+ # @example
1835
+ # df = Polars::DataFrame.new(
1836
+ # {
1837
+ # "a" => [1, 2, nil],
1838
+ # "b" => [4, nil, 6]
1839
+ # }
1840
+ # )
1841
+ # df.select(Polars.all.forward_fill)
1842
+ # # =>
1843
+ # # shape: (3, 2)
1844
+ # # ┌─────┬─────┐
1845
+ # # │ a ┆ b │
1846
+ # # │ --- ┆ --- │
1847
+ # # │ i64 ┆ i64 │
1848
+ # # ╞═════╪═════╡
1849
+ # # │ 1 ┆ 4 │
1850
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1851
+ # # │ 2 ┆ 4 │
1852
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1853
+ # # │ 2 ┆ 6 │
1854
+ # # └─────┴─────┘
939
1855
  def forward_fill(limit: nil)
940
1856
  wrap_expr(_rbexpr.forward_fill(limit))
941
1857
  end
942
1858
 
1859
+ # Fill missing values with the next to be seen values.
1860
+ #
1861
+ # @param limit [Integer]
1862
+ # The number of consecutive null values to backward fill.
1863
+ #
1864
+ # @return [Expr]
1865
+ #
1866
+ # @example
1867
+ # df = Polars::DataFrame.new(
1868
+ # {
1869
+ # "a" => [1, 2, nil],
1870
+ # "b" => [4, nil, 6]
1871
+ # }
1872
+ # )
1873
+ # df.select(Polars.all.backward_fill)
1874
+ # # =>
1875
+ # # shape: (3, 2)
1876
+ # # ┌──────┬─────┐
1877
+ # # │ a ┆ b │
1878
+ # # │ --- ┆ --- │
1879
+ # # │ i64 ┆ i64 │
1880
+ # # ╞══════╪═════╡
1881
+ # # │ 1 ┆ 4 │
1882
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1883
+ # # │ 2 ┆ 6 │
1884
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1885
+ # # │ null ┆ 6 │
1886
+ # # └──────┴─────┘
943
1887
  def backward_fill(limit: nil)
944
1888
  wrap_expr(_rbexpr.backward_fill(limit))
945
1889
  end
946
1890
 
1891
+ # Reverse the selection.
1892
+ #
1893
+ # @return [Expr]
947
1894
  def reverse
948
1895
  wrap_expr(_rbexpr.reverse)
949
1896
  end
950
1897
 
1898
+ # Get standard deviation.
1899
+ #
1900
+ # @param ddof [Integer]
1901
+ # Degrees of freedom.
1902
+ #
1903
+ # @return [Expr]
1904
+ #
1905
+ # @example
1906
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1907
+ # df.select(Polars.col("a").std)
1908
+ # # =>
1909
+ # # shape: (1, 1)
1910
+ # # ┌─────┐
1911
+ # # │ a │
1912
+ # # │ --- │
1913
+ # # │ f64 │
1914
+ # # ╞═════╡
1915
+ # # │ 1.0 │
1916
+ # # └─────┘
951
1917
  def std(ddof: 1)
952
1918
  wrap_expr(_rbexpr.std(ddof))
953
1919
  end
954
1920
 
1921
+ # Get variance.
1922
+ #
1923
+ # @param ddof [Integer]
1924
+ # Degrees of freedom.
1925
+ #
1926
+ # @return [Expr]
1927
+ #
1928
+ # @example
1929
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1930
+ # df.select(Polars.col("a").var)
1931
+ # # =>
1932
+ # # shape: (1, 1)
1933
+ # # ┌─────┐
1934
+ # # │ a │
1935
+ # # │ --- │
1936
+ # # │ f64 │
1937
+ # # ╞═════╡
1938
+ # # │ 1.0 │
1939
+ # # └─────┘
955
1940
  def var(ddof: 1)
956
1941
  wrap_expr(_rbexpr.var(ddof))
957
1942
  end
958
1943
 
1944
+ # Get maximum value.
1945
+ #
1946
+ # @return [Expr]
1947
+ #
1948
+ # @example
1949
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1950
+ # df.select(Polars.col("a").max)
1951
+ # # =>
1952
+ # # shape: (1, 1)
1953
+ # # ┌─────┐
1954
+ # # │ a │
1955
+ # # │ --- │
1956
+ # # │ f64 │
1957
+ # # ╞═════╡
1958
+ # # │ 1.0 │
1959
+ # # └─────┘
959
1960
  def max
960
1961
  wrap_expr(_rbexpr.max)
961
1962
  end
962
1963
 
1964
+ # Get minimum value.
1965
+ #
1966
+ # @return [Expr]
1967
+ #
1968
+ # @example
1969
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1970
+ # df.select(Polars.col("a").min)
1971
+ # # =>
1972
+ # # shape: (1, 1)
1973
+ # # ┌──────┐
1974
+ # # │ a │
1975
+ # # │ --- │
1976
+ # # │ f64 │
1977
+ # # ╞══════╡
1978
+ # # │ -1.0 │
1979
+ # # └──────┘
963
1980
  def min
964
1981
  wrap_expr(_rbexpr.min)
965
1982
  end
966
1983
 
1984
+ # Get maximum value, but propagate/poison encountered NaN values.
1985
+ #
1986
+ # @return [Expr]
1987
+ #
1988
+ # @example
1989
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1990
+ # df.select(Polars.col("a").nan_max)
1991
+ # # =>
1992
+ # # shape: (1, 1)
1993
+ # # ┌─────┐
1994
+ # # │ a │
1995
+ # # │ --- │
1996
+ # # │ f64 │
1997
+ # # ╞═════╡
1998
+ # # │ NaN │
1999
+ # # └─────┘
967
2000
  def nan_max
968
2001
  wrap_expr(_rbexpr.nan_max)
969
2002
  end
970
2003
 
971
- def nan_min
972
- wrap_expr(_rbexpr.nan_min)
973
- end
974
-
975
- def sum
976
- wrap_expr(_rbexpr.sum)
977
- end
978
-
979
- def mean
980
- wrap_expr(_rbexpr.mean)
981
- end
982
-
2004
+ # Get minimum value, but propagate/poison encountered NaN values.
2005
+ #
2006
+ # @return [Expr]
2007
+ #
2008
+ # @example
2009
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
2010
+ # df.select(Polars.col("a").nan_min)
2011
+ # # =>
2012
+ # # shape: (1, 1)
2013
+ # # ┌─────┐
2014
+ # # │ a │
2015
+ # # │ --- │
2016
+ # # │ f64 │
2017
+ # # ╞═════╡
2018
+ # # │ NaN │
2019
+ # # └─────┘
2020
+ def nan_min
2021
+ wrap_expr(_rbexpr.nan_min)
2022
+ end
2023
+
2024
+ # Get sum value.
2025
+ #
2026
+ # @return [Expr]
2027
+ #
2028
+ # @note
2029
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
2030
+ # `:i64` before summing to prevent overflow issues.
2031
+ #
2032
+ # @example
2033
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2034
+ # df.select(Polars.col("a").sum)
2035
+ # # =>
2036
+ # # shape: (1, 1)
2037
+ # # ┌─────┐
2038
+ # # │ a │
2039
+ # # │ --- │
2040
+ # # │ i64 │
2041
+ # # ╞═════╡
2042
+ # # │ 0 │
2043
+ # # └─────┘
2044
+ def sum
2045
+ wrap_expr(_rbexpr.sum)
2046
+ end
2047
+
2048
+ # Get mean value.
2049
+ #
2050
+ # @return [Expr]
2051
+ #
2052
+ # @example
2053
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2054
+ # df.select(Polars.col("a").mean)
2055
+ # # =>
2056
+ # # shape: (1, 1)
2057
+ # # ┌─────┐
2058
+ # # │ a │
2059
+ # # │ --- │
2060
+ # # │ f64 │
2061
+ # # ╞═════╡
2062
+ # # │ 0.0 │
2063
+ # # └─────┘
2064
+ def mean
2065
+ wrap_expr(_rbexpr.mean)
2066
+ end
2067
+
2068
+ # Get median value using linear interpolation.
2069
+ #
2070
+ # @return [Expr]
2071
+ #
2072
+ # @example
2073
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2074
+ # df.select(Polars.col("a").median)
2075
+ # # =>
2076
+ # # shape: (1, 1)
2077
+ # # ┌─────┐
2078
+ # # │ a │
2079
+ # # │ --- │
2080
+ # # │ f64 │
2081
+ # # ╞═════╡
2082
+ # # │ 0.0 │
2083
+ # # └─────┘
983
2084
  def median
984
2085
  wrap_expr(_rbexpr.median)
985
2086
  end
986
2087
 
2088
+ # Compute the product of an expression.
2089
+ #
2090
+ # @return [Expr]
2091
+ #
2092
+ # @example
2093
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
2094
+ # df.select(Polars.col("a").product)
2095
+ # # =>
2096
+ # # shape: (1, 1)
2097
+ # # ┌─────┐
2098
+ # # │ a │
2099
+ # # │ --- │
2100
+ # # │ i64 │
2101
+ # # ╞═════╡
2102
+ # # │ 6 │
2103
+ # # └─────┘
987
2104
  def product
988
2105
  wrap_expr(_rbexpr.product)
989
2106
  end
990
2107
 
2108
+ # Count unique values.
2109
+ #
2110
+ # @return [Expr]
2111
+ #
2112
+ # @example
2113
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2114
+ # df.select(Polars.col("a").n_unique)
2115
+ # # =>
2116
+ # # shape: (1, 1)
2117
+ # # ┌─────┐
2118
+ # # │ a │
2119
+ # # │ --- │
2120
+ # # │ u32 │
2121
+ # # ╞═════╡
2122
+ # # │ 2 │
2123
+ # # └─────┘
991
2124
  def n_unique
992
2125
  wrap_expr(_rbexpr.n_unique)
993
2126
  end
994
2127
 
2128
+ # Count null values.
2129
+ #
2130
+ # @return [Expr]
2131
+ #
2132
+ # @example
2133
+ # df = Polars::DataFrame.new(
2134
+ # {
2135
+ # "a" => [nil, 1, nil],
2136
+ # "b" => [1, 2, 3]
2137
+ # }
2138
+ # )
2139
+ # df.select(Polars.all.null_count)
2140
+ # # =>
2141
+ # # shape: (1, 2)
2142
+ # # ┌─────┬─────┐
2143
+ # # │ a ┆ b │
2144
+ # # │ --- ┆ --- │
2145
+ # # │ u32 ┆ u32 │
2146
+ # # ╞═════╪═════╡
2147
+ # # │ 2 ┆ 0 │
2148
+ # # └─────┴─────┘
995
2149
  def null_count
996
2150
  wrap_expr(_rbexpr.null_count)
997
2151
  end
998
2152
 
2153
+ # Get index of first unique value.
2154
+ #
2155
+ # @return [Expr]
2156
+ #
2157
+ # @example
2158
+ # df = Polars::DataFrame.new(
2159
+ # {
2160
+ # "a" => [8, 9, 10],
2161
+ # "b" => [nil, 4, 4]
2162
+ # }
2163
+ # )
2164
+ # df.select(Polars.col("a").arg_unique)
2165
+ # # =>
2166
+ # # shape: (3, 1)
2167
+ # # ┌─────┐
2168
+ # # │ a │
2169
+ # # │ --- │
2170
+ # # │ u32 │
2171
+ # # ╞═════╡
2172
+ # # │ 0 │
2173
+ # # ├╌╌╌╌╌┤
2174
+ # # │ 1 │
2175
+ # # ├╌╌╌╌╌┤
2176
+ # # │ 2 │
2177
+ # # └─────┘
2178
+ #
2179
+ # @example
2180
+ # df.select(Polars.col("b").arg_unique)
2181
+ # # =>
2182
+ # # shape: (2, 1)
2183
+ # # ┌─────┐
2184
+ # # │ b │
2185
+ # # │ --- │
2186
+ # # │ u32 │
2187
+ # # ╞═════╡
2188
+ # # │ 0 │
2189
+ # # ├╌╌╌╌╌┤
2190
+ # # │ 1 │
2191
+ # # └─────┘
999
2192
  def arg_unique
1000
2193
  wrap_expr(_rbexpr.arg_unique)
1001
2194
  end
1002
2195
 
2196
+ # Get unique values of this expression.
2197
+ #
2198
+ # @param maintain_order [Boolean]
2199
+ # Maintain order of data. This requires more work.
2200
+ #
2201
+ # @return [Expr]
2202
+ #
2203
+ # @example
2204
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2205
+ # df.select(Polars.col("a").unique(maintain_order: true))
2206
+ # # =>
2207
+ # # shape: (2, 1)
2208
+ # # ┌─────┐
2209
+ # # │ a │
2210
+ # # │ --- │
2211
+ # # │ i64 │
2212
+ # # ╞═════╡
2213
+ # # │ 1 │
2214
+ # # ├╌╌╌╌╌┤
2215
+ # # │ 2 │
2216
+ # # └─────┘
1003
2217
  def unique(maintain_order: false)
1004
2218
  if maintain_order
1005
2219
  wrap_expr(_rbexpr.unique_stable)
@@ -1008,243 +2222,2508 @@ module Polars
1008
2222
  end
1009
2223
  end
1010
2224
 
2225
+ # Get the first value.
2226
+ #
2227
+ # @return [Expr]
2228
+ #
2229
+ # @example
2230
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2231
+ # df.select(Polars.col("a").first)
2232
+ # # =>
2233
+ # # shape: (1, 1)
2234
+ # # ┌─────┐
2235
+ # # │ a │
2236
+ # # │ --- │
2237
+ # # │ i64 │
2238
+ # # ╞═════╡
2239
+ # # │ 1 │
2240
+ # # └─────┘
1011
2241
  def first
1012
2242
  wrap_expr(_rbexpr.first)
1013
2243
  end
1014
2244
 
2245
+ # Get the last value.
2246
+ #
2247
+ # @return [Expr]
2248
+ #
2249
+ # @example
2250
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2251
+ # df.select(Polars.col("a").last)
2252
+ # # =>
2253
+ # # shape: (1, 1)
2254
+ # # ┌─────┐
2255
+ # # │ a │
2256
+ # # │ --- │
2257
+ # # │ i64 │
2258
+ # # ╞═════╡
2259
+ # # │ 2 │
2260
+ # # └─────┘
1015
2261
  def last
1016
2262
  wrap_expr(_rbexpr.last)
1017
2263
  end
1018
2264
 
2265
+ # Apply window function over a subgroup.
2266
+ #
2267
+ # This is similar to a groupby + aggregation + self join.
2268
+ # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2269
+ #
2270
+ # @param expr [Object]
2271
+ # Column(s) to group by.
2272
+ #
2273
+ # @return [Expr]
2274
+ #
2275
+ # @example
2276
+ # df = Polars::DataFrame.new(
2277
+ # {
2278
+ # "groups" => ["g1", "g1", "g2"],
2279
+ # "values" => [1, 2, 3]
2280
+ # }
2281
+ # )
2282
+ # df.with_column(
2283
+ # Polars.col("values").max.over("groups").alias("max_by_group")
2284
+ # )
2285
+ # # =>
2286
+ # # shape: (3, 3)
2287
+ # # ┌────────┬────────┬──────────────┐
2288
+ # # │ groups ┆ values ┆ max_by_group │
2289
+ # # │ --- ┆ --- ┆ --- │
2290
+ # # │ str ┆ i64 ┆ i64 │
2291
+ # # ╞════════╪════════╪══════════════╡
2292
+ # # │ g1 ┆ 1 ┆ 2 │
2293
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2294
+ # # │ g1 ┆ 2 ┆ 2 │
2295
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2296
+ # # │ g2 ┆ 3 ┆ 3 │
2297
+ # # └────────┴────────┴──────────────┘
2298
+ #
2299
+ # @example
2300
+ # df = Polars::DataFrame.new(
2301
+ # {
2302
+ # "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
2303
+ # "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
2304
+ # }
2305
+ # )
2306
+ # df.lazy
2307
+ # .select([Polars.col("groups").sum.over("groups")])
2308
+ # .collect
2309
+ # # =>
2310
+ # # shape: (9, 1)
2311
+ # # ┌────────┐
2312
+ # # │ groups │
2313
+ # # │ --- │
2314
+ # # │ i64 │
2315
+ # # ╞════════╡
2316
+ # # │ 4 │
2317
+ # # ├╌╌╌╌╌╌╌╌┤
2318
+ # # │ 4 │
2319
+ # # ├╌╌╌╌╌╌╌╌┤
2320
+ # # │ 6 │
2321
+ # # ├╌╌╌╌╌╌╌╌┤
2322
+ # # │ 6 │
2323
+ # # ├╌╌╌╌╌╌╌╌┤
2324
+ # # │ ... │
2325
+ # # ├╌╌╌╌╌╌╌╌┤
2326
+ # # │ 6 │
2327
+ # # ├╌╌╌╌╌╌╌╌┤
2328
+ # # │ 6 │
2329
+ # # ├╌╌╌╌╌╌╌╌┤
2330
+ # # │ 6 │
2331
+ # # ├╌╌╌╌╌╌╌╌┤
2332
+ # # │ 4 │
2333
+ # # └────────┘
1019
2334
  def over(expr)
1020
2335
  rbexprs = Utils.selection_to_rbexpr_list(expr)
1021
2336
  wrap_expr(_rbexpr.over(rbexprs))
1022
2337
  end
1023
2338
 
2339
+ # Get mask of unique values.
2340
+ #
2341
+ # @return [Expr]
2342
+ #
2343
+ # @example
2344
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2345
+ # df.select(Polars.col("a").is_unique)
2346
+ # # =>
2347
+ # # shape: (3, 1)
2348
+ # # ┌───────┐
2349
+ # # │ a │
2350
+ # # │ --- │
2351
+ # # │ bool │
2352
+ # # ╞═══════╡
2353
+ # # │ false │
2354
+ # # ├╌╌╌╌╌╌╌┤
2355
+ # # │ false │
2356
+ # # ├╌╌╌╌╌╌╌┤
2357
+ # # │ true │
2358
+ # # └───────┘
1024
2359
  def is_unique
1025
2360
  wrap_expr(_rbexpr.is_unique)
1026
2361
  end
1027
2362
 
2363
+ # Get a mask of the first unique value.
2364
+ #
2365
+ # @return [Expr]
2366
+ #
2367
+ # @example
2368
+ # df = Polars::DataFrame.new(
2369
+ # {
2370
+ # "num" => [1, 2, 3, 1, 5]
2371
+ # }
2372
+ # )
2373
+ # df.with_column(Polars.col("num").is_first.alias("is_first"))
2374
+ # # =>
2375
+ # # shape: (5, 2)
2376
+ # # ┌─────┬──────────┐
2377
+ # # │ num ┆ is_first │
2378
+ # # │ --- ┆ --- │
2379
+ # # │ i64 ┆ bool │
2380
+ # # ╞═════╪══════════╡
2381
+ # # │ 1 ┆ true │
2382
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2383
+ # # │ 2 ┆ true │
2384
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2385
+ # # │ 3 ┆ true │
2386
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2387
+ # # │ 1 ┆ false │
2388
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2389
+ # # │ 5 ┆ true │
2390
+ # # └─────┴──────────┘
1028
2391
  def is_first
1029
2392
  wrap_expr(_rbexpr.is_first)
1030
2393
  end
1031
2394
 
2395
+ # Get mask of duplicated values.
2396
+ #
2397
+ # @return [Expr]
2398
+ #
2399
+ # @example
2400
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2401
+ # df.select(Polars.col("a").is_duplicated)
2402
+ # # =>
2403
+ # # shape: (3, 1)
2404
+ # # ┌───────┐
2405
+ # # │ a │
2406
+ # # │ --- │
2407
+ # # │ bool │
2408
+ # # ╞═══════╡
2409
+ # # │ true │
2410
+ # # ├╌╌╌╌╌╌╌┤
2411
+ # # │ true │
2412
+ # # ├╌╌╌╌╌╌╌┤
2413
+ # # │ false │
2414
+ # # └───────┘
1032
2415
  def is_duplicated
1033
2416
  wrap_expr(_rbexpr.is_duplicated)
1034
2417
  end
1035
2418
 
1036
- def quantile(quantile, interpolation: "nearest")
1037
- wrap_expr(_rbexpr.quantile(quantile, interpolation))
1038
- end
1039
-
1040
- def filter(predicate)
1041
- wrap_expr(_rbexpr.filter(predicate._rbexpr))
1042
- end
1043
-
1044
- def where(predicate)
1045
- filter(predicate)
1046
- end
1047
-
1048
- # def map
1049
- # end
1050
-
1051
- # def apply
1052
- # end
1053
-
2419
+ # Get quantile value.
1054
2420
  #
1055
- def flatten
1056
- wrap_expr(_rbexpr.explode)
1057
- end
1058
-
1059
- def explode
1060
- wrap_expr(_rbexpr.explode)
2421
+ # @param quantile [Float]
2422
+ # Quantile between 0.0 and 1.0.
2423
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2424
+ # Interpolation method.
2425
+ #
2426
+ # @return [Expr]
2427
+ #
2428
+ # @example
2429
+ # df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
2430
+ # df.select(Polars.col("a").quantile(0.3))
2431
+ # # =>
2432
+ # # shape: (1, 1)
2433
+ # # ┌─────┐
2434
+ # # │ a │
2435
+ # # │ --- │
2436
+ # # │ f64 │
2437
+ # # ╞═════╡
2438
+ # # │ 1.0 │
2439
+ # # └─────┘
2440
+ #
2441
+ # @example
2442
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
2443
+ # # =>
2444
+ # # shape: (1, 1)
2445
+ # # ┌─────┐
2446
+ # # │ a │
2447
+ # # │ --- │
2448
+ # # │ f64 │
2449
+ # # ╞═════╡
2450
+ # # │ 2.0 │
2451
+ # # └─────┘
2452
+ #
2453
+ # @example
2454
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
2455
+ # # =>
2456
+ # # shape: (1, 1)
2457
+ # # ┌─────┐
2458
+ # # │ a │
2459
+ # # │ --- │
2460
+ # # │ f64 │
2461
+ # # ╞═════╡
2462
+ # # │ 1.0 │
2463
+ # # └─────┘
2464
+ #
2465
+ # @example
2466
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
2467
+ # # =>
2468
+ # # shape: (1, 1)
2469
+ # # ┌─────┐
2470
+ # # │ a │
2471
+ # # │ --- │
2472
+ # # │ f64 │
2473
+ # # ╞═════╡
2474
+ # # │ 1.5 │
2475
+ # # └─────┘
2476
+ #
2477
+ # @example
2478
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
2479
+ # # =>
2480
+ # # shape: (1, 1)
2481
+ # # ┌─────┐
2482
+ # # │ a │
2483
+ # # │ --- │
2484
+ # # │ f64 │
2485
+ # # ╞═════╡
2486
+ # # │ 1.5 │
2487
+ # # └─────┘
2488
+ def quantile(quantile, interpolation: "nearest")
2489
+ wrap_expr(_rbexpr.quantile(quantile, interpolation))
2490
+ end
2491
+
2492
+ # Filter a single column.
2493
+ #
2494
+ # Mostly useful in an aggregation context. If you want to filter on a DataFrame
2495
+ # level, use `LazyFrame#filter`.
2496
+ #
2497
+ # @param predicate [Expr]
2498
+ # Boolean expression.
2499
+ #
2500
+ # @return [Expr]
2501
+ #
2502
+ # @example
2503
+ # df = Polars::DataFrame.new(
2504
+ # {
2505
+ # "group_col" => ["g1", "g1", "g2"],
2506
+ # "b" => [1, 2, 3]
2507
+ # }
2508
+ # )
2509
+ # (
2510
+ # df.groupby("group_col").agg(
2511
+ # [
2512
+ # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2513
+ # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
2514
+ # ]
2515
+ # )
2516
+ # ).sort("group_col")
2517
+ # # =>
2518
+ # # shape: (2, 3)
2519
+ # # ┌───────────┬──────┬─────┐
2520
+ # # │ group_col ┆ lt ┆ gte │
2521
+ # # │ --- ┆ --- ┆ --- │
2522
+ # # │ str ┆ i64 ┆ i64 │
2523
+ # # ╞═══════════╪══════╪═════╡
2524
+ # # │ g1 ┆ 1 ┆ 2 │
2525
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2526
+ # # │ g2 ┆ null ┆ 3 │
2527
+ # # └───────────┴──────┴─────┘
2528
+ def filter(predicate)
2529
+ wrap_expr(_rbexpr.filter(predicate._rbexpr))
2530
+ end
2531
+
2532
+ # Filter a single column.
2533
+ #
2534
+ # Alias for {#filter}.
2535
+ #
2536
+ # @param predicate [Expr]
2537
+ # Boolean expression.
2538
+ #
2539
+ # @return [Expr]
2540
+ #
2541
+ # @example
2542
+ # df = Polars::DataFrame.new(
2543
+ # {
2544
+ # "group_col" => ["g1", "g1", "g2"],
2545
+ # "b" => [1, 2, 3]
2546
+ # }
2547
+ # )
2548
+ # (
2549
+ # df.groupby("group_col").agg(
2550
+ # [
2551
+ # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2552
+ # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
2553
+ # ]
2554
+ # )
2555
+ # ).sort("group_col")
2556
+ # # =>
2557
+ # # shape: (2, 3)
2558
+ # # ┌───────────┬──────┬─────┐
2559
+ # # │ group_col ┆ lt ┆ gte │
2560
+ # # │ --- ┆ --- ┆ --- │
2561
+ # # │ str ┆ i64 ┆ i64 │
2562
+ # # ╞═══════════╪══════╪═════╡
2563
+ # # │ g1 ┆ 1 ┆ 2 │
2564
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2565
+ # # │ g2 ┆ null ┆ 3 │
2566
+ # # └───────────┴──────┴─────┘
2567
+ def where(predicate)
2568
+ filter(predicate)
2569
+ end
2570
+
2571
+ # Apply a custom Ruby function to a Series or sequence of Series.
2572
+ #
2573
+ # The output of this custom function must be a Series.
2574
+ # If you want to apply a custom function elementwise over single values, see
2575
+ # {#apply}. A use case for `map` is when you want to transform an
2576
+ # expression with a third-party library.
2577
+ #
2578
+ # Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
2579
+ #
2580
+ # @param return_dtype [Symbol]
2581
+ # Dtype of the output Series.
2582
+ # @param agg_list [Boolean]
2583
+ # Aggregate list.
2584
+ #
2585
+ # @return [Expr]
2586
+ #
2587
+ # @example
2588
+ # df = Polars::DataFrame.new(
2589
+ # {
2590
+ # "sine" => [0.0, 1.0, 0.0, -1.0],
2591
+ # "cosine" => [1.0, 0.0, -1.0, 0.0]
2592
+ # }
2593
+ # )
2594
+ # df.select(Polars.all.map { |x| x.to_numpy.argmax })
2595
+ # # =>
2596
+ # # shape: (1, 2)
2597
+ # # ┌──────┬────────┐
2598
+ # # │ sine ┆ cosine │
2599
+ # # │ --- ┆ --- │
2600
+ # # │ i64 ┆ i64 │
2601
+ # # ╞══════╪════════╡
2602
+ # # │ 1 ┆ 0 │
2603
+ # # └──────┴────────┘
2604
+ # def map(return_dtype: nil, agg_list: false, &f)
2605
+ # if !return_dtype.nil?
2606
+ # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2607
+ # end
2608
+ # wrap_expr(_rbexpr.map(f, return_dtype, agg_list))
2609
+ # end
2610
+
2611
+ # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
2612
+ #
2613
+ # Depending on the context it has the following behavior:
2614
+ #
2615
+ # * Selection
2616
+ # Expects the block to take a single value and return a single value.
2617
+ # Applies a Ruby function over each individual value in the column.
2618
+ # * GroupBy
2619
+ # Expects the block to take a Series and return a Series.
2620
+ # Applies a Ruby function over each group.
2621
+ #
2622
+ # Implementing logic using a Ruby function is almost always _significantly_
2623
+ # slower and more memory intensive than implementing the same logic using
2624
+ # the native expression API because:
2625
+ #
2626
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
2627
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
2628
+ # - Polars-native expressions can be parallelised (UDFs cannot).
2629
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
2630
+ #
2631
+ # Wherever possible you should strongly prefer the native expression API
2632
+ # to achieve the best performance.
2633
+ #
2634
+ # @param return_dtype [Symbol]
2635
+ # Dtype of the output Series.
2636
+ # If not set, polars will assume that
2637
+ # the dtype remains unchanged.
2638
+ #
2639
+ # @return [Expr]
2640
+ #
2641
+ # @example
2642
+ # df = Polars::DataFrame.new(
2643
+ # {
2644
+ # "a" => [1, 2, 3, 1],
2645
+ # "b" => ["a", "b", "c", "c"]
2646
+ # }
2647
+ # )
2648
+ #
2649
+ # @example In a selection context, the function is applied by row.
2650
+ # df.with_column(
2651
+ # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
2652
+ # )
2653
+ # # =>
2654
+ # # shape: (4, 3)
2655
+ # # ┌─────┬─────┬───────────┐
2656
+ # # │ a ┆ b ┆ a_times_2 │
2657
+ # # │ --- ┆ --- ┆ --- │
2658
+ # # │ i64 ┆ str ┆ i64 │
2659
+ # # ╞═════╪═════╪═══════════╡
2660
+ # # │ 1 ┆ a ┆ 2 │
2661
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2662
+ # # │ 2 ┆ b ┆ 4 │
2663
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2664
+ # # │ 3 ┆ c ┆ 6 │
2665
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2666
+ # # │ 1 ┆ c ┆ 2 │
2667
+ # # └─────┴─────┴───────────┘
2668
+ #
2669
+ # @example In a GroupBy context the function is applied by group:
2670
+ # df.lazy
2671
+ # .groupby("b", maintain_order: true)
2672
+ # .agg(
2673
+ # [
2674
+ # Polars.col("a").apply { |x| x.sum }
2675
+ # ]
2676
+ # )
2677
+ # .collect
2678
+ # # =>
2679
+ # # shape: (3, 2)
2680
+ # # ┌─────┬─────┐
2681
+ # # │ b ┆ a │
2682
+ # # │ --- ┆ --- │
2683
+ # # │ str ┆ i64 │
2684
+ # # ╞═════╪═════╡
2685
+ # # │ a ┆ 1 │
2686
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2687
+ # # │ b ┆ 2 │
2688
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2689
+ # # │ c ┆ 4 │
2690
+ # # └─────┴─────┘
2691
+ # def apply(return_dtype: nil, &f)
2692
+ # wrap_f = lambda do |x|
2693
+ # x.apply(return_dtype: return_dtype, &f)
2694
+ # end
2695
+ # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
2696
+ # end
2697
+
2698
+ # Explode a list or utf8 Series. This means that every item is expanded to a new
2699
+ # row.
2700
+ #
2701
+ # Alias for {#explode}.
2702
+ #
2703
+ # @return [Expr]
2704
+ #
2705
+ # @example
2706
+ # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2707
+ # df.select(Polars.col("foo").flatten)
2708
+ # # =>
2709
+ # # shape: (10, 1)
2710
+ # # ┌─────┐
2711
+ # # │ foo │
2712
+ # # │ --- │
2713
+ # # │ str │
2714
+ # # ╞═════╡
2715
+ # # │ h │
2716
+ # # ├╌╌╌╌╌┤
2717
+ # # │ e │
2718
+ # # ├╌╌╌╌╌┤
2719
+ # # │ l │
2720
+ # # ├╌╌╌╌╌┤
2721
+ # # │ l │
2722
+ # # ├╌╌╌╌╌┤
2723
+ # # │ ... │
2724
+ # # ├╌╌╌╌╌┤
2725
+ # # │ o │
2726
+ # # ├╌╌╌╌╌┤
2727
+ # # │ r │
2728
+ # # ├╌╌╌╌╌┤
2729
+ # # │ l │
2730
+ # # ├╌╌╌╌╌┤
2731
+ # # │ d │
2732
+ # # └─────┘
2733
+ def flatten
2734
+ wrap_expr(_rbexpr.explode)
2735
+ end
2736
+
2737
+ # Explode a list or utf8 Series.
2738
+ #
2739
+ # This means that every item is expanded to a new row.
2740
+ #
2741
+ # @return [Expr]
2742
+ #
2743
+ # @example
2744
+ # df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
2745
+ # df.select(Polars.col("b").explode)
2746
+ # # =>
2747
+ # # shape: (6, 1)
2748
+ # # ┌─────┐
2749
+ # # │ b │
2750
+ # # │ --- │
2751
+ # # │ i64 │
2752
+ # # ╞═════╡
2753
+ # # │ 1 │
2754
+ # # ├╌╌╌╌╌┤
2755
+ # # │ 2 │
2756
+ # # ├╌╌╌╌╌┤
2757
+ # # │ 3 │
2758
+ # # ├╌╌╌╌╌┤
2759
+ # # │ 4 │
2760
+ # # ├╌╌╌╌╌┤
2761
+ # # │ 5 │
2762
+ # # ├╌╌╌╌╌┤
2763
+ # # │ 6 │
2764
+ # # └─────┘
2765
+ def explode
2766
+ wrap_expr(_rbexpr.explode)
1061
2767
  end
1062
2768
 
2769
+ # Take every nth value in the Series and return as a new Series.
2770
+ #
2771
+ # @return [Expr]
2772
+ #
2773
+ # @example
2774
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2775
+ # df.select(Polars.col("foo").take_every(3))
2776
+ # # =>
2777
+ # # shape: (3, 1)
2778
+ # # ┌─────┐
2779
+ # # │ foo │
2780
+ # # │ --- │
2781
+ # # │ i64 │
2782
+ # # ╞═════╡
2783
+ # # │ 1 │
2784
+ # # ├╌╌╌╌╌┤
2785
+ # # │ 4 │
2786
+ # # ├╌╌╌╌╌┤
2787
+ # # │ 7 │
2788
+ # # └─────┘
1063
2789
  def take_every(n)
1064
2790
  wrap_expr(_rbexpr.take_every(n))
1065
2791
  end
1066
2792
 
2793
+ # Get the first `n` rows.
2794
+ #
2795
+ # @param n [Integer]
2796
+ # Number of rows to return.
2797
+ #
2798
+ # @return [Expr]
2799
+ #
2800
+ # @example
2801
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2802
+ # df.head(3)
2803
+ # # =>
2804
+ # # shape: (3, 1)
2805
+ # # ┌─────┐
2806
+ # # │ foo │
2807
+ # # │ --- │
2808
+ # # │ i64 │
2809
+ # # ╞═════╡
2810
+ # # │ 1 │
2811
+ # # ├╌╌╌╌╌┤
2812
+ # # │ 2 │
2813
+ # # ├╌╌╌╌╌┤
2814
+ # # │ 3 │
2815
+ # # └─────┘
1067
2816
  def head(n = 10)
1068
2817
  wrap_expr(_rbexpr.head(n))
1069
2818
  end
1070
2819
 
2820
+ # Get the last `n` rows.
2821
+ #
2822
+ # @param n [Integer]
2823
+ # Number of rows to return.
2824
+ #
2825
+ # @return [Expr]
2826
+ #
2827
+ # @example
2828
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2829
+ # df.tail(3)
2830
+ # # =>
2831
+ # # shape: (3, 1)
2832
+ # # ┌─────┐
2833
+ # # │ foo │
2834
+ # # │ --- │
2835
+ # # │ i64 │
2836
+ # # ╞═════╡
2837
+ # # │ 5 │
2838
+ # # ├╌╌╌╌╌┤
2839
+ # # │ 6 │
2840
+ # # ├╌╌╌╌╌┤
2841
+ # # │ 7 │
2842
+ # # └─────┘
1071
2843
  def tail(n = 10)
1072
2844
  wrap_expr(_rbexpr.tail(n))
1073
2845
  end
1074
2846
 
2847
+ # Get the first `n` rows.
2848
+ #
2849
+ # Alias for {#head}.
2850
+ #
2851
+ # @param n [Integer]
2852
+ # Number of rows to return.
2853
+ #
2854
+ # @return [Expr]
1075
2855
  def limit(n = 10)
1076
2856
  head(n)
1077
2857
  end
1078
2858
 
2859
+ # Raise expression to the power of exponent.
2860
+ #
2861
+ # @return [Expr]
2862
+ #
2863
+ # @example
2864
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
2865
+ # df.select(Polars.col("foo").pow(3))
2866
+ # # =>
2867
+ # # shape: (4, 1)
2868
+ # # ┌──────┐
2869
+ # # │ foo │
2870
+ # # │ --- │
2871
+ # # │ f64 │
2872
+ # # ╞══════╡
2873
+ # # │ 1.0 │
2874
+ # # ├╌╌╌╌╌╌┤
2875
+ # # │ 8.0 │
2876
+ # # ├╌╌╌╌╌╌┤
2877
+ # # │ 27.0 │
2878
+ # # ├╌╌╌╌╌╌┤
2879
+ # # │ 64.0 │
2880
+ # # └──────┘
1079
2881
  def pow(exponent)
1080
2882
  exponent = Utils.expr_to_lit_or_expr(exponent)
1081
2883
  wrap_expr(_rbexpr.pow(exponent._rbexpr))
1082
2884
  end
1083
2885
 
1084
- # def is_in
1085
- # end
1086
-
2886
+ # Check if elements of this expression are present in the other Series.
1087
2887
  #
1088
- def repeat_by(by)
1089
- by = Utils.expr_to_lit_or_expr(by, false)
1090
- wrap_expr(_rbexpr.repeat_by(by._rbexpr))
1091
- end
1092
-
1093
- # def is_between
1094
- # end
1095
-
1096
- # def _hash
1097
- # end
1098
-
2888
+ # @param other [Object]
2889
+ # Series or sequence of primitive type.
1099
2890
  #
1100
- def reinterpret(signed: false)
1101
- wrap_expr(_rbexpr.reinterpret(signed))
1102
- end
1103
-
1104
- # def _inspect
1105
- # end
1106
-
2891
+ # @return [Expr]
1107
2892
  #
1108
- def interpolate
1109
- wrap_expr(_rbexpr.interpolate)
1110
- end
1111
-
1112
- # def rolling_min
1113
- # end
1114
-
1115
- # def rolling_max
1116
- # end
2893
+ # @example
2894
+ # df = Polars::DataFrame.new(
2895
+ # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
2896
+ # )
2897
+ # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
2898
+ # # =>
2899
+ # # shape: (3, 1)
2900
+ # # ┌──────────┐
2901
+ # # │ contains │
2902
+ # # │ --- │
2903
+ # # │ bool │
2904
+ # # ╞══════════╡
2905
+ # # │ true │
2906
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2907
+ # # │ true │
2908
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2909
+ # # │ false │
2910
+ # # └──────────┘
2911
+ def is_in(other)
2912
+ if other.is_a?(Array)
2913
+ if other.length == 0
2914
+ other = Polars.lit(nil)
2915
+ else
2916
+ other = Polars.lit(Series.new(other))
2917
+ end
2918
+ else
2919
+ other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
2920
+ end
2921
+ wrap_expr(_rbexpr.is_in(other._rbexpr))
2922
+ end
1117
2923
 
1118
- # def rolling_mean
1119
- # end
2924
+ # Repeat the elements in this Series as specified in the given expression.
2925
+ #
2926
+ # The repeated elements are expanded into a `List`.
2927
+ #
2928
+ # @param by [Object]
2929
+ # Numeric column that determines how often the values will be repeated.
2930
+ # The column will be coerced to UInt32. Give this dtype to make the coercion a
2931
+ # no-op.
2932
+ #
2933
+ # @return [Expr]
2934
+ #
2935
+ # @example
2936
+ # df = Polars::DataFrame.new(
2937
+ # {
2938
+ # "a" => ["x", "y", "z"],
2939
+ # "n" => [1, 2, 3]
2940
+ # }
2941
+ # )
2942
+ # df.select(Polars.col("a").repeat_by("n"))
2943
+ # # =>
2944
+ # # shape: (3, 1)
2945
+ # # ┌─────────────────┐
2946
+ # # │ a │
2947
+ # # │ --- │
2948
+ # # │ list[str] │
2949
+ # # ╞═════════════════╡
2950
+ # # │ ["x"] │
2951
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2952
+ # # │ ["y", "y"] │
2953
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2954
+ # # │ ["z", "z", "z"] │
2955
+ # # └─────────────────┘
2956
+ def repeat_by(by)
2957
+ by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
2958
+ wrap_expr(_rbexpr.repeat_by(by._rbexpr))
2959
+ end
1120
2960
 
1121
- # def rolling_sum
1122
- # end
2961
+ # Check if this expression is between start and end.
2962
+ #
2963
+ # @param start [Object]
2964
+ # Lower bound as primitive type or datetime.
2965
+ # @param _end [Object]
2966
+ # Upper bound as primitive type or datetime.
2967
+ # @param include_bounds [Boolean, Array<Boolean>]
2968
+ # `false`: Exclude both start and end (default).
2969
+ # `true`: Include both start and end.
2970
+ # `[false, false]`: Exclude start and exclude end.
2971
+ # `[true, true]`: Include start and include end.
2972
+ # `[false, true]`: Exclude start and include end.
2973
+ # `[true, false]`: Include start and exclude end.
2974
+ #
2975
+ # @return [Expr]
2976
+ #
2977
+ # @example
2978
+ # df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
2979
+ # df.with_column(Polars.col("num").is_between(2, 4))
2980
+ # # =>
2981
+ # # shape: (5, 2)
2982
+ # # ┌─────┬────────────┐
2983
+ # # │ num ┆ is_between │
2984
+ # # │ --- ┆ --- │
2985
+ # # │ i64 ┆ bool │
2986
+ # # ╞═════╪════════════╡
2987
+ # # │ 1 ┆ false │
2988
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2989
+ # # │ 2 ┆ false │
2990
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2991
+ # # │ 3 ┆ true │
2992
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2993
+ # # │ 4 ┆ false │
2994
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2995
+ # # │ 5 ┆ false │
2996
+ # # └─────┴────────────┘
2997
+ def is_between(start, _end, include_bounds: false)
2998
+ if include_bounds == false || include_bounds == [false, false]
2999
+ ((self > start) & (self < _end)).alias("is_between")
3000
+ elsif include_bounds == true || include_bounds == [true, true]
3001
+ ((self >= start) & (self <= _end)).alias("is_between")
3002
+ elsif include_bounds == [false, true]
3003
+ ((self > start) & (self <= _end)).alias("is_between")
3004
+ elsif include_bounds == [true, false]
3005
+ ((self >= start) & (self < _end)).alias("is_between")
3006
+ else
3007
+ raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
3008
+ end
3009
+ end
1123
3010
 
1124
- # def rolling_std
1125
- # end
3011
+ # Hash the elements in the selection.
3012
+ #
3013
+ # The hash value is of type `:u64`.
3014
+ #
3015
+ # @param seed [Integer]
3016
+ # Random seed parameter. Defaults to 0.
3017
+ # @param seed_1 [Integer]
3018
+ # Random seed parameter. Defaults to `seed` if not set.
3019
+ # @param seed_2 [Integer]
3020
+ # Random seed parameter. Defaults to `seed` if not set.
3021
+ # @param seed_3 [Integer]
3022
+ # Random seed parameter. Defaults to `seed` if not set.
3023
+ #
3024
+ # @return [Expr]
3025
+ #
3026
+ # @example
3027
+ # df = Polars::DataFrame.new(
3028
+ # {
3029
+ # "a" => [1, 2, nil],
3030
+ # "b" => ["x", nil, "z"]
3031
+ # }
3032
+ # )
3033
+ # df.with_column(Polars.all._hash(10, 20, 30, 40))
3034
+ # # =>
3035
+ # # shape: (3, 2)
3036
+ # # ┌──────────────────────┬──────────────────────┐
3037
+ # # │ a ┆ b │
3038
+ # # │ --- ┆ --- │
3039
+ # # │ u64 ┆ u64 │
3040
+ # # ╞══════════════════════╪══════════════════════╡
3041
+ # # │ 4629889412789719550 ┆ 6959506404929392568 │
3042
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
3043
+ # # │ 16386608652769605760 ┆ 11638928888656214026 │
3044
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
3045
+ # # │ 11638928888656214026 ┆ 11040941213715918520 │
3046
+ # # └──────────────────────┴──────────────────────┘
3047
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
3048
+ k0 = seed
3049
+ k1 = seed_1.nil? ? seed : seed_1
3050
+ k2 = seed_2.nil? ? seed : seed_2
3051
+ k3 = seed_3.nil? ? seed : seed_3
3052
+ wrap_expr(_rbexpr._hash(k0, k1, k2, k3))
3053
+ end
3054
+
3055
+ # Reinterpret the underlying bits as a signed/unsigned integer.
3056
+ #
3057
+ # This operation is only allowed for 64-bit integers. For integers with fewer bits,
3058
+ # you can safely use the cast operation.
3059
+ #
3060
+ # @param signed [Boolean]
3061
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
3062
+ #
3063
+ # @return [Expr]
3064
+ #
3065
+ # @example
3066
+ # s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
3067
+ # df = Polars::DataFrame.new([s])
3068
+ # df.select(
3069
+ # [
3070
+ # Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
3071
+ # Polars.col("a").alias("original")
3072
+ # ]
3073
+ # )
3074
+ # # =>
3075
+ # # shape: (3, 2)
3076
+ # # ┌───────────────┬──────────┐
3077
+ # # │ reinterpreted ┆ original │
3078
+ # # │ --- ┆ --- │
3079
+ # # │ i64 ┆ u64 │
3080
+ # # ╞═══════════════╪══════════╡
3081
+ # # │ 1 ┆ 1 │
3082
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
3083
+ # # │ 1 ┆ 1 │
3084
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
3085
+ # # │ 2 ┆ 2 │
3086
+ # # └───────────────┴──────────┘
3087
+ def reinterpret(signed: false)
3088
+ wrap_expr(_rbexpr.reinterpret(signed))
3089
+ end
1126
3090
 
1127
- # def rolling_var
1128
- # end
3091
+ # Print the value that this expression evaluates to and pass on the value.
3092
+ #
3093
+ # @return [Expr]
3094
+ #
3095
+ # @example
3096
+ # df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
3097
+ # df.select(Polars.col("foo").cumsum._inspect("value is: %s").alias("bar"))
3098
+ # # =>
3099
+ # # value is: shape: (3,)
3100
+ # # Series: 'foo' [i64]
3101
+ # # [
3102
+ # # 1
3103
+ # # 2
3104
+ # # 4
3105
+ # # ]
3106
+ # # shape: (3, 1)
3107
+ # # ┌─────┐
3108
+ # # │ bar │
3109
+ # # │ --- │
3110
+ # # │ i64 │
3111
+ # # ╞═════╡
3112
+ # # │ 1 │
3113
+ # # ├╌╌╌╌╌┤
3114
+ # # │ 2 │
3115
+ # # ├╌╌╌╌╌┤
3116
+ # # │ 4 │
3117
+ # # └─────┘
3118
+ # def _inspect(fmt = "%s")
3119
+ # inspect = lambda do |s|
3120
+ # puts(fmt % [s])
3121
+ # s
3122
+ # end
1129
3123
 
1130
- # def rolling_median
3124
+ # map(return_dtype: nil, agg_list: true, &inspect)
1131
3125
  # end
1132
3126
 
1133
- # def rolling_quantile
1134
- # end
3127
+ # Fill nulls with linear interpolation over missing values.
3128
+ #
3129
+ # Can also be used to regrid data to a new grid - see examples below.
3130
+ #
3131
+ # @return [Expr]
3132
+ #
3133
+ # @example Fill nulls with linear interpolation
3134
+ # df = Polars::DataFrame.new(
3135
+ # {
3136
+ # "a" => [1, nil, 3],
3137
+ # "b" => [1.0, Float::NAN, 3.0]
3138
+ # }
3139
+ # )
3140
+ # df.select(Polars.all.interpolate)
3141
+ # # =>
3142
+ # # shape: (3, 2)
3143
+ # # ┌─────┬─────┐
3144
+ # # │ a ┆ b │
3145
+ # # │ --- ┆ --- │
3146
+ # # │ i64 ┆ f64 │
3147
+ # # ╞═════╪═════╡
3148
+ # # │ 1 ┆ 1.0 │
3149
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3150
+ # # │ 2 ┆ NaN │
3151
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3152
+ # # │ 3 ┆ 3.0 │
3153
+ # # └─────┴─────┘
3154
+ def interpolate
3155
+ wrap_expr(_rbexpr.interpolate)
3156
+ end
1135
3157
 
1136
- # def rolling_apply
3158
+ # Apply a rolling min (moving min) over the values in this array.
3159
+ #
3160
+ # A window of length `window_size` will traverse the array. The values that fill
3161
+ # this window will (optionally) be multiplied with the weights given by the
3162
+ # `weights` vector. The resulting values will be aggregated to their minimum.
3163
+ #
3164
+ # @param window_size [Integer]
3165
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3166
+ # size indicated by a timedelta or the following string language:
3167
+ #
3168
+ # - 1ns (1 nanosecond)
3169
+ # - 1us (1 microsecond)
3170
+ # - 1ms (1 millisecond)
3171
+ # - 1s (1 second)
3172
+ # - 1m (1 minute)
3173
+ # - 1h (1 hour)
3174
+ # - 1d (1 day)
3175
+ # - 1w (1 week)
3176
+ # - 1mo (1 calendar month)
3177
+ # - 1y (1 calendar year)
3178
+ # - 1i (1 index count)
3179
+ #
3180
+ # If a timedelta or the dynamic string language is used, the `by`
3181
+ # and `closed` arguments must also be set.
3182
+ # @param weights [Array]
3183
+ # An optional slice with the same length as the window that will be multiplied
3184
+ # elementwise with the values in the window.
3185
+ # @param min_periods [Integer]
3186
+ # The number of values in the window that should be non-null before computing
3187
+ # a result. If nil, it will be set equal to window size.
3188
+ # @param center [Boolean]
3189
+ # Set the labels at the center of the window
3190
+ # @param by [String]
3191
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3192
+ # set the column that will be used to determine the windows. This column must
3193
+ # be of dtype `{Date, Datetime}`
3194
+ # @param closed ["left", "right", "both", "none"]
3195
+ # Define whether the temporal window interval is closed or not.
3196
+ #
3197
+ # @note
3198
+ # This functionality is experimental and may change without it being considered a
3199
+ # breaking change.
3200
+ #
3201
+ # @note
3202
+ # If you want to compute multiple aggregation statistics over the same dynamic
3203
+ # window, consider using `groupby_rolling`; this method can cache the window size
3204
+ # computation.
3205
+ #
3206
+ # @return [Expr]
3207
+ #
3208
+ # @example
3209
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3210
+ # df.select(
3211
+ # [
3212
+ # Polars.col("A").rolling_min(2)
3213
+ # ]
3214
+ # )
3215
+ # # =>
3216
+ # # shape: (6, 1)
3217
+ # # ┌──────┐
3218
+ # # │ A │
3219
+ # # │ --- │
3220
+ # # │ f64 │
3221
+ # # ╞══════╡
3222
+ # # │ null │
3223
+ # # ├╌╌╌╌╌╌┤
3224
+ # # │ 1.0 │
3225
+ # # ├╌╌╌╌╌╌┤
3226
+ # # │ 2.0 │
3227
+ # # ├╌╌╌╌╌╌┤
3228
+ # # │ 3.0 │
3229
+ # # ├╌╌╌╌╌╌┤
3230
+ # # │ 4.0 │
3231
+ # # ├╌╌╌╌╌╌┤
3232
+ # # │ 5.0 │
3233
+ # # └──────┘
3234
+ def rolling_min(
3235
+ window_size,
3236
+ weights: nil,
3237
+ min_periods: nil,
3238
+ center: false,
3239
+ by: nil,
3240
+ closed: "left"
3241
+ )
3242
+ window_size, min_periods = _prepare_rolling_window_args(
3243
+ window_size, min_periods
3244
+ )
3245
+ wrap_expr(
3246
+ _rbexpr.rolling_min(
3247
+ window_size, weights, min_periods, center, by, closed
3248
+ )
3249
+ )
3250
+ end
3251
+
3252
+ # Apply a rolling max (moving max) over the values in this array.
3253
+ #
3254
+ # A window of length `window_size` will traverse the array. The values that fill
3255
+ # this window will (optionally) be multiplied with the weights given by the
3256
+ # `weights` vector. The resulting values will be aggregated to their maximum.
3257
+ #
3258
+ # @param window_size [Integer]
3259
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3260
+ # size indicated by a timedelta or the following string language:
3261
+ #
3262
+ # - 1ns (1 nanosecond)
3263
+ # - 1us (1 microsecond)
3264
+ # - 1ms (1 millisecond)
3265
+ # - 1s (1 second)
3266
+ # - 1m (1 minute)
3267
+ # - 1h (1 hour)
3268
+ # - 1d (1 day)
3269
+ # - 1w (1 week)
3270
+ # - 1mo (1 calendar month)
3271
+ # - 1y (1 calendar year)
3272
+ # - 1i (1 index count)
3273
+ #
3274
+ # If a timedelta or the dynamic string language is used, the `by`
3275
+ # and `closed` arguments must also be set.
3276
+ # @param weights [Array]
3277
+ # An optional slice with the same length as the window that will be multiplied
3278
+ # elementwise with the values in the window.
3279
+ # @param min_periods [Integer]
3280
+ # The number of values in the window that should be non-null before computing
3281
+ # a result. If nil, it will be set equal to window size.
3282
+ # @param center [Boolean]
3283
+ # Set the labels at the center of the window
3284
+ # @param by [String]
3285
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3286
+ # set the column that will be used to determine the windows. This column must
3287
+ # be of dtype `{Date, Datetime}`
3288
+ # @param closed ["left", "right", "both", "none"]
3289
+ # Define whether the temporal window interval is closed or not.
3290
+ #
3291
+ # @note
3292
+ # This functionality is experimental and may change without it being considered a
3293
+ # breaking change.
3294
+ #
3295
+ # @note
3296
+ # If you want to compute multiple aggregation statistics over the same dynamic
3297
+ # window, consider using `groupby_rolling`; this method can cache the window size
3298
+ # computation.
3299
+ #
3300
+ # @return [Expr]
3301
+ #
3302
+ # @example
3303
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3304
+ # df.select(
3305
+ # [
3306
+ # Polars.col("A").rolling_max(2)
3307
+ # ]
3308
+ # )
3309
+ # # =>
3310
+ # # shape: (6, 1)
3311
+ # # ┌──────┐
3312
+ # # │ A │
3313
+ # # │ --- │
3314
+ # # │ f64 │
3315
+ # # ╞══════╡
3316
+ # # │ null │
3317
+ # # ├╌╌╌╌╌╌┤
3318
+ # # │ 2.0 │
3319
+ # # ├╌╌╌╌╌╌┤
3320
+ # # │ 3.0 │
3321
+ # # ├╌╌╌╌╌╌┤
3322
+ # # │ 4.0 │
3323
+ # # ├╌╌╌╌╌╌┤
3324
+ # # │ 5.0 │
3325
+ # # ├╌╌╌╌╌╌┤
3326
+ # # │ 6.0 │
3327
+ # # └──────┘
3328
+ def rolling_max(
3329
+ window_size,
3330
+ weights: nil,
3331
+ min_periods: nil,
3332
+ center: false,
3333
+ by: nil,
3334
+ closed: "left"
3335
+ )
3336
+ window_size, min_periods = _prepare_rolling_window_args(
3337
+ window_size, min_periods
3338
+ )
3339
+ wrap_expr(
3340
+ _rbexpr.rolling_max(
3341
+ window_size, weights, min_periods, center, by, closed
3342
+ )
3343
+ )
3344
+ end
3345
+
3346
+ # Apply a rolling mean (moving mean) over the values in this array.
3347
+ #
3348
+ # A window of length `window_size` will traverse the array. The values that fill
3349
+ # this window will (optionally) be multiplied with the weights given by the
3350
+ # `weights` vector. The resulting values will be aggregated to their mean.
3351
+ #
3352
+ # @param window_size [Integer]
3353
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3354
+ # size indicated by a timedelta or the following string language:
3355
+ #
3356
+ # - 1ns (1 nanosecond)
3357
+ # - 1us (1 microsecond)
3358
+ # - 1ms (1 millisecond)
3359
+ # - 1s (1 second)
3360
+ # - 1m (1 minute)
3361
+ # - 1h (1 hour)
3362
+ # - 1d (1 day)
3363
+ # - 1w (1 week)
3364
+ # - 1mo (1 calendar month)
3365
+ # - 1y (1 calendar year)
3366
+ # - 1i (1 index count)
3367
+ #
3368
+ # If a timedelta or the dynamic string language is used, the `by`
3369
+ # and `closed` arguments must also be set.
3370
+ # @param weights [Array]
3371
+ # An optional slice with the same length as the window that will be multiplied
3372
+ # elementwise with the values in the window.
3373
+ # @param min_periods [Integer]
3374
+ # The number of values in the window that should be non-null before computing
3375
+ # a result. If nil, it will be set equal to window size.
3376
+ # @param center [Boolean]
3377
+ # Set the labels at the center of the window
3378
+ # @param by [String]
3379
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3380
+ # set the column that will be used to determine the windows. This column must
3381
+ # be of dtype `{Date, Datetime}`
3382
+ # @param closed ["left", "right", "both", "none"]
3383
+ # Define whether the temporal window interval is closed or not.
3384
+ #
3385
+ # @note
3386
+ # This functionality is experimental and may change without it being considered a
3387
+ # breaking change.
3388
+ #
3389
+ # @note
3390
+ # If you want to compute multiple aggregation statistics over the same dynamic
3391
+ # window, consider using `groupby_rolling`; this method can cache the window size
3392
+ # computation.
3393
+ #
3394
+ # @return [Expr]
3395
+ #
3396
+ # @example
3397
+ # df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
3398
+ # df.select(
3399
+ # [
3400
+ # Polars.col("A").rolling_mean(2)
3401
+ # ]
3402
+ # )
3403
+ # # =>
3404
+ # # shape: (6, 1)
3405
+ # # ┌──────┐
3406
+ # # │ A │
3407
+ # # │ --- │
3408
+ # # │ f64 │
3409
+ # # ╞══════╡
3410
+ # # │ null │
3411
+ # # ├╌╌╌╌╌╌┤
3412
+ # # │ 4.5 │
3413
+ # # ├╌╌╌╌╌╌┤
3414
+ # # │ 7.0 │
3415
+ # # ├╌╌╌╌╌╌┤
3416
+ # # │ 4.0 │
3417
+ # # ├╌╌╌╌╌╌┤
3418
+ # # │ 9.0 │
3419
+ # # ├╌╌╌╌╌╌┤
3420
+ # # │ 13.0 │
3421
+ # # └──────┘
3422
+ def rolling_mean(
3423
+ window_size,
3424
+ weights: nil,
3425
+ min_periods: nil,
3426
+ center: false,
3427
+ by: nil,
3428
+ closed: "left"
3429
+ )
3430
+ window_size, min_periods = _prepare_rolling_window_args(
3431
+ window_size, min_periods
3432
+ )
3433
+ wrap_expr(
3434
+ _rbexpr.rolling_mean(
3435
+ window_size, weights, min_periods, center, by, closed
3436
+ )
3437
+ )
3438
+ end
3439
+
3440
+ # Apply a rolling sum (moving sum) over the values in this array.
3441
+ #
3442
+ # A window of length `window_size` will traverse the array. The values that fill
3443
+ # this window will (optionally) be multiplied with the weights given by the
3444
+ # `weights` vector. The resulting values will be aggregated to their sum.
3445
+ #
3446
+ # @param window_size [Integer]
3447
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3448
+ # size indicated by a timedelta or the following string language:
3449
+ #
3450
+ # - 1ns (1 nanosecond)
3451
+ # - 1us (1 microsecond)
3452
+ # - 1ms (1 millisecond)
3453
+ # - 1s (1 second)
3454
+ # - 1m (1 minute)
3455
+ # - 1h (1 hour)
3456
+ # - 1d (1 day)
3457
+ # - 1w (1 week)
3458
+ # - 1mo (1 calendar month)
3459
+ # - 1y (1 calendar year)
3460
+ # - 1i (1 index count)
3461
+ #
3462
+ # If a timedelta or the dynamic string language is used, the `by`
3463
+ # and `closed` arguments must also be set.
3464
+ # @param weights [Array]
3465
+ # An optional slice with the same length as the window that will be multiplied
3466
+ # elementwise with the values in the window.
3467
+ # @param min_periods [Integer]
3468
+ # The number of values in the window that should be non-null before computing
3469
+ # a result. If nil, it will be set equal to window size.
3470
+ # @param center [Boolean]
3471
+ # Set the labels at the center of the window
3472
+ # @param by [String]
3473
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3474
+ # set the column that will be used to determine the windows. This column must
3475
+ # be of dtype `{Date, Datetime}`
3476
+ # @param closed ["left", "right", "both", "none"]
3477
+ # Define whether the temporal window interval is closed or not.
3478
+ #
3479
+ # @note
3480
+ # This functionality is experimental and may change without it being considered a
3481
+ # breaking change.
3482
+ #
3483
+ # @note
3484
+ # If you want to compute multiple aggregation statistics over the same dynamic
3485
+ # window, consider using `groupby_rolling`; this method can cache the window size
3486
+ # computation.
3487
+ #
3488
+ # @return [Expr]
3489
+ #
3490
+ # @example
3491
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3492
+ # df.select(
3493
+ # [
3494
+ # Polars.col("A").rolling_sum(2)
3495
+ # ]
3496
+ # )
3497
+ # # =>
3498
+ # # shape: (6, 1)
3499
+ # # ┌──────┐
3500
+ # # │ A │
3501
+ # # │ --- │
3502
+ # # │ f64 │
3503
+ # # ╞══════╡
3504
+ # # │ null │
3505
+ # # ├╌╌╌╌╌╌┤
3506
+ # # │ 3.0 │
3507
+ # # ├╌╌╌╌╌╌┤
3508
+ # # │ 5.0 │
3509
+ # # ├╌╌╌╌╌╌┤
3510
+ # # │ 7.0 │
3511
+ # # ├╌╌╌╌╌╌┤
3512
+ # # │ 9.0 │
3513
+ # # ├╌╌╌╌╌╌┤
3514
+ # # │ 11.0 │
3515
+ # # └──────┘
3516
+ def rolling_sum(
3517
+ window_size,
3518
+ weights: nil,
3519
+ min_periods: nil,
3520
+ center: false,
3521
+ by: nil,
3522
+ closed: "left"
3523
+ )
3524
+ window_size, min_periods = _prepare_rolling_window_args(
3525
+ window_size, min_periods
3526
+ )
3527
+ wrap_expr(
3528
+ _rbexpr.rolling_sum(
3529
+ window_size, weights, min_periods, center, by, closed
3530
+ )
3531
+ )
3532
+ end
3533
+
3534
+ # Compute a rolling standard deviation.
3535
+ #
3536
+ # A window of length `window_size` will traverse the array. The values that fill
3537
+ # this window will (optionally) be multiplied with the weights given by the
3538
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3539
+ #
3540
+ # @param window_size [Integer]
3541
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3542
+ # size indicated by a timedelta or the following string language:
3543
+ #
3544
+ # - 1ns (1 nanosecond)
3545
+ # - 1us (1 microsecond)
3546
+ # - 1ms (1 millisecond)
3547
+ # - 1s (1 second)
3548
+ # - 1m (1 minute)
3549
+ # - 1h (1 hour)
3550
+ # - 1d (1 day)
3551
+ # - 1w (1 week)
3552
+ # - 1mo (1 calendar month)
3553
+ # - 1y (1 calendar year)
3554
+ # - 1i (1 index count)
3555
+ #
3556
+ # If a timedelta or the dynamic string language is used, the `by`
3557
+ # and `closed` arguments must also be set.
3558
+ # @param weights [Array]
3559
+ # An optional slice with the same length as the window that will be multiplied
3560
+ # elementwise with the values in the window.
3561
+ # @param min_periods [Integer]
3562
+ # The number of values in the window that should be non-null before computing
3563
+ # a result. If nil, it will be set equal to window size.
3564
+ # @param center [Boolean]
3565
+ # Set the labels at the center of the window
3566
+ # @param by [String]
3567
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3568
+ # set the column that will be used to determine the windows. This column must
3569
+ # be of dtype `{Date, Datetime}`
3570
+ # @param closed ["left", "right", "both", "none"]
3571
+ # Define whether the temporal window interval is closed or not.
3572
+ #
3573
+ # @note
3574
+ # This functionality is experimental and may change without it being considered a
3575
+ # breaking change.
3576
+ #
3577
+ # @note
3578
+ # If you want to compute multiple aggregation statistics over the same dynamic
3579
+ # window, consider using `groupby_rolling`; this method can cache the window size
3580
+ # computation.
3581
+ #
3582
+ # @return [Expr]
3583
+ #
3584
+ # @example
3585
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3586
+ # df.select(
3587
+ # [
3588
+ # Polars.col("A").rolling_std(3)
3589
+ # ]
3590
+ # )
3591
+ # # =>
3592
+ # # shape: (6, 1)
3593
+ # # ┌──────────┐
3594
+ # # │ A │
3595
+ # # │ --- │
3596
+ # # │ f64 │
3597
+ # # ╞══════════╡
3598
+ # # │ null │
3599
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3600
+ # # │ null │
3601
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3602
+ # # │ 1.0 │
3603
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3604
+ # # │ 1.0 │
3605
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3606
+ # # │ 1.527525 │
3607
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3608
+ # # │ 2.0 │
3609
+ # # └──────────┘
3610
+ def rolling_std(
3611
+ window_size,
3612
+ weights: nil,
3613
+ min_periods: nil,
3614
+ center: false,
3615
+ by: nil,
3616
+ closed: "left"
3617
+ )
3618
+ window_size, min_periods = _prepare_rolling_window_args(
3619
+ window_size, min_periods
3620
+ )
3621
+ wrap_expr(
3622
+ _rbexpr.rolling_std(
3623
+ window_size, weights, min_periods, center, by, closed
3624
+ )
3625
+ )
3626
+ end
3627
+
3628
+ # Compute a rolling variance.
3629
+ #
3630
+ # A window of length `window_size` will traverse the array. The values that fill
3631
+ # this window will (optionally) be multiplied with the weights given by the
3632
+ # `weights` vector. The resulting values will be aggregated to their variance.
3633
+ #
3634
+ # @param window_size [Integer]
3635
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3636
+ # size indicated by a timedelta or the following string language:
3637
+ #
3638
+ # - 1ns (1 nanosecond)
3639
+ # - 1us (1 microsecond)
3640
+ # - 1ms (1 millisecond)
3641
+ # - 1s (1 second)
3642
+ # - 1m (1 minute)
3643
+ # - 1h (1 hour)
3644
+ # - 1d (1 day)
3645
+ # - 1w (1 week)
3646
+ # - 1mo (1 calendar month)
3647
+ # - 1y (1 calendar year)
3648
+ # - 1i (1 index count)
3649
+ #
3650
+ # If a timedelta or the dynamic string language is used, the `by`
3651
+ # and `closed` arguments must also be set.
3652
+ # @param weights [Array]
3653
+ # An optional slice with the same length as the window that will be multiplied
3654
+ # elementwise with the values in the window.
3655
+ # @param min_periods [Integer]
3656
+ # The number of values in the window that should be non-null before computing
3657
+ # a result. If nil, it will be set equal to window size.
3658
+ # @param center [Boolean]
3659
+ # Set the labels at the center of the window
3660
+ # @param by [String]
3661
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3662
+ # set the column that will be used to determine the windows. This column must
3663
+ # be of dtype `{Date, Datetime}`
3664
+ # @param closed ["left", "right", "both", "none"]
3665
+ # Define whether the temporal window interval is closed or not.
3666
+ #
3667
+ # @note
3668
+ # This functionality is experimental and may change without it being considered a
3669
+ # breaking change.
3670
+ #
3671
+ # @note
3672
+ # If you want to compute multiple aggregation statistics over the same dynamic
3673
+ # window, consider using `groupby_rolling`; this method can cache the window size
3674
+ # computation.
3675
+ #
3676
+ # @return [Expr]
3677
+ #
3678
+ # @example
3679
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3680
+ # df.select(
3681
+ # [
3682
+ # Polars.col("A").rolling_var(3)
3683
+ # ]
3684
+ # )
3685
+ # # =>
3686
+ # # shape: (6, 1)
3687
+ # # ┌──────────┐
3688
+ # # │ A │
3689
+ # # │ --- │
3690
+ # # │ f64 │
3691
+ # # ╞══════════╡
3692
+ # # │ null │
3693
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3694
+ # # │ null │
3695
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3696
+ # # │ 1.0 │
3697
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3698
+ # # │ 1.0 │
3699
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3700
+ # # │ 2.333333 │
3701
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3702
+ # # │ 4.0 │
3703
+ # # └──────────┘
3704
+ def rolling_var(
3705
+ window_size,
3706
+ weights: nil,
3707
+ min_periods: nil,
3708
+ center: false,
3709
+ by: nil,
3710
+ closed: "left"
3711
+ )
3712
+ window_size, min_periods = _prepare_rolling_window_args(
3713
+ window_size, min_periods
3714
+ )
3715
+ wrap_expr(
3716
+ _rbexpr.rolling_var(
3717
+ window_size, weights, min_periods, center, by, closed
3718
+ )
3719
+ )
3720
+ end
3721
+
3722
+ # Compute a rolling median.
3723
+ #
3724
+ # @param window_size [Integer]
3725
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3726
+ # size indicated by a timedelta or the following string language:
3727
+ #
3728
+ # - 1ns (1 nanosecond)
3729
+ # - 1us (1 microsecond)
3730
+ # - 1ms (1 millisecond)
3731
+ # - 1s (1 second)
3732
+ # - 1m (1 minute)
3733
+ # - 1h (1 hour)
3734
+ # - 1d (1 day)
3735
+ # - 1w (1 week)
3736
+ # - 1mo (1 calendar month)
3737
+ # - 1y (1 calendar year)
3738
+ # - 1i (1 index count)
3739
+ #
3740
+ # If a timedelta or the dynamic string language is used, the `by`
3741
+ # and `closed` arguments must also be set.
3742
+ # @param weights [Array]
3743
+ # An optional slice with the same length as the window that will be multiplied
3744
+ # elementwise with the values in the window.
3745
+ # @param min_periods [Integer]
3746
+ # The number of values in the window that should be non-null before computing
3747
+ # a result. If nil, it will be set equal to window size.
3748
+ # @param center [Boolean]
3749
+ # Set the labels at the center of the window
3750
+ # @param by [String]
3751
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3752
+ # set the column that will be used to determine the windows. This column must
3753
+ # be of dtype `{Date, Datetime}`
3754
+ # @param closed ["left", "right", "both", "none"]
3755
+ # Define whether the temporal window interval is closed or not.
3756
+ #
3757
+ # @note
3758
+ # This functionality is experimental and may change without it being considered a
3759
+ # breaking change.
3760
+ #
3761
+ # @note
3762
+ # If you want to compute multiple aggregation statistics over the same dynamic
3763
+ # window, consider using `groupby_rolling`; this method can cache the window size
3764
+ # computation.
3765
+ #
3766
+ # @return [Expr]
3767
+ #
3768
+ # @example
3769
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3770
+ # df.select(
3771
+ # [
3772
+ # Polars.col("A").rolling_median(3)
3773
+ # ]
3774
+ # )
3775
+ # # =>
3776
+ # # shape: (6, 1)
3777
+ # # ┌──────┐
3778
+ # # │ A │
3779
+ # # │ --- │
3780
+ # # │ f64 │
3781
+ # # ╞══════╡
3782
+ # # │ null │
3783
+ # # ├╌╌╌╌╌╌┤
3784
+ # # │ null │
3785
+ # # ├╌╌╌╌╌╌┤
3786
+ # # │ 2.0 │
3787
+ # # ├╌╌╌╌╌╌┤
3788
+ # # │ 3.0 │
3789
+ # # ├╌╌╌╌╌╌┤
3790
+ # # │ 4.0 │
3791
+ # # ├╌╌╌╌╌╌┤
3792
+ # # │ 6.0 │
3793
+ # # └──────┘
3794
+ def rolling_median(
3795
+ window_size,
3796
+ weights: nil,
3797
+ min_periods: nil,
3798
+ center: false,
3799
+ by: nil,
3800
+ closed: "left"
3801
+ )
3802
+ window_size, min_periods = _prepare_rolling_window_args(
3803
+ window_size, min_periods
3804
+ )
3805
+ wrap_expr(
3806
+ _rbexpr.rolling_median(
3807
+ window_size, weights, min_periods, center, by, closed
3808
+ )
3809
+ )
3810
+ end
3811
+
3812
+ # Compute a rolling quantile.
3813
+ #
3814
+ # @param quantile [Float]
3815
+ # Quantile between 0.0 and 1.0.
3816
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3817
+ # Interpolation method.
3818
+ # @param window_size [Integer]
3819
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3820
+ # size indicated by a timedelta or the following string language:
3821
+ #
3822
+ # - 1ns (1 nanosecond)
3823
+ # - 1us (1 microsecond)
3824
+ # - 1ms (1 millisecond)
3825
+ # - 1s (1 second)
3826
+ # - 1m (1 minute)
3827
+ # - 1h (1 hour)
3828
+ # - 1d (1 day)
3829
+ # - 1w (1 week)
3830
+ # - 1mo (1 calendar month)
3831
+ # - 1y (1 calendar year)
3832
+ # - 1i (1 index count)
3833
+ #
3834
+ # If a timedelta or the dynamic string language is used, the `by`
3835
+ # and `closed` arguments must also be set.
3836
+ # @param weights [Array]
3837
+ # An optional slice with the same length as the window that will be multiplied
3838
+ # elementwise with the values in the window.
3839
+ # @param min_periods [Integer]
3840
+ # The number of values in the window that should be non-null before computing
3841
+ # a result. If nil, it will be set equal to window size.
3842
+ # @param center [Boolean]
3843
+ # Set the labels at the center of the window
3844
+ # @param by [String]
3845
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3846
+ # set the column that will be used to determine the windows. This column must
3847
+ # be of dtype `{Date, Datetime}`
3848
+ # @param closed ["left", "right", "both", "none"]
3849
+ # Define whether the temporal window interval is closed or not.
3850
+ #
3851
+ # @note
3852
+ # This functionality is experimental and may change without it being considered a
3853
+ # breaking change.
3854
+ #
3855
+ # @note
3856
+ # If you want to compute multiple aggregation statistics over the same dynamic
3857
+ # window, consider using `groupby_rolling`; this method can cache the window size
3858
+ # computation.
3859
+ #
3860
+ # @return [Expr]
3861
+ #
3862
+ # @example
3863
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3864
+ # df.select(
3865
+ # [
3866
+ # Polars.col("A").rolling_quantile(0.33, window_size: 3)
3867
+ # ]
3868
+ # )
3869
+ # # =>
3870
+ # # shape: (6, 1)
3871
+ # # ┌──────┐
3872
+ # # │ A │
3873
+ # # │ --- │
3874
+ # # │ f64 │
3875
+ # # ╞══════╡
3876
+ # # │ null │
3877
+ # # ├╌╌╌╌╌╌┤
3878
+ # # │ null │
3879
+ # # ├╌╌╌╌╌╌┤
3880
+ # # │ 1.0 │
3881
+ # # ├╌╌╌╌╌╌┤
3882
+ # # │ 2.0 │
3883
+ # # ├╌╌╌╌╌╌┤
3884
+ # # │ 3.0 │
3885
+ # # ├╌╌╌╌╌╌┤
3886
+ # # │ 4.0 │
3887
+ # # └──────┘
3888
+ def rolling_quantile(
3889
+ quantile,
3890
+ interpolation: "nearest",
3891
+ window_size: 2,
3892
+ weights: nil,
3893
+ min_periods: nil,
3894
+ center: false,
3895
+ by: nil,
3896
+ closed: "left"
3897
+ )
3898
+ window_size, min_periods = _prepare_rolling_window_args(
3899
+ window_size, min_periods
3900
+ )
3901
+ wrap_expr(
3902
+ _rbexpr.rolling_quantile(
3903
+ quantile, interpolation, window_size, weights, min_periods, center, by, closed
3904
+ )
3905
+ )
3906
+ end
3907
+
3908
+ # Apply a custom rolling window function.
3909
+ #
3910
+ # Prefer the specific rolling window functions over this one, as they are faster.
3911
+ #
3912
+ # Prefer:
3913
+ # * rolling_min
3914
+ # * rolling_max
3915
+ # * rolling_mean
3916
+ # * rolling_sum
3917
+ #
3918
+ # @param window_size [Integer]
3919
+ # The length of the window.
3920
+ # @param weights [Object]
3921
+ # An optional slice with the same length as the window that will be multiplied
3922
+ # elementwise with the values in the window.
3923
+ # @param min_periods [Integer]
3924
+ # The number of values in the window that should be non-null before computing
3925
+ # a result. If nil, it will be set equal to window size.
3926
+ # @param center [Boolean]
3927
+ # Set the labels at the center of the window
3928
+ #
3929
+ # @return [Expr]
3930
+ #
3931
+ # @example
3932
+ # df = Polars::DataFrame.new(
3933
+ # {
3934
+ # "A" => [1.0, 2.0, 9.0, 2.0, 13.0]
3935
+ # }
3936
+ # )
3937
+ # df.select(
3938
+ # [
3939
+ # Polars.col("A").rolling_apply(window_size: 3) { |s| s.std }
3940
+ # ]
3941
+ # )
3942
+ # # =>
3943
+ # # shape: (5, 1)
3944
+ # # ┌──────────┐
3945
+ # # │ A │
3946
+ # # │ --- │
3947
+ # # │ f64 │
3948
+ # # ╞══════════╡
3949
+ # # │ null │
3950
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3951
+ # # │ null │
3952
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3953
+ # # │ 4.358899 │
3954
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3955
+ # # │ 4.041452 │
3956
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3957
+ # # │ 5.567764 │
3958
+ # # └──────────┘
3959
+ # def rolling_apply(
3960
+ # window_size:,
3961
+ # weights: nil,
3962
+ # min_periods: nil,
3963
+ # center: false,
3964
+ # &function
3965
+ # )
3966
+ # if min_periods.nil?
3967
+ # min_periods = window_size
3968
+ # end
3969
+ # wrap_expr(
3970
+ # _rbexpr.rolling_apply(
3971
+ # function, window_size, weights, min_periods, center
3972
+ # )
3973
+ # )
1137
3974
  # end
1138
3975
 
3976
+ # Compute a rolling skew.
3977
+ #
3978
+ # @param window_size [Integer]
3979
+ # Integer size of the rolling window.
3980
+ # @param bias [Boolean]
3981
+ # If false, the calculations are corrected for statistical bias.
1139
3982
  #
3983
+ # @return [Expr]
1140
3984
  def rolling_skew(window_size, bias: true)
1141
3985
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
1142
3986
  end
1143
3987
 
3988
+ # Compute absolute values.
3989
+ #
3990
+ # @return [Expr]
3991
+ #
3992
+ # @example
3993
+ # df = Polars::DataFrame.new(
3994
+ # {
3995
+ # "A" => [-1.0, 0.0, 1.0, 2.0]
3996
+ # }
3997
+ # )
3998
+ # df.select(Polars.col("A").abs)
3999
+ # # =>
4000
+ # # shape: (4, 1)
4001
+ # # ┌─────┐
4002
+ # # │ A │
4003
+ # # │ --- │
4004
+ # # │ f64 │
4005
+ # # ╞═════╡
4006
+ # # │ 1.0 │
4007
+ # # ├╌╌╌╌╌┤
4008
+ # # │ 0.0 │
4009
+ # # ├╌╌╌╌╌┤
4010
+ # # │ 1.0 │
4011
+ # # ├╌╌╌╌╌┤
4012
+ # # │ 2.0 │
4013
+ # # └─────┘
1144
4014
  def abs
1145
4015
  wrap_expr(_rbexpr.abs)
1146
4016
  end
1147
4017
 
4018
+ # Get the index values that would sort this column.
4019
+ #
4020
+ # Alias for {#arg_sort}.
4021
+ #
4022
+ # @param reverse [Boolean]
4023
+ # Sort in reverse (descending) order.
4024
+ # @param nulls_last [Boolean]
4025
+ # Place null values last instead of first.
4026
+ #
4027
+ # @return [Expr]
4028
+ #
4029
+ # @example
4030
+ # df = Polars::DataFrame.new(
4031
+ # {
4032
+ # "a" => [20, 10, 30]
4033
+ # }
4034
+ # )
4035
+ # df.select(Polars.col("a").argsort)
4036
+ # # =>
4037
+ # # shape: (3, 1)
4038
+ # # ┌─────┐
4039
+ # # │ a │
4040
+ # # │ --- │
4041
+ # # │ u32 │
4042
+ # # ╞═════╡
4043
+ # # │ 1 │
4044
+ # # ├╌╌╌╌╌┤
4045
+ # # │ 0 │
4046
+ # # ├╌╌╌╌╌┤
4047
+ # # │ 2 │
4048
+ # # └─────┘
1148
4049
  def argsort(reverse: false, nulls_last: false)
1149
4050
  arg_sort(reverse: reverse, nulls_last: nulls_last)
1150
4051
  end
1151
4052
 
4053
+ # Assign ranks to data, dealing with ties appropriately.
4054
+ #
4055
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
4056
+ # The method used to assign ranks to tied elements.
4057
+ # The following methods are available:
4058
+ #
4059
+ # - 'average' : The average of the ranks that would have been assigned to
4060
+ # all the tied values is assigned to each value.
4061
+ # - 'min' : The minimum of the ranks that would have been assigned to all
4062
+ # the tied values is assigned to each value. (This is also referred to
4063
+ # as "competition" ranking.)
4064
+ # - 'max' : The maximum of the ranks that would have been assigned to all
4065
+ # the tied values is assigned to each value.
4066
+ # - 'dense' : Like 'min', but the rank of the next highest element is
4067
+ # assigned the rank immediately after those assigned to the tied
4068
+ # elements.
4069
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
4070
+ # the order that the values occur in the Series.
4071
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
4072
+ # on the order that the values occur in the Series.
4073
+ # @param reverse [Boolean]
4074
+ # Reverse the operation.
4075
+ #
4076
+ # @return [Expr]
4077
+ #
4078
+ # @example The 'average' method:
4079
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
4080
+ # df.select(Polars.col("a").rank)
4081
+ # # =>
4082
+ # # shape: (5, 1)
4083
+ # # ┌─────┐
4084
+ # # │ a │
4085
+ # # │ --- │
4086
+ # # │ f32 │
4087
+ # # ╞═════╡
4088
+ # # │ 3.0 │
4089
+ # # ├╌╌╌╌╌┤
4090
+ # # │ 4.5 │
4091
+ # # ├╌╌╌╌╌┤
4092
+ # # │ 1.5 │
4093
+ # # ├╌╌╌╌╌┤
4094
+ # # │ 1.5 │
4095
+ # # ├╌╌╌╌╌┤
4096
+ # # │ 4.5 │
4097
+ # # └─────┘
4098
+ #
4099
+ # @example The 'ordinal' method:
4100
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
4101
+ # df.select(Polars.col("a").rank(method: "ordinal"))
4102
+ # # =>
4103
+ # # shape: (5, 1)
4104
+ # # ┌─────┐
4105
+ # # │ a │
4106
+ # # │ --- │
4107
+ # # │ u32 │
4108
+ # # ╞═════╡
4109
+ # # │ 3 │
4110
+ # # ├╌╌╌╌╌┤
4111
+ # # │ 4 │
4112
+ # # ├╌╌╌╌╌┤
4113
+ # # │ 1 │
4114
+ # # ├╌╌╌╌╌┤
4115
+ # # │ 2 │
4116
+ # # ├╌╌╌╌╌┤
4117
+ # # │ 5 │
4118
+ # # └─────┘
1152
4119
  def rank(method: "average", reverse: false)
1153
4120
  wrap_expr(_rbexpr.rank(method, reverse))
1154
4121
  end
1155
4122
 
4123
+ # Calculate the n-th discrete difference.
4124
+ #
4125
+ # @param n [Integer]
4126
+ # Number of slots to shift.
4127
+ # @param null_behavior ["ignore", "drop"]
4128
+ # How to handle null values.
4129
+ #
4130
+ # @return [Expr]
4131
+ #
4132
+ # @example
4133
+ # df = Polars::DataFrame.new(
4134
+ # {
4135
+ # "a" => [20, 10, 30]
4136
+ # }
4137
+ # )
4138
+ # df.select(Polars.col("a").diff)
4139
+ # # =>
4140
+ # # shape: (3, 1)
4141
+ # # ┌──────┐
4142
+ # # │ a │
4143
+ # # │ --- │
4144
+ # # │ i64 │
4145
+ # # ╞══════╡
4146
+ # # │ null │
4147
+ # # ├╌╌╌╌╌╌┤
4148
+ # # │ -10 │
4149
+ # # ├╌╌╌╌╌╌┤
4150
+ # # │ 20 │
4151
+ # # └──────┘
1156
4152
  def diff(n: 1, null_behavior: "ignore")
1157
4153
  wrap_expr(_rbexpr.diff(n, null_behavior))
1158
4154
  end
1159
4155
 
4156
+ # Computes percentage change between values.
4157
+ #
4158
+ # Percentage change (as fraction) between current element and most-recent
4159
+ # non-null element at least `n` period(s) before the current element.
4160
+ #
4161
+ # Computes the change from the previous row by default.
4162
+ #
4163
+ # @param n [Integer]
4164
+ # Periods to shift for forming percent change.
4165
+ #
4166
+ # @return [Expr]
4167
+ #
4168
+ # @example
4169
+ # df = Polars::DataFrame.new(
4170
+ # {
4171
+ # "a" => [10, 11, 12, nil, 12]
4172
+ # }
4173
+ # )
4174
+ # df.with_column(Polars.col("a").pct_change.alias("pct_change"))
4175
+ # # =>
4176
+ # # shape: (5, 2)
4177
+ # # ┌──────┬────────────┐
4178
+ # # │ a ┆ pct_change │
4179
+ # # │ --- ┆ --- │
4180
+ # # │ i64 ┆ f64 │
4181
+ # # ╞══════╪════════════╡
4182
+ # # │ 10 ┆ null │
4183
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4184
+ # # │ 11 ┆ 0.1 │
4185
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4186
+ # # │ 12 ┆ 0.090909 │
4187
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4188
+ # # │ null ┆ 0.0 │
4189
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4190
+ # # │ 12 ┆ 0.0 │
4191
+ # # └──────┴────────────┘
1160
4192
  def pct_change(n: 1)
1161
4193
  wrap_expr(_rbexpr.pct_change(n))
1162
4194
  end
1163
4195
 
4196
+ # Compute the sample skewness of a data set.
4197
+ #
4198
+ # For normally distributed data, the skewness should be about zero. For
4199
+ # unimodal continuous distributions, a skewness value greater than zero means
4200
+ # that there is more weight in the right tail of the distribution. The
4201
+ # function `skewtest` can be used to determine if the skewness value
4202
+ # is close enough to zero, statistically speaking.
4203
+ #
4204
+ # @param bias [Boolean]
4205
+ # If false, the calculations are corrected for statistical bias.
4206
+ #
4207
+ # @return [Expr]
4208
+ #
4209
+ # @example
4210
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4211
+ # df.select(Polars.col("a").skew)
4212
+ # # =>
4213
+ # # shape: (1, 1)
4214
+ # # ┌──────────┐
4215
+ # # │ a │
4216
+ # # │ --- │
4217
+ # # │ f64 │
4218
+ # # ╞══════════╡
4219
+ # # │ 0.343622 │
4220
+ # # └──────────┘
1164
4221
  def skew(bias: true)
1165
4222
  wrap_expr(_rbexpr.skew(bias))
1166
4223
  end
1167
4224
 
4225
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
4226
+ #
4227
+ # Kurtosis is the fourth central moment divided by the square of the
4228
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
4229
+ # the result to give 0.0 for a normal distribution.
4230
+ # If bias is false then the kurtosis is calculated using k statistics to
4231
+ # eliminate bias coming from biased moment estimators.
4232
+ #
4233
+ # @param fisher [Boolean]
4234
+ # If true, Fisher's definition is used (normal ==> 0.0). If false,
4235
+ # Pearson's definition is used (normal ==> 3.0).
4236
+ # @param bias [Boolean]
4237
+ # If false, the calculations are corrected for statistical bias.
4238
+ #
4239
+ # @return [Expr]
4240
+ #
4241
+ # @example
4242
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4243
+ # df.select(Polars.col("a").kurtosis)
4244
+ # # =>
4245
+ # # shape: (1, 1)
4246
+ # # ┌───────────┐
4247
+ # # │ a │
4248
+ # # │ --- │
4249
+ # # │ f64 │
4250
+ # # ╞═══════════╡
4251
+ # # │ -1.153061 │
4252
+ # # └───────────┘
1168
4253
  def kurtosis(fisher: true, bias: true)
1169
4254
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
1170
4255
  end
1171
4256
 
4257
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
4258
+ #
4259
+ # Only works for numerical types.
4260
+ #
4261
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4262
+ # expression. See `when` for more information.
4263
+ #
4264
+ # @param min_val [Numeric]
4265
+ # Minimum value.
4266
+ # @param max_val [Numeric]
4267
+ # Maximum value.
4268
+ #
4269
+ # @return [Expr]
4270
+ #
4271
+ # @example
4272
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4273
+ # df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
4274
+ # # =>
4275
+ # # shape: (4, 2)
4276
+ # # ┌──────┬─────────────┐
4277
+ # # │ foo ┆ foo_clipped │
4278
+ # # │ --- ┆ --- │
4279
+ # # │ i64 ┆ i64 │
4280
+ # # ╞══════╪═════════════╡
4281
+ # # │ -50 ┆ 1 │
4282
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4283
+ # # │ 5 ┆ 5 │
4284
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4285
+ # # │ null ┆ null │
4286
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4287
+ # # │ 50 ┆ 10 │
4288
+ # # └──────┴─────────────┘
1172
4289
  def clip(min_val, max_val)
1173
4290
  wrap_expr(_rbexpr.clip(min_val, max_val))
1174
4291
  end
1175
4292
 
4293
+ # Clip (limit) the values in an array to a `min` boundary.
4294
+ #
4295
+ # Only works for numerical types.
4296
+ #
4297
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4298
+ # expression. See `when` for more information.
4299
+ #
4300
+ # @param min_val [Numeric]
4301
+ # Minimum value.
4302
+ #
4303
+ # @return [Expr]
4304
+ #
4305
+ # @example
4306
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4307
+ # df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
4308
+ # # =>
4309
+ # # shape: (4, 2)
4310
+ # # ┌──────┬─────────────┐
4311
+ # # │ foo ┆ foo_clipped │
4312
+ # # │ --- ┆ --- │
4313
+ # # │ i64 ┆ i64 │
4314
+ # # ╞══════╪═════════════╡
4315
+ # # │ -50 ┆ 0 │
4316
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4317
+ # # │ 5 ┆ 5 │
4318
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4319
+ # # │ null ┆ null │
4320
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4321
+ # # │ 50 ┆ 50 │
4322
+ # # └──────┴─────────────┘
1176
4323
  def clip_min(min_val)
1177
4324
  wrap_expr(_rbexpr.clip_min(min_val))
1178
4325
  end
1179
4326
 
4327
+ # Clip (limit) the values in an array to a `max` boundary.
4328
+ #
4329
+ # Only works for numerical types.
4330
+ #
4331
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4332
+ # expression. See `when` for more information.
4333
+ #
4334
+ # @param max_val [Numeric]
4335
+ # Maximum value.
4336
+ #
4337
+ # @return [Expr]
4338
+ #
4339
+ # @example
4340
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4341
+ # df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
4342
+ # # =>
4343
+ # # shape: (4, 2)
4344
+ # # ┌──────┬─────────────┐
4345
+ # # │ foo ┆ foo_clipped │
4346
+ # # │ --- ┆ --- │
4347
+ # # │ i64 ┆ i64 │
4348
+ # # ╞══════╪═════════════╡
4349
+ # # │ -50 ┆ -50 │
4350
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4351
+ # # │ 5 ┆ 0 │
4352
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4353
+ # # │ null ┆ null │
4354
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4355
+ # # │ 50 ┆ 0 │
4356
+ # # └──────┴─────────────┘
1180
4357
  def clip_max(max_val)
1181
4358
  wrap_expr(_rbexpr.clip_max(max_val))
1182
4359
  end
1183
4360
 
4361
+ # Calculate the lower bound.
4362
+ #
4363
+ # Returns a unit Series with the lowest value possible for the dtype of this
4364
+ # expression.
4365
+ #
4366
+ # @return [Expr]
4367
+ #
4368
+ # @example
4369
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4370
+ # df.select(Polars.col("a").lower_bound)
4371
+ # # =>
4372
+ # # shape: (1, 1)
4373
+ # # ┌──────────────────────┐
4374
+ # # │ a │
4375
+ # # │ --- │
4376
+ # # │ i64 │
4377
+ # # ╞══════════════════════╡
4378
+ # # │ -9223372036854775808 │
4379
+ # # └──────────────────────┘
1184
4380
  def lower_bound
1185
4381
  wrap_expr(_rbexpr.lower_bound)
1186
4382
  end
1187
4383
 
4384
+ # Calculate the upper bound.
4385
+ #
4386
+ # Returns a unit Series with the highest value possible for the dtype of this
4387
+ # expression.
4388
+ #
4389
+ # @return [Expr]
4390
+ #
4391
+ # @example
4392
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4393
+ # df.select(Polars.col("a").upper_bound)
4394
+ # # =>
4395
+ # # shape: (1, 1)
4396
+ # # ┌─────────────────────┐
4397
+ # # │ a │
4398
+ # # │ --- │
4399
+ # # │ i64 │
4400
+ # # ╞═════════════════════╡
4401
+ # # │ 9223372036854775807 │
4402
+ # # └─────────────────────┘
1188
4403
  def upper_bound
1189
4404
  wrap_expr(_rbexpr.upper_bound)
1190
4405
  end
1191
4406
 
4407
+ # Compute the element-wise indication of the sign.
4408
+ #
4409
+ # @return [Expr]
4410
+ #
4411
+ # @example
4412
+ # df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
4413
+ # df.select(Polars.col("a").sign)
4414
+ # # =>
4415
+ # # shape: (5, 1)
4416
+ # # ┌──────┐
4417
+ # # │ a │
4418
+ # # │ --- │
4419
+ # # │ i64 │
4420
+ # # ╞══════╡
4421
+ # # │ -1 │
4422
+ # # ├╌╌╌╌╌╌┤
4423
+ # # │ 0 │
4424
+ # # ├╌╌╌╌╌╌┤
4425
+ # # │ 0 │
4426
+ # # ├╌╌╌╌╌╌┤
4427
+ # # │ 1 │
4428
+ # # ├╌╌╌╌╌╌┤
4429
+ # # │ null │
4430
+ # # └──────┘
1192
4431
  def sign
1193
4432
  wrap_expr(_rbexpr.sign)
1194
4433
  end
1195
4434
 
4435
+ # Compute the element-wise value for the sine.
4436
+ #
4437
+ # @return [Expr]
4438
+ #
4439
+ # @example
4440
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4441
+ # df.select(Polars.col("a").sin)
4442
+ # # =>
4443
+ # # shape: (1, 1)
4444
+ # # ┌─────┐
4445
+ # # │ a │
4446
+ # # │ --- │
4447
+ # # │ f64 │
4448
+ # # ╞═════╡
4449
+ # # │ 0.0 │
4450
+ # # └─────┘
1196
4451
  def sin
1197
4452
  wrap_expr(_rbexpr.sin)
1198
4453
  end
1199
4454
 
4455
+ # Compute the element-wise value for the cosine.
4456
+ #
4457
+ # @return [Expr]
4458
+ #
4459
+ # @example
4460
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4461
+ # df.select(Polars.col("a").cos)
4462
+ # # =>
4463
+ # # shape: (1, 1)
4464
+ # # ┌─────┐
4465
+ # # │ a │
4466
+ # # │ --- │
4467
+ # # │ f64 │
4468
+ # # ╞═════╡
4469
+ # # │ 1.0 │
4470
+ # # └─────┘
1200
4471
  def cos
1201
4472
  wrap_expr(_rbexpr.cos)
1202
4473
  end
1203
4474
 
4475
+ # Compute the element-wise value for the tangent.
4476
+ #
4477
+ # @return [Expr]
4478
+ #
4479
+ # @example
4480
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4481
+ # df.select(Polars.col("a").tan)
4482
+ # # =>
4483
+ # # shape: (1, 1)
4484
+ # # ┌──────────┐
4485
+ # # │ a │
4486
+ # # │ --- │
4487
+ # # │ f64 │
4488
+ # # ╞══════════╡
4489
+ # # │ 1.557408 │
4490
+ # # └──────────┘
1204
4491
  def tan
1205
4492
  wrap_expr(_rbexpr.tan)
1206
4493
  end
1207
4494
 
4495
+ # Compute the element-wise value for the inverse sine.
4496
+ #
4497
+ # @return [Expr]
4498
+ #
4499
+ # @example
4500
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4501
+ # df.select(Polars.col("a").arcsin)
4502
+ # # =>
4503
+ # # shape: (1, 1)
4504
+ # # ┌──────────┐
4505
+ # # │ a │
4506
+ # # │ --- │
4507
+ # # │ f64 │
4508
+ # # ╞══════════╡
4509
+ # # │ 1.570796 │
4510
+ # # └──────────┘
1208
4511
  def arcsin
1209
4512
  wrap_expr(_rbexpr.arcsin)
1210
4513
  end
1211
4514
 
4515
+ # Compute the element-wise value for the inverse cosine.
4516
+ #
4517
+ # @return [Expr]
4518
+ #
4519
+ # @example
4520
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4521
+ # df.select(Polars.col("a").arccos)
4522
+ # # =>
4523
+ # # shape: (1, 1)
4524
+ # # ┌──────────┐
4525
+ # # │ a │
4526
+ # # │ --- │
4527
+ # # │ f64 │
4528
+ # # ╞══════════╡
4529
+ # # │ 1.570796 │
4530
+ # # └──────────┘
1212
4531
  def arccos
1213
4532
  wrap_expr(_rbexpr.arccos)
1214
4533
  end
1215
4534
 
4535
+ # Compute the element-wise value for the inverse tangent.
4536
+ #
4537
+ # @return [Expr]
4538
+ #
4539
+ # @example
4540
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4541
+ # df.select(Polars.col("a").arctan)
4542
+ # # =>
4543
+ # # shape: (1, 1)
4544
+ # # ┌──────────┐
4545
+ # # │ a │
4546
+ # # │ --- │
4547
+ # # │ f64 │
4548
+ # # ╞══════════╡
4549
+ # # │ 0.785398 │
4550
+ # # └──────────┘
1216
4551
  def arctan
1217
4552
  wrap_expr(_rbexpr.arctan)
1218
4553
  end
1219
4554
 
4555
+ # Compute the element-wise value for the hyperbolic sine.
4556
+ #
4557
+ # @return [Expr]
4558
+ #
4559
+ # @example
4560
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4561
+ # df.select(Polars.col("a").sinh)
4562
+ # # =>
4563
+ # # shape: (1, 1)
4564
+ # # ┌──────────┐
4565
+ # # │ a │
4566
+ # # │ --- │
4567
+ # # │ f64 │
4568
+ # # ╞══════════╡
4569
+ # # │ 1.175201 │
4570
+ # # └──────────┘
1220
4571
  def sinh
1221
4572
  wrap_expr(_rbexpr.sinh)
1222
4573
  end
1223
4574
 
4575
+ # Compute the element-wise value for the hyperbolic cosine.
4576
+ #
4577
+ # @return [Expr]
4578
+ #
4579
+ # @example
4580
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4581
+ # df.select(Polars.col("a").cosh)
4582
+ # # =>
4583
+ # # shape: (1, 1)
4584
+ # # ┌──────────┐
4585
+ # # │ a │
4586
+ # # │ --- │
4587
+ # # │ f64 │
4588
+ # # ╞══════════╡
4589
+ # # │ 1.543081 │
4590
+ # # └──────────┘
1224
4591
  def cosh
1225
4592
  wrap_expr(_rbexpr.cosh)
1226
4593
  end
1227
4594
 
4595
+ # Compute the element-wise value for the hyperbolic tangent.
4596
+ #
4597
+ # @return [Expr]
4598
+ #
4599
+ # @example
4600
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4601
+ # df.select(Polars.col("a").tanh)
4602
+ # # =>
4603
+ # # shape: (1, 1)
4604
+ # # ┌──────────┐
4605
+ # # │ a │
4606
+ # # │ --- │
4607
+ # # │ f64 │
4608
+ # # ╞══════════╡
4609
+ # # │ 0.761594 │
4610
+ # # └──────────┘
1228
4611
  def tanh
1229
4612
  wrap_expr(_rbexpr.tanh)
1230
4613
  end
1231
4614
 
4615
+ # Compute the element-wise value for the inverse hyperbolic sine.
4616
+ #
4617
+ # @return [Expr]
4618
+ #
4619
+ # @example
4620
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4621
+ # df.select(Polars.col("a").arcsinh)
4622
+ # # =>
4623
+ # # shape: (1, 1)
4624
+ # # ┌──────────┐
4625
+ # # │ a │
4626
+ # # │ --- │
4627
+ # # │ f64 │
4628
+ # # ╞══════════╡
4629
+ # # │ 0.881374 │
4630
+ # # └──────────┘
1232
4631
  def arcsinh
1233
4632
  wrap_expr(_rbexpr.arcsinh)
1234
4633
  end
1235
4634
 
4635
+ # Compute the element-wise value for the inverse hyperbolic cosine.
4636
+ #
4637
+ # @return [Expr]
4638
+ #
4639
+ # @example
4640
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4641
+ # df.select(Polars.col("a").arccosh)
4642
+ # # =>
4643
+ # # shape: (1, 1)
4644
+ # # ┌─────┐
4645
+ # # │ a │
4646
+ # # │ --- │
4647
+ # # │ f64 │
4648
+ # # ╞═════╡
4649
+ # # │ 0.0 │
4650
+ # # └─────┘
1236
4651
  def arccosh
1237
4652
  wrap_expr(_rbexpr.arccosh)
1238
4653
  end
1239
4654
 
4655
+ # Compute the element-wise value for the inverse hyperbolic tangent.
4656
+ #
4657
+ # @return [Expr]
4658
+ #
4659
+ # @example
4660
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4661
+ # df.select(Polars.col("a").arctanh)
4662
+ # # =>
4663
+ # # shape: (1, 1)
4664
+ # # ┌─────┐
4665
+ # # │ a │
4666
+ # # │ --- │
4667
+ # # │ f64 │
4668
+ # # ╞═════╡
4669
+ # # │ inf │
4670
+ # # └─────┘
1240
4671
  def arctanh
1241
4672
  wrap_expr(_rbexpr.arctanh)
1242
4673
  end
1243
4674
 
4675
+ # Reshape this Expr to a flat Series or a Series of Lists.
4676
+ #
4677
+ # @param dims [Array]
4678
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
4679
+ # dimension is inferred.
4680
+ #
4681
+ # @return [Expr]
4682
+ #
4683
+ # @example
4684
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
4685
+ # df.select(Polars.col("foo").reshape([3, 3]))
4686
+ # # =>
4687
+ # # shape: (3, 1)
4688
+ # # ┌───────────┐
4689
+ # # │ foo │
4690
+ # # │ --- │
4691
+ # # │ list[i64] │
4692
+ # # ╞═══════════╡
4693
+ # # │ [1, 2, 3] │
4694
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4695
+ # # │ [4, 5, 6] │
4696
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4697
+ # # │ [7, 8, 9] │
4698
+ # # └───────────┘
1244
4699
  def reshape(dims)
1245
4700
  wrap_expr(_rbexpr.reshape(dims))
1246
4701
  end
1247
4702
 
4703
+ # Shuffle the contents of this expr.
4704
+ #
4705
+ # @param seed [Integer]
4706
+ # Seed for the random number generator. If set to nil (default), a random
4707
+ # seed is generated with `rand(10000)`.
4708
+ #
4709
+ # @return [Expr]
4710
+ #
4711
+ # @example
4712
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4713
+ # df.select(Polars.col("a").shuffle(seed: 1))
4714
+ # # =>
4715
+ # # shape: (3, 1)
4716
+ # # ┌─────┐
4717
+ # # │ a │
4718
+ # # │ --- │
4719
+ # # │ i64 │
4720
+ # # ╞═════╡
4721
+ # # │ 2 │
4722
+ # # ├╌╌╌╌╌┤
4723
+ # # │ 1 │
4724
+ # # ├╌╌╌╌╌┤
4725
+ # # │ 3 │
4726
+ # # └─────┘
1248
4727
  def shuffle(seed: nil)
1249
4728
  if seed.nil?
1250
4729
  seed = rand(10000)
@@ -1252,74 +4731,514 @@ module Polars
1252
4731
  wrap_expr(_rbexpr.shuffle(seed))
1253
4732
  end
1254
4733
 
1255
- # def sample
1256
- # end
1257
-
1258
- # def ewm_mean
1259
- # end
4734
+ # Sample from this expression.
4735
+ #
4736
+ # @param frac [Float]
4737
+ # Fraction of items to return. Cannot be used with `n`.
4738
+ # @param with_replacement [Boolean]
4739
+ # Allow values to be sampled more than once.
4740
+ # @param shuffle [Boolean]
4741
+ # Shuffle the order of sampled data points.
4742
+ # @param seed [Integer]
4743
+ # Seed for the random number generator. If set to nil (default), a random
4744
+ # seed is used.
4745
+ # @param n [Integer]
4746
+ # Number of items to return. Cannot be used with `frac`.
4747
+ #
4748
+ # @return [Expr]
4749
+ #
4750
+ # @example
4751
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4752
+ # df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
4753
+ # # =>
4754
+ # # shape: (3, 1)
4755
+ # # ┌─────┐
4756
+ # # │ a │
4757
+ # # │ --- │
4758
+ # # │ i64 │
4759
+ # # ╞═════╡
4760
+ # # │ 3 │
4761
+ # # ├╌╌╌╌╌┤
4762
+ # # │ 1 │
4763
+ # # ├╌╌╌╌╌┤
4764
+ # # │ 1 │
4765
+ # # └─────┘
4766
def sample(
  frac: nil,
  with_replacement: true,
  shuffle: false,
  seed: nil,
  n: nil
)
  # `n` and `frac` are two alternative ways of sizing the sample.
  unless n.nil? || frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  # Only `n` was given: sample by count.
  unless n.nil?
    counted = _rbexpr.sample_n(n, with_replacement, shuffle, seed)
    return wrap_expr(counted)
  end

  # Otherwise sample by fraction; a missing `frac` defaults to 1.0.
  fraction = frac.nil? ? 1.0 : frac
  wrap_expr(_rbexpr.sample_frac(fraction, with_replacement, shuffle, seed))
end
1266
4788
 
4789
+ # Exponentially-weighted moving average.
4790
+ #
4791
+ # @return [Expr]
4792
+ #
4793
+ # @example
4794
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4795
+ # df.select(Polars.col("a").ewm_mean(com: 1))
4796
+ # # =>
4797
+ # # shape: (3, 1)
4798
+ # # ┌──────────┐
4799
+ # # │ a │
4800
+ # # │ --- │
4801
+ # # │ f64 │
4802
+ # # ╞══════════╡
4803
+ # # │ 1.0 │
4804
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4805
+ # # │ 1.666667 │
4806
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4807
+ # # │ 2.428571 │
4808
+ # # └──────────┘
4809
def ewm_mean(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  min_periods: 1
)
  # Reduce whichever decay parameter was supplied to a single alpha value.
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  rb_result = _rbexpr.ewm_mean(smoothing, adjust, min_periods)
  wrap_expr(rb_result)
end
4820
+
4821
+ # Exponentially-weighted moving standard deviation.
4822
+ #
4823
+ # @return [Expr]
4824
+ #
4825
+ # @example
4826
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4827
+ # df.select(Polars.col("a").ewm_std(com: 1))
4828
+ # # =>
4829
+ # # shape: (3, 1)
4830
+ # # ┌──────────┐
4831
+ # # │ a │
4832
+ # # │ --- │
4833
+ # # │ f64 │
4834
+ # # ╞══════════╡
4835
+ # # │ 0.0 │
4836
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4837
+ # # │ 0.707107 │
4838
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4839
+ # # │ 0.963624 │
4840
+ # # └──────────┘
4841
def ewm_std(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1
)
  # Reduce whichever decay parameter was supplied to a single alpha value.
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  rb_result = _rbexpr.ewm_std(smoothing, adjust, bias, min_periods)
  wrap_expr(rb_result)
end
4853
+
4854
+ # Exponentially-weighted moving variance.
4855
+ #
4856
+ # @return [Expr]
4857
+ #
4858
+ # @example
4859
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4860
+ # df.select(Polars.col("a").ewm_var(com: 1))
4861
+ # # =>
4862
+ # # shape: (3, 1)
4863
+ # # ┌──────────┐
4864
+ # # │ a │
4865
+ # # │ --- │
4866
+ # # │ f64 │
4867
+ # # ╞══════════╡
4868
+ # # │ 0.0 │
4869
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4870
+ # # │ 0.5 │
4871
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4872
+ # # │ 0.928571 │
4873
+ # # └──────────┘
4874
def ewm_var(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1
)
  # Reduce whichever decay parameter was supplied to a single alpha value.
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  rb_result = _rbexpr.ewm_var(smoothing, adjust, bias, min_periods)
  wrap_expr(rb_result)
end
4886
+
4887
+ # Extend the Series with given number of values.
4888
+ #
4889
+ # @param value [Object]
4890
+ # The value to extend the Series with. This value may be nil to fill with
4891
+ # nulls.
4892
+ # @param n [Integer]
4893
+ # The number of values to extend.
4894
+ #
4895
+ # @return [Expr]
1267
4896
  #
4897
+ # @example
4898
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4899
+ # df.select(Polars.col("values").extend_constant(99, 2))
4900
+ # # =>
4901
+ # # shape: (5, 1)
4902
+ # # ┌────────┐
4903
+ # # │ values │
4904
+ # # │ --- │
4905
+ # # │ i64 │
4906
+ # # ╞════════╡
4907
+ # # │ 1 │
4908
+ # # ├╌╌╌╌╌╌╌╌┤
4909
+ # # │ 2 │
4910
+ # # ├╌╌╌╌╌╌╌╌┤
4911
+ # # │ 3 │
4912
+ # # ├╌╌╌╌╌╌╌╌┤
4913
+ # # │ 99 │
4914
+ # # ├╌╌╌╌╌╌╌╌┤
4915
+ # # │ 99 │
4916
+ # # └────────┘
1268
4917
  def extend_constant(value, n)
1269
4918
  wrap_expr(_rbexpr.extend_constant(value, n))
1270
4919
  end
1271
4920
 
4921
+ # Count all unique values and create a struct mapping value to count.
4922
+ #
4923
+ # @param multithreaded [Boolean]
4924
+ # Better to turn this off in the aggregation context, as it can lead to
4925
+ # contention.
4926
+ # @param sort [Boolean]
4927
+ # Ensure the output is sorted from most values to least.
4928
+ #
4929
+ # @return [Expr]
4930
+ #
4931
+ # @example
4932
+ # df = Polars::DataFrame.new(
4933
+ # {
4934
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4935
+ # }
4936
+ # )
4937
+ # df.select(
4938
+ # [
4939
+ # Polars.col("id").value_counts(sort: true),
4940
+ # ]
4941
+ # )
4942
+ # # =>
4943
+ # # shape: (3, 1)
4944
+ # # ┌───────────┐
4945
+ # # │ id │
4946
+ # # │ --- │
4947
+ # # │ struct[2] │
4948
+ # # ╞═══════════╡
4949
+ # # │ {"c",3} │
4950
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4951
+ # # │ {"b",2} │
4952
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4953
+ # # │ {"a",1} │
4954
+ # # └───────────┘
1272
4955
  def value_counts(multithreaded: false, sort: false)
1273
4956
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
1274
4957
  end
1275
4958
 
4959
+ # Return a count of the unique values in the order of appearance.
4960
+ #
4961
+ # This method differs from `value_counts` in that it does not return the
4962
+ # values, only the counts, and might be faster.
4963
+ #
4964
+ # @return [Expr]
4965
+ #
4966
+ # @example
4967
+ # df = Polars::DataFrame.new(
4968
+ # {
4969
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4970
+ # }
4971
+ # )
4972
+ # df.select(
4973
+ # [
4974
+ # Polars.col("id").unique_counts
4975
+ # ]
4976
+ # )
4977
+ # # =>
4978
+ # # shape: (3, 1)
4979
+ # # ┌─────┐
4980
+ # # │ id │
4981
+ # # │ --- │
4982
+ # # │ u32 │
4983
+ # # ╞═════╡
4984
+ # # │ 1 │
4985
+ # # ├╌╌╌╌╌┤
4986
+ # # │ 2 │
4987
+ # # ├╌╌╌╌╌┤
4988
+ # # │ 3 │
4989
+ # # └─────┘
1276
4990
  def unique_counts
1277
4991
  wrap_expr(_rbexpr.unique_counts)
1278
4992
  end
1279
4993
 
4994
+ # Compute the logarithm to a given base.
4995
+ #
4996
+ # @param base [Float]
4997
+ # Given base, defaults to `e`.
4998
+ #
4999
+ # @return [Expr]
5000
+ #
5001
+ # @example
5002
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
5003
+ # df.select(Polars.col("a").log(2))
5004
+ # # =>
5005
+ # # shape: (3, 1)
5006
+ # # ┌──────────┐
5007
+ # # │ a │
5008
+ # # │ --- │
5009
+ # # │ f64 │
5010
+ # # ╞══════════╡
5011
+ # # │ 0.0 │
5012
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
5013
+ # # │ 1.0 │
5014
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
5015
+ # # │ 1.584963 │
5016
+ # # └──────────┘
1280
5017
  def log(base = Math::E)
1281
5018
  wrap_expr(_rbexpr.log(base))
1282
5019
  end
1283
5020
 
1284
- def entropy(base: 2, normalize: false)
5021
+ # Computes the entropy.
5022
+ #
5023
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
5024
+ #
5025
+ # @param base [Float]
5026
+ # Given base, defaults to `2`.
5027
+ # @param normalize [Boolean]
5028
+ # Normalize pk if it doesn't sum to 1.
5029
+ #
5030
+ # @return [Expr]
5031
+ #
5032
+ # @example
5033
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
5034
+ # df.select(Polars.col("a").entropy(base: 2))
5035
+ # # =>
5036
+ # # shape: (1, 1)
5037
+ # # ┌──────────┐
5038
+ # # │ a │
5039
+ # # │ --- │
5040
+ # # │ f64 │
5041
+ # # ╞══════════╡
5042
+ # # │ 1.459148 │
5043
+ # # └──────────┘
5044
+ #
5045
+ # @example
5046
+ # df.select(Polars.col("a").entropy(base: 2, normalize: false))
5047
+ # # =>
5048
+ # # shape: (1, 1)
5049
+ # # ┌───────────┐
5050
+ # # │ a │
5051
+ # # │ --- │
5052
+ # # │ f64 │
5053
+ # # ╞═══════════╡
5054
+ # # │ -6.754888 │
5055
+ # # └───────────┘
5056
def entropy(base: 2, normalize: true)
  # Forward to the underlying _rbexpr, then wrap what it returns.
  rb_result = _rbexpr.entropy(base, normalize)
  wrap_expr(rb_result)
end
1287
5059
 
1288
- # def cumulative_eval
1289
- # end
1290
-
1291
- # def set_sorted
5060
+ # Run an expression over a sliding window that increases `1` slot every iteration.
5061
+ #
5062
+ # @param expr [Expr]
5063
+ # Expression to evaluate
5064
+ # @param min_periods [Integer]
5065
+ # Number of valid values there should be in the window before the expression
5066
+ # is evaluated. valid values = `length - null_count`
5067
+ # @param parallel [Boolean]
5068
+ # Run in parallel. Don't do this in a groupby or another operation that
5069
+ # already has much parallelization.
5070
+ #
5071
+ # @return [Expr]
5072
+ #
5073
+ # @note
5074
+ # This functionality is experimental and may change without it being considered a
5075
+ # breaking change.
5076
+ #
5077
+ # @note
5078
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
5079
+ # for operations that visit all elements.
5080
+ #
5081
+ # @example
5082
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
5083
+ # df.select(
5084
+ # [
5085
+ # Polars.col("values").cumulative_eval(
5086
+ # Polars.element.first - Polars.element.last ** 2
5087
+ # )
5088
+ # ]
5089
+ # )
5090
+ # # =>
5091
+ # # shape: (5, 1)
5092
+ # # ┌────────┐
5093
+ # # │ values │
5094
+ # # │ --- │
5095
+ # # │ f64 │
5096
+ # # ╞════════╡
5097
+ # # │ 0.0 │
5098
+ # # ├╌╌╌╌╌╌╌╌┤
5099
+ # # │ -3.0 │
5100
+ # # ├╌╌╌╌╌╌╌╌┤
5101
+ # # │ -8.0 │
5102
+ # # ├╌╌╌╌╌╌╌╌┤
5103
+ # # │ -15.0 │
5104
+ # # ├╌╌╌╌╌╌╌╌┤
5105
+ # # │ -24.0 │
5106
+ # # └────────┘
5107
def cumulative_eval(expr, min_periods: 1, parallel: false)
  # The underlying call takes the raw _rbexpr of `expr`, not the wrapper.
  inner = expr._rbexpr
  rb_result = _rbexpr.cumulative_eval(inner, min_periods, parallel)
  wrap_expr(rb_result)
end
5112
+
5113
+ # Flags the expression as 'sorted'.
5114
+ #
5115
+ # Enables downstream code to use fast paths for sorted arrays.
5116
+ #
5117
+ # @param reverse [Boolean]
5118
+ # If the `Series` order is reversed, e.g. descending.
5119
+ #
5120
+ # @return [Expr]
5121
+ #
5122
+ # @note
5123
+ # This can lead to incorrect results if this `Series` is not sorted!!
5124
+ # Use with care!
5125
+ #
5126
+ # @example
5127
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
5128
+ # df.select(Polars.col("values").set_sorted.max)
5129
+ # # =>
5130
+ # # shape: (1, 1)
5131
+ # # ┌────────┐
5132
+ # # │ values │
5133
+ # # │ --- │
5134
+ # # │ i64 │
5135
+ # # ╞════════╡
5136
+ # # │ 3 │
5137
+ # # └────────┘
5138
+ # def set_sorted(reverse: false)
5139
+ # map { |s| s.set_sorted(reverse) }
1292
5140
  # end
1293
5141
 
5142
+ # Aggregate to list.
5143
+ #
5144
+ # @return [Expr]
1294
5145
  #
5146
+ # @example
5147
+ # df = Polars::DataFrame.new(
5148
+ # {
5149
+ # "a" => [1, 2, 3],
5150
+ # "b" => [4, 5, 6]
5151
+ # }
5152
+ # )
5153
+ # df.select(Polars.all.list)
5154
+ # # =>
5155
+ # # shape: (1, 2)
5156
+ # # ┌───────────┬───────────┐
5157
+ # # │ a ┆ b │
5158
+ # # │ --- ┆ --- │
5159
+ # # │ list[i64] ┆ list[i64] │
5160
+ # # ╞═══════════╪═══════════╡
5161
+ # # │ [1, 2, 3] ┆ [4, 5, 6] │
5162
+ # # └───────────┴───────────┘
1295
5163
  def list
1296
5164
  wrap_expr(_rbexpr.list)
1297
5165
  end
1298
5166
 
5167
+ # Shrink numeric columns to the minimal required datatype.
5168
+ #
5169
+ # Shrink to the dtype needed to fit the extrema of this `Series`.
5170
+ # This can be used to reduce memory pressure.
5171
+ #
5172
+ # @return [Expr]
5173
+ #
5174
+ # @example
5175
+ # Polars::DataFrame.new(
5176
+ # {
5177
+ # "a" => [1, 2, 3],
5178
+ # "b" => [1, 2, 2 << 32],
5179
+ # "c" => [-1, 2, 1 << 30],
5180
+ # "d" => [-112, 2, 112],
5181
+ # "e" => [-112, 2, 129],
5182
+ # "f" => ["a", "b", "c"],
5183
+ # "g" => [0.1, 1.32, 0.12],
5184
+ # "h" => [true, nil, false]
5185
+ # }
5186
+ # ).select(Polars.all.shrink_dtype)
5187
+ # # =>
5188
+ # # shape: (3, 8)
5189
+ # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
5190
+ # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
5191
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
5192
+ # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
5193
+ # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
5194
+ # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
5195
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
5196
+ # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
5197
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
5198
+ # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
5199
+ # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
1299
5200
  def shrink_dtype
1300
5201
  wrap_expr(_rbexpr.shrink_dtype)
1301
5202
  end
1302
5203
 
5204
+ # Create an object namespace of all list related methods.
5205
+ #
5206
+ # @return [ListExpr]
1303
5207
  def arr
1304
5208
  ListExpr.new(self)
1305
5209
  end
1306
5210
 
5211
+ # Create an object namespace of all categorical related methods.
5212
+ #
5213
+ # @return [CatExpr]
1307
5214
  def cat
1308
5215
  CatExpr.new(self)
1309
5216
  end
1310
5217
 
5218
+ # Create an object namespace of all datetime related methods.
5219
+ #
5220
+ # @return [DateTimeExpr]
1311
5221
  def dt
1312
5222
  DateTimeExpr.new(self)
1313
5223
  end
1314
5224
 
5225
+ # Create an object namespace of all meta related expression methods.
5226
+ #
5227
+ # @return [MetaExpr]
1315
5228
  def meta
1316
5229
  MetaExpr.new(self)
1317
5230
  end
1318
5231
 
5232
+ # Create an object namespace of all string related methods.
5233
+ #
5234
+ # @return [StringExpr]
1319
5235
  def str
1320
5236
  StringExpr.new(self)
1321
5237
  end
1322
5238
 
5239
+ # Create an object namespace of all struct related methods.
5240
+ #
5241
+ # @return [StructExpr]
1323
5242
  def struct
1324
5243
  StructExpr.new(self)
1325
5244
  end
@@ -1337,5 +5256,51 @@ module Polars
1337
5256
  def _to_expr(other)
1338
5257
  other.is_a?(Expr) ? other : Utils.lit(other)
1339
5258
  end
5259
+
5260
# Resolve the smoothing factor from exactly one of `com`, `span`,
# `half_life`, or `alpha`.
#
# @param com [Numeric, nil] must be >= 0; alpha = 1 / (1 + com)
# @param span [Numeric, nil] must be >= 1; alpha = 2 / (span + 1)
# @param half_life [Numeric, nil] must be > 0; alpha = 1 - exp(-ln(2) / half_life)
# @param alpha [Numeric, nil] must satisfy 0 < alpha <= 1; returned as given
#
# @return [Numeric] the resolved alpha
#
# @raise [ArgumentError] if more than one parameter is set, none is set,
#   or the supplied value is outside its allowed range
def _prepare_alpha(com, span, half_life, alpha)
  supplied = [com, span, half_life, alpha].compact
  if supplied.size > 1
    raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
  end

  unless com.nil?
    raise ArgumentError, "Require 'com' >= 0 (found #{com})" if com < 0.0
    return 1.0 / (1.0 + com)
  end

  unless span.nil?
    raise ArgumentError, "Require 'span' >= 1 (found #{span})" if span < 1.0
    return 2.0 / (span + 1.0)
  end

  unless half_life.nil?
    raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})" if half_life <= 0.0
    return 1.0 - Math.exp(-Math.log(2.0) / half_life)
  end

  # Only `alpha` can remain at this point; it must be present and in (0, 1].
  if alpha.nil?
    raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
  end
  if alpha <= 0 || alpha > 1
    raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
  end

  alpha
end
5292
+
5293
# Normalise the `window_size` / `min_periods` pair used by the rolling methods.
#
# An Integer window size is rendered as the string "<n>i" and, when
# `min_periods` is nil, also becomes the minimum period count. Any other
# window size is passed through unchanged with `min_periods` defaulting to 1.
#
# @param window_size [Integer, Object]
# @param min_periods [Integer, nil]
#
# @return [Array] two-element array `[window_size, min_periods]`
def _prepare_rolling_window_args(window_size, min_periods)
  if window_size.is_a?(Integer)
    periods = min_periods.nil? ? window_size : min_periods
    ["#{window_size}i", periods]
  else
    [window_size, min_periods.nil? ? 1 : min_periods]
  end
end
1340
5305
  end
1341
5306
  end