polars-df 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
data/lib/polars/expr.rb CHANGED
@@ -138,8 +138,45 @@ module Polars
138
138
  Utils.lit(0) - self
139
139
  end
140
140
 
141
- # def to_physical
142
- # end
141
+ # Cast to physical representation of the logical dtype.
142
+ #
143
+ # - `:date` -> `:i32`
144
+ # - `:datetime` -> `:i64`
145
+ # - `:time` -> `:i64`
146
+ # - `:duration` -> `:i64`
147
+ # - `:cat` -> `:u32`
148
+ # - Other data types will be left unchanged.
149
+ #
150
+ # @return [Expr]
151
+ #
152
+ # @example
153
+ # Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
154
+ # [
155
+ # Polars.col("vals").cast(:cat),
156
+ # Polars.col("vals")
157
+ # .cast(:cat)
158
+ # .to_physical
159
+ # .alias("vals_physical")
160
+ # ]
161
+ # )
162
+ # # =>
163
+ # # shape: (4, 2)
164
+ # # ┌──────┬───────────────┐
165
+ # # │ vals ┆ vals_physical │
166
+ # # │ --- ┆ --- │
167
+ # # │ cat ┆ u32 │
168
+ # # ╞══════╪═══════════════╡
169
+ # # │ a ┆ 0 │
170
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
171
+ # # │ x ┆ 1 │
172
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
173
+ # # │ null ┆ null │
174
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
175
+ # # │ a ┆ 0 │
176
+ # # └──────┴───────────────┘
177
+ def to_physical
178
+ wrap_expr(_rbexpr.to_physical)
179
+ end
143
180
 
144
181
  # Check if any boolean value in a Boolean column is `true`.
145
182
  #
@@ -258,13 +295,82 @@ module Polars
258
295
  wrap_expr(_rbexpr.exp)
259
296
  end
260
297
 
298
+ # Rename the output of an expression.
299
+ #
300
+ # @param name [String]
301
+ # New name.
302
+ #
303
+ # @return [Expr]
304
+ #
305
+ # @example
306
+ # df = Polars::DataFrame.new(
307
+ # {
308
+ # "a" => [1, 2, 3],
309
+ # "b" => ["a", "b", nil]
310
+ # }
311
+ # )
312
+ # df.select(
313
+ # [
314
+ # Polars.col("a").alias("bar"),
315
+ # Polars.col("b").alias("foo")
316
+ # ]
317
+ # )
318
+ # # =>
319
+ # # shape: (3, 2)
320
+ # # ┌─────┬──────┐
321
+ # # │ bar ┆ foo │
322
+ # # │ --- ┆ --- │
323
+ # # │ i64 ┆ str │
324
+ # # ╞═════╪══════╡
325
+ # # │ 1 ┆ a │
326
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
327
+ # # │ 2 ┆ b │
328
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
329
+ # # │ 3 ┆ null │
330
+ # # └─────┴──────┘
261
331
  def alias(name)
262
332
  wrap_expr(_rbexpr._alias(name))
263
333
  end
264
334
 
265
335
  # TODO support symbols for exclude
266
336
 
337
+ # Exclude certain columns from a wildcard/regex selection.
338
+ #
339
+ # You may also use regexes in the exclude list. They must start with `^` and end
340
+ # with `$`.
341
+ #
342
+ # @param columns [Object]
343
+ # Column(s) to exclude from selection.
344
+ # This can be:
267
345
  #
346
+ # - a column name, or multiple column names
347
+ # - a regular expression starting with `^` and ending with `$`
348
+ # - a dtype or multiple dtypes
349
+ #
350
+ # @return [Expr]
351
+ #
352
+ # @example
353
+ # df = Polars::DataFrame.new(
354
+ # {
355
+ # "aa" => [1, 2, 3],
356
+ # "ba" => ["a", "b", nil],
357
+ # "cc" => [nil, 2.5, 1.5]
358
+ # }
359
+ # )
360
+ # df.select(Polars.all.exclude("ba"))
361
+ # # =>
362
+ # # shape: (3, 2)
363
+ # # ┌─────┬──────┐
364
+ # # │ aa ┆ cc │
365
+ # # │ --- ┆ --- │
366
+ # # │ i64 ┆ f64 │
367
+ # # ╞═════╪══════╡
368
+ # # │ 1 ┆ null │
369
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
370
+ # # │ 2 ┆ 2.5 │
371
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
372
+ # # │ 3 ┆ 1.5 │
373
+ # # └─────┴──────┘
268
374
  def exclude(columns)
269
375
  if columns.is_a?(String)
270
376
  columns = [columns]
@@ -285,20 +391,75 @@ module Polars
285
391
  end
286
392
  end
287
393
 
394
+ # Keep the original root name of the expression.
395
+ #
396
+ # @return [Expr]
397
+ #
398
+ # @example
399
+ # df = Polars::DataFrame.new(
400
+ # {
401
+ # "a" => [1, 2],
402
+ # "b" => [3, 4]
403
+ # }
404
+ # )
405
+ # df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
406
+ # # =>
407
+ # # shape: (2, 2)
408
+ # # ┌─────┬─────┐
409
+ # # │ a ┆ b │
410
+ # # │ --- ┆ --- │
411
+ # # │ i64 ┆ i64 │
412
+ # # ╞═════╪═════╡
413
+ # # │ 9 ┆ 3 │
414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
415
+ # # │ 18 ┆ 4 │
416
+ # # └─────┴─────┘
288
417
  def keep_name
289
418
  wrap_expr(_rbexpr.keep_name)
290
419
  end
291
420
 
421
+ # Add a prefix to the root column name of the expression.
422
+ #
423
+ # @return [Expr]
292
424
  def prefix(prefix)
293
425
  wrap_expr(_rbexpr.prefix(prefix))
294
426
  end
295
427
 
428
+ # Add a suffix to the root column name of the expression.
429
+ #
430
+ # @return [Expr]
296
431
  def suffix(suffix)
297
432
  wrap_expr(_rbexpr.suffix(suffix))
298
433
  end
299
434
 
300
- # def map_alias
301
- # end
435
+ # Rename the output of an expression by mapping a function over the root name.
436
+ #
437
+ # @return [Expr]
438
+ #
439
+ # @example
440
+ # df = Polars::DataFrame.new(
441
+ # {
442
+ # "A" => [1, 2],
443
+ # "B" => [3, 4]
444
+ # }
445
+ # )
446
+ # df.select(
447
+ # Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
448
+ # )
449
+ # # =>
450
+ # # shape: (2, 2)
451
+ # # ┌───────────┬───────────┐
452
+ # # │ A_reverse ┆ B_reverse │
453
+ # # │ --- ┆ --- │
454
+ # # │ i64 ┆ i64 │
455
+ # # ╞═══════════╪═══════════╡
456
+ # # │ 2 ┆ 4 │
457
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
458
+ # # │ 1 ┆ 3 │
459
+ # # └───────────┴───────────┘
460
+ def map_alias(&f)
461
+ Utils.wrap_expr(_rbexpr.map_alias(f))
462
+ end
302
463
 
303
464
  # Negate a boolean expression.
304
465
  #
@@ -464,14 +625,112 @@ module Polars
464
625
  wrap_expr(_rbexpr.is_infinite)
465
626
  end
466
627
 
628
+ # Returns a boolean Series indicating which values are NaN.
629
+ #
630
+ # @note
631
+ # Floating point `NaN` (Not A Number) should not be confused
632
+ # with missing data represented as `nil`.
633
+ #
634
+ # @return [Expr]
635
+ #
636
+ # @example
637
+ # df = Polars::DataFrame.new(
638
+ # {
639
+ # "a" => [1, 2, nil, 1, 5],
640
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
641
+ # }
642
+ # )
643
+ # df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
644
+ # # =>
645
+ # # shape: (5, 3)
646
+ # # ┌──────┬─────┬─────────┐
647
+ # # │ a ┆ b ┆ b_isnan │
648
+ # # │ --- ┆ --- ┆ --- │
649
+ # # │ i64 ┆ f64 ┆ bool │
650
+ # # ╞══════╪═════╪═════════╡
651
+ # # │ 1 ┆ 1.0 ┆ false │
652
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
653
+ # # │ 2 ┆ 2.0 ┆ false │
654
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
655
+ # # │ null ┆ NaN ┆ true │
656
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
657
+ # # │ 1 ┆ 1.0 ┆ false │
658
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
659
+ # # │ 5 ┆ 5.0 ┆ false │
660
+ # # └──────┴─────┴─────────┘
467
661
  def is_nan
468
662
  wrap_expr(_rbexpr.is_nan)
469
663
  end
470
664
 
665
+ # Returns a boolean Series indicating which values are not NaN.
666
+ #
667
+ # @note
668
+ # Floating point `NaN` (Not A Number) should not be confused
669
+ # with missing data represented as `nil`.
670
+ #
671
+ # @return [Expr]
672
+ #
673
+ # @example
674
+ # df = Polars::DataFrame.new(
675
+ # {
676
+ # "a" => [1, 2, nil, 1, 5],
677
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
678
+ # }
679
+ # )
680
+ # df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
681
+ # # =>
682
+ # # shape: (5, 3)
683
+ # # ┌──────┬─────┬──────────────┐
684
+ # # │ a ┆ b ┆ b_is_not_nan │
685
+ # # │ --- ┆ --- ┆ --- │
686
+ # # │ i64 ┆ f64 ┆ bool │
687
+ # # ╞══════╪═════╪══════════════╡
688
+ # # │ 1 ┆ 1.0 ┆ true │
689
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
690
+ # # │ 2 ┆ 2.0 ┆ true │
691
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
692
+ # # │ null ┆ NaN ┆ false │
693
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
694
+ # # │ 1 ┆ 1.0 ┆ true │
695
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
696
+ # # │ 5 ┆ 5.0 ┆ true │
697
+ # # └──────┴─────┴──────────────┘
471
698
  def is_not_nan
472
699
  wrap_expr(_rbexpr.is_not_nan)
473
700
  end
474
701
 
702
+ # Get the group indexes of the group by operation.
703
+ #
704
+ # Should be used in aggregation context only.
705
+ #
706
+ # @return [Expr]
707
+ #
708
+ # @example
709
+ # df = Polars::DataFrame.new(
710
+ # {
711
+ # "group" => [
712
+ # "one",
713
+ # "one",
714
+ # "one",
715
+ # "two",
716
+ # "two",
717
+ # "two"
718
+ # ],
719
+ # "value" => [94, 95, 96, 97, 97, 99]
720
+ # }
721
+ # )
722
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
723
+ # # =>
724
+ # # shape: (2, 2)
725
+ # # ┌───────┬───────────┐
726
+ # # │ group ┆ value │
727
+ # # │ --- ┆ --- │
728
+ # # │ str ┆ list[u32] │
729
+ # # ╞═══════╪═══════════╡
730
+ # # │ one ┆ [0, 1, 2] │
731
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
732
+ # # │ two ┆ [3, 4, 5] │
733
+ # # └───────┴───────────┘
475
734
  def agg_groups
476
735
  wrap_expr(_rbexpr.agg_groups)
477
736
  end
@@ -557,6 +816,36 @@ module Polars
557
816
  wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
558
817
  end
559
818
 
819
+ # Append expressions.
820
+ #
821
+ # This is done by adding the chunks of `other` to this `Series`.
822
+ #
823
+ # @param other [Expr]
824
+ # Expression to append.
825
+ # @param upcast [Boolean]
826
+ # Cast both `Series` to the same supertype.
827
+ #
828
+ # @return [Expr]
829
+ #
830
+ # @example
831
+ # df = Polars::DataFrame.new(
832
+ # {
833
+ # "a" => [8, 9, 10],
834
+ # "b" => [nil, 4, 4]
835
+ # }
836
+ # )
837
+ # df.select(Polars.all.head(1).append(Polars.all.tail(1)))
838
+ # # =>
839
+ # # shape: (2, 2)
840
+ # # ┌─────┬──────┐
841
+ # # │ a ┆ b │
842
+ # # │ --- ┆ --- │
843
+ # # │ i64 ┆ i64 │
844
+ # # ╞═════╪══════╡
845
+ # # │ 8 ┆ null │
846
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
847
+ # # │ 10 ┆ 4 │
848
+ # # └─────┴──────┘
560
849
  def append(other, upcast: true)
561
850
  other = Utils.expr_to_lit_or_expr(other)
562
851
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
@@ -567,7 +856,7 @@ module Polars
567
856
  # @return [Expr]
568
857
  #
569
858
  # @example Create a Series with 3 nulls, append column a then rechunk
570
- # df = Polars::DataFrame.new({"a": [1, 1, 2]})
859
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
571
860
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
572
861
  # # =>
573
862
  # # shape: (6, 1)
@@ -650,22 +939,182 @@ module Polars
650
939
  wrap_expr(_rbexpr.drop_nans)
651
940
  end
652
941
 
942
+ # Get an array with the cumulative sum computed at every element.
943
+ #
944
+ # @param reverse [Boolean]
945
+ # Reverse the operation.
946
+ #
947
+ # @return [Expr]
948
+ #
949
+ # @note
950
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
951
+ # `:i64` before summing to prevent overflow issues.
952
+ #
953
+ # @example
954
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
955
+ # df.select(
956
+ # [
957
+ # Polars.col("a").cumsum,
958
+ # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
959
+ # ]
960
+ # )
961
+ # # =>
962
+ # # shape: (4, 2)
963
+ # # ┌─────┬───────────┐
964
+ # # │ a ┆ a_reverse │
965
+ # # │ --- ┆ --- │
966
+ # # │ i64 ┆ i64 │
967
+ # # ╞═════╪═══════════╡
968
+ # # │ 1 ┆ 10 │
969
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
970
+ # # │ 3 ┆ 9 │
971
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
972
+ # # │ 6 ┆ 7 │
973
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
974
+ # # │ 10 ┆ 4 │
975
+ # # └─────┴───────────┘
653
976
  def cumsum(reverse: false)
654
977
  wrap_expr(_rbexpr.cumsum(reverse))
655
978
  end
656
979
 
980
+ # Get an array with the cumulative product computed at every element.
981
+ #
982
+ # @param reverse [Boolean]
983
+ # Reverse the operation.
984
+ #
985
+ # @return [Expr]
986
+ #
987
+ # @note
988
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
989
+ # `:i64` before multiplying to prevent overflow issues.
990
+ #
991
+ # @example
992
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
993
+ # df.select(
994
+ # [
995
+ # Polars.col("a").cumprod,
996
+ # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
997
+ # ]
998
+ # )
999
+ # # =>
1000
+ # # shape: (4, 2)
1001
+ # # ┌─────┬───────────┐
1002
+ # # │ a ┆ a_reverse │
1003
+ # # │ --- ┆ --- │
1004
+ # # │ i64 ┆ i64 │
1005
+ # # ╞═════╪═══════════╡
1006
+ # # │ 1 ┆ 24 │
1007
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1008
+ # # │ 2 ┆ 24 │
1009
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1010
+ # # │ 6 ┆ 12 │
1011
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1012
+ # # │ 24 ┆ 4 │
1013
+ # # └─────┴───────────┘
657
1014
  def cumprod(reverse: false)
658
1015
  wrap_expr(_rbexpr.cumprod(reverse))
659
1016
  end
660
1017
 
1018
+ # Get an array with the cumulative min computed at every element.
1019
+ #
1020
+ # @param reverse [Boolean]
1021
+ # Reverse the operation.
1022
+ #
1023
+ # @return [Expr]
1024
+ #
1025
+ # @example
1026
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1027
+ # df.select(
1028
+ # [
1029
+ # Polars.col("a").cummin,
1030
+ # Polars.col("a").cummin(reverse: true).alias("a_reverse")
1031
+ # ]
1032
+ # )
1033
+ # # =>
1034
+ # # shape: (4, 2)
1035
+ # # ┌─────┬───────────┐
1036
+ # # │ a ┆ a_reverse │
1037
+ # # │ --- ┆ --- │
1038
+ # # │ i64 ┆ i64 │
1039
+ # # ╞═════╪═══════════╡
1040
+ # # │ 1 ┆ 1 │
1041
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1042
+ # # │ 1 ┆ 2 │
1043
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1044
+ # # │ 1 ┆ 3 │
1045
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1046
+ # # │ 1 ┆ 4 │
1047
+ # # └─────┴───────────┘
661
1048
  def cummin(reverse: false)
662
1049
  wrap_expr(_rbexpr.cummin(reverse))
663
1050
  end
664
1051
 
1052
+ # Get an array with the cumulative max computed at every element.
1053
+ #
1054
+ # @param reverse [Boolean]
1055
+ # Reverse the operation.
1056
+ #
1057
+ # @return [Expr]
1058
+ #
1059
+ # @example
1060
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1061
+ # df.select(
1062
+ # [
1063
+ # Polars.col("a").cummax,
1064
+ # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1065
+ # ]
1066
+ # )
1067
+ # # =>
1068
+ # # shape: (4, 2)
1069
+ # # ┌─────┬───────────┐
1070
+ # # │ a ┆ a_reverse │
1071
+ # # │ --- ┆ --- │
1072
+ # # │ i64 ┆ i64 │
1073
+ # # ╞═════╪═══════════╡
1074
+ # # │ 1 ┆ 4 │
1075
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1076
+ # # │ 2 ┆ 4 │
1077
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1078
+ # # │ 3 ┆ 4 │
1079
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1080
+ # # │ 4 ┆ 4 │
1081
+ # # └─────┴───────────┘
665
1082
  def cummax(reverse: false)
666
1083
  wrap_expr(_rbexpr.cummax(reverse))
667
1084
  end
668
1085
 
1086
+ # Get an array with the cumulative count computed at every element.
1087
+ #
1088
+ # Counting from 0 to `len - 1`.
1089
+ #
1090
+ # @param reverse [Boolean]
1091
+ # Reverse the operation.
1092
+ #
1093
+ # @return [Expr]
1094
+ #
1095
+ # @example
1096
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1097
+ # df.select(
1098
+ # [
1099
+ # Polars.col("a").cumcount,
1100
+ # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1101
+ # ]
1102
+ # )
1103
+ # # =>
1104
+ # # shape: (4, 2)
1105
+ # # ┌─────┬───────────┐
1106
+ # # │ a ┆ a_reverse │
1107
+ # # │ --- ┆ --- │
1108
+ # # │ u32 ┆ u32 │
1109
+ # # ╞═════╪═══════════╡
1110
+ # # │ 0 ┆ 3 │
1111
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1112
+ # # │ 1 ┆ 2 │
1113
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1114
+ # # │ 2 ┆ 1 │
1115
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1116
+ # # │ 3 ┆ 0 │
1117
+ # # └─────┴───────────┘
669
1118
  def cumcount(reverse: false)
670
1119
  wrap_expr(_rbexpr.cumcount(reverse))
671
1120
  end
@@ -755,6 +1204,30 @@ module Polars
755
1204
  wrap_expr(_rbexpr.round(decimals))
756
1205
  end
757
1206
 
1207
+ # Compute the dot/inner product between two Expressions.
1208
+ #
1209
+ # @param other [Expr]
1210
+ # Expression to compute dot product with.
1211
+ #
1212
+ # @return [Expr]
1213
+ #
1214
+ # @example
1215
+ # df = Polars::DataFrame.new(
1216
+ # {
1217
+ # "a" => [1, 3, 5],
1218
+ # "b" => [2, 4, 6]
1219
+ # }
1220
+ # )
1221
+ # df.select(Polars.col("a").dot(Polars.col("b")))
1222
+ # # =>
1223
+ # # shape: (1, 1)
1224
+ # # ┌─────┐
1225
+ # # │ a │
1226
+ # # │ --- │
1227
+ # # │ i64 │
1228
+ # # ╞═════╡
1229
+ # # │ 44 │
1230
+ # # └─────┘
758
1231
  def dot(other)
759
1232
  other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
760
1233
  wrap_expr(_rbexpr.dot(other._rbexpr))
@@ -789,24 +1262,183 @@ module Polars
789
1262
  wrap_expr(_rbexpr.mode)
790
1263
  end
791
1264
 
1265
+ # Cast between data types.
1266
+ #
1267
+ # @param dtype [Symbol]
1268
+ # DataType to cast to.
1269
+ # @param strict [Boolean]
1270
+ # Throw an error if a cast could not be done.
1271
+ # For instance, due to an overflow.
1272
+ #
1273
+ # @return [Expr]
1274
+ #
1275
+ # @example
1276
+ # df = Polars::DataFrame.new(
1277
+ # {
1278
+ # "a" => [1, 2, 3],
1279
+ # "b" => ["4", "5", "6"]
1280
+ # }
1281
+ # )
1282
+ # df.with_columns(
1283
+ # [
1284
+ # Polars.col("a").cast(:f64),
1285
+ # Polars.col("b").cast(:i32)
1286
+ # ]
1287
+ # )
1288
+ # # =>
1289
+ # # shape: (3, 2)
1290
+ # # ┌─────┬─────┐
1291
+ # # │ a ┆ b │
1292
+ # # │ --- ┆ --- │
1293
+ # # │ f64 ┆ i32 │
1294
+ # # ╞═════╪═════╡
1295
+ # # │ 1.0 ┆ 4 │
1296
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1297
+ # # │ 2.0 ┆ 5 │
1298
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1299
+ # # │ 3.0 ┆ 6 │
1300
+ # # └─────┴─────┘
792
1301
  def cast(dtype, strict: true)
793
1302
  dtype = Utils.rb_type_to_dtype(dtype)
794
1303
  wrap_expr(_rbexpr.cast(dtype, strict))
795
1304
  end
796
1305
 
1306
+ # Sort this column. In projection/selection context the whole column is sorted.
1307
+ #
1308
+ # If used in a groupby context, the groups are sorted.
1309
+ #
1310
+ # @param reverse [Boolean]
1311
+ # false -> order from small to large.
1312
+ # true -> order from large to small.
1313
+ # @param nulls_last [Boolean]
1314
+ # If true nulls are considered to be larger than any valid value.
1315
+ #
1316
+ # @return [Expr]
1317
+ #
1318
+ # @example
1319
+ # df = Polars::DataFrame.new(
1320
+ # {
1321
+ # "group" => [
1322
+ # "one",
1323
+ # "one",
1324
+ # "one",
1325
+ # "two",
1326
+ # "two",
1327
+ # "two"
1328
+ # ],
1329
+ # "value" => [1, 98, 2, 3, 99, 4]
1330
+ # }
1331
+ # )
1332
+ # df.select(Polars.col("value").sort)
1333
+ # # =>
1334
+ # # shape: (6, 1)
1335
+ # # ┌───────┐
1336
+ # # │ value │
1337
+ # # │ --- │
1338
+ # # │ i64 │
1339
+ # # ╞═══════╡
1340
+ # # │ 1 │
1341
+ # # ├╌╌╌╌╌╌╌┤
1342
+ # # │ 2 │
1343
+ # # ├╌╌╌╌╌╌╌┤
1344
+ # # │ 3 │
1345
+ # # ├╌╌╌╌╌╌╌┤
1346
+ # # │ 4 │
1347
+ # # ├╌╌╌╌╌╌╌┤
1348
+ # # │ 98 │
1349
+ # # ├╌╌╌╌╌╌╌┤
1350
+ # # │ 99 │
1351
+ # # └───────┘
1352
+ #
1353
+ # @example
1354
+ # df.select(Polars.col("value").sort)
1355
+ # # =>
1356
+ # # shape: (6, 1)
1357
+ # # ┌───────┐
1358
+ # # │ value │
1359
+ # # │ --- │
1360
+ # # │ i64 │
1361
+ # # ╞═══════╡
1362
+ # # │ 1 │
1363
+ # # ├╌╌╌╌╌╌╌┤
1364
+ # # │ 2 │
1365
+ # # ├╌╌╌╌╌╌╌┤
1366
+ # # │ 3 │
1367
+ # # ├╌╌╌╌╌╌╌┤
1368
+ # # │ 4 │
1369
+ # # ├╌╌╌╌╌╌╌┤
1370
+ # # │ 98 │
1371
+ # # ├╌╌╌╌╌╌╌┤
1372
+ # # │ 99 │
1373
+ # # └───────┘
1374
+ #
1375
+ # @example
1376
+ # df.groupby("group").agg(Polars.col("value").sort)
1377
+ # # =>
1378
+ # # shape: (2, 2)
1379
+ # # ┌───────┬────────────┐
1380
+ # # │ group ┆ value │
1381
+ # # │ --- ┆ --- │
1382
+ # # │ str ┆ list[i64] │
1383
+ # # ╞═══════╪════════════╡
1384
+ # # │ two ┆ [3, 4, 99] │
1385
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1386
+ # # │ one ┆ [1, 2, 98] │
1387
+ # # └───────┴────────────┘
797
1388
  def sort(reverse: false, nulls_last: false)
798
1389
  wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
799
1390
  end
800
1391
 
1392
+ # Return the `k` largest elements.
1393
+ #
1394
+ # If `reverse: true`, the smallest elements will be given.
1395
+ #
1396
+ # @param k [Integer]
1397
+ # Number of elements to return.
1398
+ # @param reverse [Boolean]
1399
+ # Return the smallest elements.
1400
+ #
1401
+ # @return [Expr]
1402
+ #
1403
+ # @example
1404
+ # df = Polars::DataFrame.new(
1405
+ # {
1406
+ # "value" => [1, 98, 2, 3, 99, 4]
1407
+ # }
1408
+ # )
1409
+ # df.select(
1410
+ # [
1411
+ # Polars.col("value").top_k.alias("top_k"),
1412
+ # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1413
+ # ]
1414
+ # )
1415
+ # # =>
1416
+ # # shape: (5, 2)
1417
+ # # ┌───────┬──────────┐
1418
+ # # │ top_k ┆ bottom_k │
1419
+ # # │ --- ┆ --- │
1420
+ # # │ i64 ┆ i64 │
1421
+ # # ╞═══════╪══════════╡
1422
+ # # │ 99 ┆ 1 │
1423
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1424
+ # # │ 98 ┆ 2 │
1425
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1426
+ # # │ 4 ┆ 3 │
1427
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1428
+ # # │ 3 ┆ 4 │
1429
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1430
+ # # │ 2 ┆ 98 │
1431
+ # # └───────┴──────────┘
801
1432
  def top_k(k: 5, reverse: false)
802
1433
  wrap_expr(_rbexpr.top_k(k, reverse))
803
1434
  end
804
1435
 
805
- def arg_sort(reverse: false, nulls_last: false)
806
- wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
807
- end
808
-
809
- # Get the index of the maximal value.
1436
+ # Get the index values that would sort this column.
1437
+ #
1438
+ # @param reverse [Boolean]
1439
+ # Sort in reverse (descending) order.
1440
+ # @param nulls_last [Boolean]
1441
+ # Place null values last instead of first.
810
1442
  #
811
1443
  # @return [Expr]
812
1444
  #
@@ -816,21 +1448,49 @@ module Polars
816
1448
  # "a" => [20, 10, 30]
817
1449
  # }
818
1450
  # )
819
- # df.select(Polars.col("a").arg_max)
1451
+ # df.select(Polars.col("a").arg_sort)
820
1452
  # # =>
821
- # # shape: (1, 1)
1453
+ # # shape: (3, 1)
822
1454
  # # ┌─────┐
823
1455
  # # │ a │
824
1456
  # # │ --- │
825
1457
  # # │ u32 │
826
1458
  # # ╞═════╡
1459
+ # # │ 1 │
1460
+ # # ├╌╌╌╌╌┤
1461
+ # # │ 0 │
1462
+ # # ├╌╌╌╌╌┤
827
1463
  # # │ 2 │
828
1464
  # # └─────┘
829
- def arg_max
830
- wrap_expr(_rbexpr.arg_max)
1465
+ def arg_sort(reverse: false, nulls_last: false)
1466
+ wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
831
1467
  end
832
1468
 
833
- # Get the index of the minimal value.
1469
+ # Get the index of the maximal value.
1470
+ #
1471
+ # @return [Expr]
1472
+ #
1473
+ # @example
1474
+ # df = Polars::DataFrame.new(
1475
+ # {
1476
+ # "a" => [20, 10, 30]
1477
+ # }
1478
+ # )
1479
+ # df.select(Polars.col("a").arg_max)
1480
+ # # =>
1481
+ # # shape: (1, 1)
1482
+ # # ┌─────┐
1483
+ # # │ a │
1484
+ # # │ --- │
1485
+ # # │ u32 │
1486
+ # # ╞═════╡
1487
+ # # │ 2 │
1488
+ # # └─────┘
1489
+ def arg_max
1490
+ wrap_expr(_rbexpr.arg_max)
1491
+ end
1492
+
1493
+ # Get the index of the minimal value.
834
1494
  #
835
1495
  # @return [Expr]
836
1496
  #
@@ -854,11 +1514,87 @@ module Polars
854
1514
  wrap_expr(_rbexpr.arg_min)
855
1515
  end
856
1516
 
1517
+ # Find indices where elements should be inserted to maintain order.
1518
+ #
1519
+ # @param element [Object]
1520
+ # Expression or scalar value.
1521
+ #
1522
+ # @return [Expr]
1523
+ #
1524
+ # @example
1525
+ # df = Polars::DataFrame.new(
1526
+ # {
1527
+ # "values" => [1, 2, 3, 5]
1528
+ # }
1529
+ # )
1530
+ # df.select(
1531
+ # [
1532
+ # Polars.col("values").search_sorted(0).alias("zero"),
1533
+ # Polars.col("values").search_sorted(3).alias("three"),
1534
+ # Polars.col("values").search_sorted(6).alias("six")
1535
+ # ]
1536
+ # )
1537
+ # # =>
1538
+ # # shape: (1, 3)
1539
+ # # ┌──────┬───────┬─────┐
1540
+ # # │ zero ┆ three ┆ six │
1541
+ # # │ --- ┆ --- ┆ --- │
1542
+ # # │ u32 ┆ u32 ┆ u32 │
1543
+ # # ╞══════╪═══════╪═════╡
1544
+ # # │ 0 ┆ 2 ┆ 4 │
1545
+ # # └──────┴───────┴─────┘
857
1546
  def search_sorted(element)
858
1547
  element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
859
1548
  wrap_expr(_rbexpr.search_sorted(element._rbexpr))
860
1549
  end
861
1550
 
1551
+ # Sort this column by the ordering of another column, or multiple other columns.
1552
+ #
1553
+ # In projection/selection context the whole column is sorted.
1554
+ # If used in a groupby context, the groups are sorted.
1555
+ #
1556
+ # @param by [Object]
1557
+ # The column(s) used for sorting.
1558
+ # @param reverse [Boolean]
1559
+ # false -> order from small to large.
1560
+ # true -> order from large to small.
1561
+ #
1562
+ # @return [Expr]
1563
+ #
1564
+ # @example
1565
+ # df = Polars::DataFrame.new(
1566
+ # {
1567
+ # "group" => [
1568
+ # "one",
1569
+ # "one",
1570
+ # "one",
1571
+ # "two",
1572
+ # "two",
1573
+ # "two"
1574
+ # ],
1575
+ # "value" => [1, 98, 2, 3, 99, 4]
1576
+ # }
1577
+ # )
1578
+ # df.select(Polars.col("group").sort_by("value"))
1579
+ # # =>
1580
+ # # shape: (6, 1)
1581
+ # # ┌───────┐
1582
+ # # │ group │
1583
+ # # │ --- │
1584
+ # # │ str │
1585
+ # # ╞═══════╡
1586
+ # # │ one │
1587
+ # # ├╌╌╌╌╌╌╌┤
1588
+ # # │ one │
1589
+ # # ├╌╌╌╌╌╌╌┤
1590
+ # # │ two │
1591
+ # # ├╌╌╌╌╌╌╌┤
1592
+ # # │ two │
1593
+ # # ├╌╌╌╌╌╌╌┤
1594
+ # # │ one │
1595
+ # # ├╌╌╌╌╌╌╌┤
1596
+ # # │ two │
1597
+ # # └───────┘
862
1598
  def sort_by(by, reverse: false)
863
1599
  if !by.is_a?(Array)
864
1600
  by = [by]
@@ -871,6 +1607,39 @@ module Polars
871
1607
  wrap_expr(_rbexpr.sort_by(by, reverse))
872
1608
  end
873
1609
 
1610
+ # Take values by index.
1611
+ #
1612
+ # @param indices [Expr]
1613
+ # An expression that leads to a `:u32` dtyped Series.
1614
+ #
1615
+ # @return [Expr]
1616
+ #
1617
+ # @example
1618
+ # df = Polars::DataFrame.new(
1619
+ # {
1620
+ # "group" => [
1621
+ # "one",
1622
+ # "one",
1623
+ # "one",
1624
+ # "two",
1625
+ # "two",
1626
+ # "two"
1627
+ # ],
1628
+ # "value" => [1, 98, 2, 3, 99, 4]
1629
+ # }
1630
+ # )
1631
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1632
+ # # =>
1633
+ # # shape: (2, 2)
1634
+ # # ┌───────┬───────┐
1635
+ # # │ group ┆ value │
1636
+ # # │ --- ┆ --- │
1637
+ # # │ str ┆ i64 │
1638
+ # # ╞═══════╪═══════╡
1639
+ # # │ one ┆ 98 │
1640
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1641
+ # # │ two ┆ 99 │
1642
+ # # └───────┴───────┘
874
1643
  def take(indices)
875
1644
  if indices.is_a?(Array)
876
1645
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
@@ -909,11 +1678,105 @@ module Polars
909
1678
  wrap_expr(_rbexpr.shift(periods))
910
1679
  end
911
1680
 
1681
+ # Shift the values by a given period and fill the resulting null values.
1682
+ #
1683
+ # @param periods [Integer]
1684
+ # Number of places to shift (may be negative).
1685
+ # @param fill_value [Object]
1686
+ # Fill nil values with the result of this expression.
1687
+ #
1688
+ # @return [Expr]
1689
+ #
1690
+ # @example
1691
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
1692
+ # df.select(Polars.col("foo").shift_and_fill(1, "a"))
1693
+ # # =>
1694
+ # # shape: (4, 1)
1695
+ # # ┌─────┐
1696
+ # # │ foo │
1697
+ # # │ --- │
1698
+ # # │ str │
1699
+ # # ╞═════╡
1700
+ # # │ a │
1701
+ # # ├╌╌╌╌╌┤
1702
+ # # │ 1 │
1703
+ # # ├╌╌╌╌╌┤
1704
+ # # │ 2 │
1705
+ # # ├╌╌╌╌╌┤
1706
+ # # │ 3 │
1707
+ # # └─────┘
912
1708
  def shift_and_fill(periods, fill_value)
913
1709
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
914
1710
  wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
915
1711
  end
916
1712
 
1713
+ # Fill null values using the specified value or strategy.
1714
+ #
1715
+ # To interpolate over null values, see `interpolate`.
1716
+ #
1717
+ # @param value [Object]
1718
+ # Value used to fill null values.
1719
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1720
+ # Strategy used to fill null values.
1721
+ # @param limit [Integer]
1722
+ # Number of consecutive null values to fill when using the 'forward' or
1723
+ # 'backward' strategy.
1724
+ #
1725
+ # @return [Expr]
1726
+ #
1727
+ # @example
1728
+ # df = Polars::DataFrame.new(
1729
+ # {
1730
+ # "a" => [1, 2, nil],
1731
+ # "b" => [4, nil, 6]
1732
+ # }
1733
+ # )
1734
+ # df.fill_null(strategy: "zero")
1735
+ # # =>
1736
+ # # shape: (3, 2)
1737
+ # # ┌─────┬─────┐
1738
+ # # │ a ┆ b │
1739
+ # # │ --- ┆ --- │
1740
+ # # │ i64 ┆ i64 │
1741
+ # # ╞═════╪═════╡
1742
+ # # │ 1 ┆ 4 │
1743
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1744
+ # # │ 2 ┆ 0 │
1745
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1746
+ # # │ 0 ┆ 6 │
1747
+ # # └─────┴─────┘
1748
+ #
1749
+ # @example
1750
+ # df.fill_null(99)
1751
+ # # =>
1752
+ # # shape: (3, 2)
1753
+ # # ┌─────┬─────┐
1754
+ # # │ a ┆ b │
1755
+ # # │ --- ┆ --- │
1756
+ # # │ i64 ┆ i64 │
1757
+ # # ╞═════╪═════╡
1758
+ # # │ 1 ┆ 4 │
1759
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1760
+ # # │ 2 ┆ 99 │
1761
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1762
+ # # │ 99 ┆ 6 │
1763
+ # # └─────┴─────┘
1764
+ #
1765
+ # @example
1766
+ # df.fill_null(strategy: "forward")
1767
+ # # =>
1768
+ # # shape: (3, 2)
1769
+ # # ┌─────┬─────┐
1770
+ # # │ a ┆ b │
1771
+ # # │ --- ┆ --- │
1772
+ # # │ i64 ┆ i64 │
1773
+ # # ╞═════╪═════╡
1774
+ # # │ 1 ┆ 4 │
1775
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1776
+ # # │ 2 ┆ 4 │
1777
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1778
+ # # │ 2 ┆ 6 │
1779
+ # # └─────┴─────┘
917
1780
  def fill_null(value = nil, strategy: nil, limit: nil)
918
1781
  if !value.nil? && !strategy.nil?
919
1782
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -931,75 +1794,426 @@ module Polars
931
1794
  end
932
1795
  end
933
1796
 
1797
+ # Fill floating point NaN value with a fill value.
1798
+ #
1799
+ # @return [Expr]
1800
+ #
1801
+ # @example
1802
+ # df = Polars::DataFrame.new(
1803
+ # {
1804
+ # "a" => [1.0, nil, Float::NAN],
1805
+ # "b" => [4.0, Float::NAN, 6]
1806
+ # }
1807
+ # )
1808
+ # df.fill_nan("zero")
1809
+ # # =>
1810
+ # # shape: (3, 2)
1811
+ # # ┌──────┬──────┐
1812
+ # # │ a ┆ b │
1813
+ # # │ --- ┆ --- │
1814
+ # # │ str ┆ str │
1815
+ # # ╞══════╪══════╡
1816
+ # # │ 1.0 ┆ 4.0 │
1817
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1818
+ # # │ null ┆ zero │
1819
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1820
+ # # │ zero ┆ 6.0 │
1821
+ # # └──────┴──────┘
934
1822
  def fill_nan(fill_value)
935
1823
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
936
1824
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
937
1825
  end
938
1826
 
1827
+ # Fill missing values with the latest seen values.
1828
+ #
1829
+ # @param limit [Integer]
1830
+ # The number of consecutive null values to forward fill.
1831
+ #
1832
+ # @return [Expr]
1833
+ #
1834
+ # @example
1835
+ # df = Polars::DataFrame.new(
1836
+ # {
1837
+ # "a" => [1, 2, nil],
1838
+ # "b" => [4, nil, 6]
1839
+ # }
1840
+ # )
1841
+ # df.select(Polars.all.forward_fill)
1842
+ # # =>
1843
+ # # shape: (3, 2)
1844
+ # # ┌─────┬─────┐
1845
+ # # │ a ┆ b │
1846
+ # # │ --- ┆ --- │
1847
+ # # │ i64 ┆ i64 │
1848
+ # # ╞═════╪═════╡
1849
+ # # │ 1 ┆ 4 │
1850
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1851
+ # # │ 2 ┆ 4 │
1852
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1853
+ # # │ 2 ┆ 6 │
1854
+ # # └─────┴─────┘
939
1855
  def forward_fill(limit: nil)
940
1856
  wrap_expr(_rbexpr.forward_fill(limit))
941
1857
  end
942
1858
 
1859
+ # Fill missing values with the next to be seen values.
1860
+ #
1861
+ # @param limit [Integer]
1862
+ # The number of consecutive null values to backward fill.
1863
+ #
1864
+ # @return [Expr]
1865
+ #
1866
+ # @example
1867
+ # df = Polars::DataFrame.new(
1868
+ # {
1869
+ # "a" => [1, 2, nil],
1870
+ # "b" => [4, nil, 6]
1871
+ # }
1872
+ # )
1873
+ # df.select(Polars.all.backward_fill)
1874
+ # # =>
1875
+ # # shape: (3, 2)
1876
+ # # ┌──────┬─────┐
1877
+ # # │ a ┆ b │
1878
+ # # │ --- ┆ --- │
1879
+ # # │ i64 ┆ i64 │
1880
+ # # ╞══════╪═════╡
1881
+ # # │ 1 ┆ 4 │
1882
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1883
+ # # │ 2 ┆ 6 │
1884
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1885
+ # # │ null ┆ 6 │
1886
+ # # └──────┴─────┘
943
1887
  def backward_fill(limit: nil)
944
1888
  wrap_expr(_rbexpr.backward_fill(limit))
945
1889
  end
946
1890
 
1891
+ # Reverse the selection.
1892
+ #
1893
+ # @return [Expr]
947
1894
  def reverse
948
1895
  wrap_expr(_rbexpr.reverse)
949
1896
  end
950
1897
 
1898
+ # Get standard deviation.
1899
+ #
1900
+ # @param ddof [Integer]
1901
+ # Degrees of freedom.
1902
+ #
1903
+ # @return [Expr]
1904
+ #
1905
+ # @example
1906
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1907
+ # df.select(Polars.col("a").std)
1908
+ # # =>
1909
+ # # shape: (1, 1)
1910
+ # # ┌─────┐
1911
+ # # │ a │
1912
+ # # │ --- │
1913
+ # # │ f64 │
1914
+ # # ╞═════╡
1915
+ # # │ 1.0 │
1916
+ # # └─────┘
951
1917
  def std(ddof: 1)
952
1918
  wrap_expr(_rbexpr.std(ddof))
953
1919
  end
954
1920
 
1921
+ # Get variance.
1922
+ #
1923
+ # @param ddof [Integer]
1924
+ # Degrees of freedom.
1925
+ #
1926
+ # @return [Expr]
1927
+ #
1928
+ # @example
1929
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1930
+ # df.select(Polars.col("a").var)
1931
+ # # =>
1932
+ # # shape: (1, 1)
1933
+ # # ┌─────┐
1934
+ # # │ a │
1935
+ # # │ --- │
1936
+ # # │ f64 │
1937
+ # # ╞═════╡
1938
+ # # │ 1.0 │
1939
+ # # └─────┘
955
1940
  def var(ddof: 1)
956
1941
  wrap_expr(_rbexpr.var(ddof))
957
1942
  end
958
1943
 
1944
+ # Get maximum value.
1945
+ #
1946
+ # @return [Expr]
1947
+ #
1948
+ # @example
1949
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1950
+ # df.select(Polars.col("a").max)
1951
+ # # =>
1952
+ # # shape: (1, 1)
1953
+ # # ┌─────┐
1954
+ # # │ a │
1955
+ # # │ --- │
1956
+ # # │ f64 │
1957
+ # # ╞═════╡
1958
+ # # │ 1.0 │
1959
+ # # └─────┘
959
1960
  def max
960
1961
  wrap_expr(_rbexpr.max)
961
1962
  end
962
1963
 
1964
+ # Get minimum value.
1965
+ #
1966
+ # @return [Expr]
1967
+ #
1968
+ # @example
1969
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1970
+ # df.select(Polars.col("a").min)
1971
+ # # =>
1972
+ # # shape: (1, 1)
1973
+ # # ┌──────┐
1974
+ # # │ a │
1975
+ # # │ --- │
1976
+ # # │ f64 │
1977
+ # # ╞══════╡
1978
+ # # │ -1.0 │
1979
+ # # └──────┘
963
1980
  def min
964
1981
  wrap_expr(_rbexpr.min)
965
1982
  end
966
1983
 
1984
+ # Get maximum value, but propagate/poison encountered NaN values.
1985
+ #
1986
+ # @return [Expr]
1987
+ #
1988
+ # @example
1989
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1990
+ # df.select(Polars.col("a").nan_max)
1991
+ # # =>
1992
+ # # shape: (1, 1)
1993
+ # # ┌─────┐
1994
+ # # │ a │
1995
+ # # │ --- │
1996
+ # # │ f64 │
1997
+ # # ╞═════╡
1998
+ # # │ NaN │
1999
+ # # └─────┘
967
2000
  def nan_max
968
2001
  wrap_expr(_rbexpr.nan_max)
969
2002
  end
970
2003
 
971
- def nan_min
972
- wrap_expr(_rbexpr.nan_min)
973
- end
974
-
975
- def sum
976
- wrap_expr(_rbexpr.sum)
977
- end
978
-
979
- def mean
980
- wrap_expr(_rbexpr.mean)
981
- end
982
-
2004
+ # Get minimum value, but propagate/poison encountered NaN values.
2005
+ #
2006
+ # @return [Expr]
2007
+ #
2008
+ # @example
2009
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
2010
+ # df.select(Polars.col("a").nan_min)
2011
+ # # =>
2012
+ # # shape: (1, 1)
2013
+ # # ┌─────┐
2014
+ # # │ a │
2015
+ # # │ --- │
2016
+ # # │ f64 │
2017
+ # # ╞═════╡
2018
+ # # │ NaN │
2019
+ # # └─────┘
2020
+ def nan_min
2021
+ wrap_expr(_rbexpr.nan_min)
2022
+ end
2023
+
2024
+ # Get sum value.
2025
+ #
2026
+ # @return [Expr]
2027
+ #
2028
+ # @note
2029
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
2030
+ # `:i64` before summing to prevent overflow issues.
2031
+ #
2032
+ # @example
2033
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2034
+ # df.select(Polars.col("a").sum)
2035
+ # # =>
2036
+ # # shape: (1, 1)
2037
+ # # ┌─────┐
2038
+ # # │ a │
2039
+ # # │ --- │
2040
+ # # │ i64 │
2041
+ # # ╞═════╡
2042
+ # # │ 0 │
2043
+ # # └─────┘
2044
+ def sum
2045
+ wrap_expr(_rbexpr.sum)
2046
+ end
2047
+
2048
+ # Get mean value.
2049
+ #
2050
+ # @return [Expr]
2051
+ #
2052
+ # @example
2053
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2054
+ # df.select(Polars.col("a").mean)
2055
+ # # =>
2056
+ # # shape: (1, 1)
2057
+ # # ┌─────┐
2058
+ # # │ a │
2059
+ # # │ --- │
2060
+ # # │ f64 │
2061
+ # # ╞═════╡
2062
+ # # │ 0.0 │
2063
+ # # └─────┘
2064
+ def mean
2065
+ wrap_expr(_rbexpr.mean)
2066
+ end
2067
+
2068
+ # Get median value using linear interpolation.
2069
+ #
2070
+ # @return [Expr]
2071
+ #
2072
+ # @example
2073
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2074
+ # df.select(Polars.col("a").median)
2075
+ # # =>
2076
+ # # shape: (1, 1)
2077
+ # # ┌─────┐
2078
+ # # │ a │
2079
+ # # │ --- │
2080
+ # # │ f64 │
2081
+ # # ╞═════╡
2082
+ # # │ 0.0 │
2083
+ # # └─────┘
983
2084
  def median
984
2085
  wrap_expr(_rbexpr.median)
985
2086
  end
986
2087
 
2088
+ # Compute the product of an expression.
2089
+ #
2090
+ # @return [Expr]
2091
+ #
2092
+ # @example
2093
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
2094
+ # df.select(Polars.col("a").product)
2095
+ # # =>
2096
+ # # shape: (1, 1)
2097
+ # # ┌─────┐
2098
+ # # │ a │
2099
+ # # │ --- │
2100
+ # # │ i64 │
2101
+ # # ╞═════╡
2102
+ # # │ 6 │
2103
+ # # └─────┘
987
2104
  def product
988
2105
  wrap_expr(_rbexpr.product)
989
2106
  end
990
2107
 
2108
+ # Count unique values.
2109
+ #
2110
+ # @return [Expr]
2111
+ #
2112
+ # @example
2113
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2114
+ # df.select(Polars.col("a").n_unique)
2115
+ # # =>
2116
+ # # shape: (1, 1)
2117
+ # # ┌─────┐
2118
+ # # │ a │
2119
+ # # │ --- │
2120
+ # # │ u32 │
2121
+ # # ╞═════╡
2122
+ # # │ 2 │
2123
+ # # └─────┘
991
2124
  def n_unique
992
2125
  wrap_expr(_rbexpr.n_unique)
993
2126
  end
994
2127
 
2128
+ # Count null values.
2129
+ #
2130
+ # @return [Expr]
2131
+ #
2132
+ # @example
2133
+ # df = Polars::DataFrame.new(
2134
+ # {
2135
+ # "a" => [nil, 1, nil],
2136
+ # "b" => [1, 2, 3]
2137
+ # }
2138
+ # )
2139
+ # df.select(Polars.all.null_count)
2140
+ # # =>
2141
+ # # shape: (1, 2)
2142
+ # # ┌─────┬─────┐
2143
+ # # │ a ┆ b │
2144
+ # # │ --- ┆ --- │
2145
+ # # │ u32 ┆ u32 │
2146
+ # # ╞═════╪═════╡
2147
+ # # │ 2 ┆ 0 │
2148
+ # # └─────┴─────┘
995
2149
  def null_count
996
2150
  wrap_expr(_rbexpr.null_count)
997
2151
  end
998
2152
 
2153
+ # Get index of first unique value.
2154
+ #
2155
+ # @return [Expr]
2156
+ #
2157
+ # @example
2158
+ # df = Polars::DataFrame.new(
2159
+ # {
2160
+ # "a" => [8, 9, 10],
2161
+ # "b" => [nil, 4, 4]
2162
+ # }
2163
+ # )
2164
+ # df.select(Polars.col("a").arg_unique)
2165
+ # # =>
2166
+ # # shape: (3, 1)
2167
+ # # ┌─────┐
2168
+ # # │ a │
2169
+ # # │ --- │
2170
+ # # │ u32 │
2171
+ # # ╞═════╡
2172
+ # # │ 0 │
2173
+ # # ├╌╌╌╌╌┤
2174
+ # # │ 1 │
2175
+ # # ├╌╌╌╌╌┤
2176
+ # # │ 2 │
2177
+ # # └─────┘
2178
+ #
2179
+ # @example
2180
+ # df.select(Polars.col("b").arg_unique)
2181
+ # # =>
2182
+ # # shape: (2, 1)
2183
+ # # ┌─────┐
2184
+ # # │ b │
2185
+ # # │ --- │
2186
+ # # │ u32 │
2187
+ # # ╞═════╡
2188
+ # # │ 0 │
2189
+ # # ├╌╌╌╌╌┤
2190
+ # # │ 1 │
2191
+ # # └─────┘
999
2192
  def arg_unique
1000
2193
  wrap_expr(_rbexpr.arg_unique)
1001
2194
  end
1002
2195
 
2196
+ # Get unique values of this expression.
2197
+ #
2198
+ # @param maintain_order [Boolean]
2199
+ # Maintain order of data. This requires more work.
2200
+ #
2201
+ # @return [Expr]
2202
+ #
2203
+ # @example
2204
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2205
+ # df.select(Polars.col("a").unique(maintain_order: true))
2206
+ # # =>
2207
+ # # shape: (2, 1)
2208
+ # # ┌─────┐
2209
+ # # │ a │
2210
+ # # │ --- │
2211
+ # # │ i64 │
2212
+ # # ╞═════╡
2213
+ # # │ 1 │
2214
+ # # ├╌╌╌╌╌┤
2215
+ # # │ 2 │
2216
+ # # └─────┘
1003
2217
  def unique(maintain_order: false)
1004
2218
  if maintain_order
1005
2219
  wrap_expr(_rbexpr.unique_stable)
@@ -1008,243 +2222,2508 @@ module Polars
1008
2222
  end
1009
2223
  end
1010
2224
 
2225
+ # Get the first value.
2226
+ #
2227
+ # @return [Expr]
2228
+ #
2229
+ # @example
2230
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2231
+ # df.select(Polars.col("a").first)
2232
+ # # =>
2233
+ # # shape: (1, 1)
2234
+ # # ┌─────┐
2235
+ # # │ a │
2236
+ # # │ --- │
2237
+ # # │ i64 │
2238
+ # # ╞═════╡
2239
+ # # │ 1 │
2240
+ # # └─────┘
1011
2241
  def first
1012
2242
  wrap_expr(_rbexpr.first)
1013
2243
  end
1014
2244
 
2245
+ # Get the last value.
2246
+ #
2247
+ # @return [Expr]
2248
+ #
2249
+ # @example
2250
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2251
+ # df.select(Polars.col("a").last)
2252
+ # # =>
2253
+ # # shape: (1, 1)
2254
+ # # ┌─────┐
2255
+ # # │ a │
2256
+ # # │ --- │
2257
+ # # │ i64 │
2258
+ # # ╞═════╡
2259
+ # # │ 2 │
2260
+ # # └─────┘
1015
2261
  def last
1016
2262
  wrap_expr(_rbexpr.last)
1017
2263
  end
1018
2264
 
2265
+ # Apply window function over a subgroup.
2266
+ #
2267
+ # This is similar to a groupby + aggregation + self join,
2268
+ # or to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2269
+ #
2270
+ # @param expr [Object]
2271
+ # Column(s) to group by.
2272
+ #
2273
+ # @return [Expr]
2274
+ #
2275
+ # @example
2276
+ # df = Polars::DataFrame.new(
2277
+ # {
2278
+ # "groups" => ["g1", "g1", "g2"],
2279
+ # "values" => [1, 2, 3]
2280
+ # }
2281
+ # )
2282
+ # df.with_column(
2283
+ # Polars.col("values").max.over("groups").alias("max_by_group")
2284
+ # )
2285
+ # # =>
2286
+ # # shape: (3, 3)
2287
+ # # ┌────────┬────────┬──────────────┐
2288
+ # # │ groups ┆ values ┆ max_by_group │
2289
+ # # │ --- ┆ --- ┆ --- │
2290
+ # # │ str ┆ i64 ┆ i64 │
2291
+ # # ╞════════╪════════╪══════════════╡
2292
+ # # │ g1 ┆ 1 ┆ 2 │
2293
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2294
+ # # │ g1 ┆ 2 ┆ 2 │
2295
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2296
+ # # │ g2 ┆ 3 ┆ 3 │
2297
+ # # └────────┴────────┴──────────────┘
2298
+ #
2299
+ # @example
2300
+ # df = Polars::DataFrame.new(
2301
+ # {
2302
+ # "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
2303
+ # "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
2304
+ # }
2305
+ # )
2306
+ # df.lazy
2307
+ # .select([Polars.col("groups").sum.over("groups")])
2308
+ # .collect
2309
+ # # =>
2310
+ # # shape: (9, 1)
2311
+ # # ┌────────┐
2312
+ # # │ groups │
2313
+ # # │ --- │
2314
+ # # │ i64 │
2315
+ # # ╞════════╡
2316
+ # # │ 4 │
2317
+ # # ├╌╌╌╌╌╌╌╌┤
2318
+ # # │ 4 │
2319
+ # # ├╌╌╌╌╌╌╌╌┤
2320
+ # # │ 6 │
2321
+ # # ├╌╌╌╌╌╌╌╌┤
2322
+ # # │ 6 │
2323
+ # # ├╌╌╌╌╌╌╌╌┤
2324
+ # # │ ... │
2325
+ # # ├╌╌╌╌╌╌╌╌┤
2326
+ # # │ 6 │
2327
+ # # ├╌╌╌╌╌╌╌╌┤
2328
+ # # │ 6 │
2329
+ # # ├╌╌╌╌╌╌╌╌┤
2330
+ # # │ 6 │
2331
+ # # ├╌╌╌╌╌╌╌╌┤
2332
+ # # │ 4 │
2333
+ # # └────────┘
1019
2334
  def over(expr)
1020
2335
  rbexprs = Utils.selection_to_rbexpr_list(expr)
1021
2336
  wrap_expr(_rbexpr.over(rbexprs))
1022
2337
  end
1023
2338
 
2339
+ # Get mask of unique values.
2340
+ #
2341
+ # @return [Expr]
2342
+ #
2343
+ # @example
2344
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2345
+ # df.select(Polars.col("a").is_unique)
2346
+ # # =>
2347
+ # # shape: (3, 1)
2348
+ # # ┌───────┐
2349
+ # # │ a │
2350
+ # # │ --- │
2351
+ # # │ bool │
2352
+ # # ╞═══════╡
2353
+ # # │ false │
2354
+ # # ├╌╌╌╌╌╌╌┤
2355
+ # # │ false │
2356
+ # # ├╌╌╌╌╌╌╌┤
2357
+ # # │ true │
2358
+ # # └───────┘
1024
2359
  def is_unique
1025
2360
  wrap_expr(_rbexpr.is_unique)
1026
2361
  end
1027
2362
 
2363
+ # Get a mask of the first unique value.
2364
+ #
2365
+ # @return [Expr]
2366
+ #
2367
+ # @example
2368
+ # df = Polars::DataFrame.new(
2369
+ # {
2370
+ # "num" => [1, 2, 3, 1, 5]
2371
+ # }
2372
+ # )
2373
+ # df.with_column(Polars.col("num").is_first.alias("is_first"))
2374
+ # # =>
2375
+ # # shape: (5, 2)
2376
+ # # ┌─────┬──────────┐
2377
+ # # │ num ┆ is_first │
2378
+ # # │ --- ┆ --- │
2379
+ # # │ i64 ┆ bool │
2380
+ # # ╞═════╪══════════╡
2381
+ # # │ 1 ┆ true │
2382
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2383
+ # # │ 2 ┆ true │
2384
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2385
+ # # │ 3 ┆ true │
2386
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2387
+ # # │ 1 ┆ false │
2388
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2389
+ # # │ 5 ┆ true │
2390
+ # # └─────┴──────────┘
1028
2391
  def is_first
1029
2392
  wrap_expr(_rbexpr.is_first)
1030
2393
  end
1031
2394
 
2395
+ # Get mask of duplicated values.
2396
+ #
2397
+ # @return [Expr]
2398
+ #
2399
+ # @example
2400
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2401
+ # df.select(Polars.col("a").is_duplicated)
2402
+ # # =>
2403
+ # # shape: (3, 1)
2404
+ # # ┌───────┐
2405
+ # # │ a │
2406
+ # # │ --- │
2407
+ # # │ bool │
2408
+ # # ╞═══════╡
2409
+ # # │ true │
2410
+ # # ├╌╌╌╌╌╌╌┤
2411
+ # # │ true │
2412
+ # # ├╌╌╌╌╌╌╌┤
2413
+ # # │ false │
2414
+ # # └───────┘
1032
2415
  def is_duplicated
1033
2416
  wrap_expr(_rbexpr.is_duplicated)
1034
2417
  end
1035
2418
 
1036
- def quantile(quantile, interpolation: "nearest")
1037
- wrap_expr(_rbexpr.quantile(quantile, interpolation))
1038
- end
1039
-
1040
- def filter(predicate)
1041
- wrap_expr(_rbexpr.filter(predicate._rbexpr))
1042
- end
1043
-
1044
- def where(predicate)
1045
- filter(predicate)
1046
- end
1047
-
1048
- # def map
1049
- # end
1050
-
1051
- # def apply
1052
- # end
1053
-
2419
+ # Get quantile value.
1054
2420
  #
1055
- def flatten
1056
- wrap_expr(_rbexpr.explode)
1057
- end
1058
-
1059
- def explode
1060
- wrap_expr(_rbexpr.explode)
2421
+ # @param quantile [Float]
2422
+ # Quantile between 0.0 and 1.0.
2423
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2424
+ # Interpolation method.
2425
+ #
2426
+ # @return [Expr]
2427
+ #
2428
+ # @example
2429
+ # df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
2430
+ # df.select(Polars.col("a").quantile(0.3))
2431
+ # # =>
2432
+ # # shape: (1, 1)
2433
+ # # ┌─────┐
2434
+ # # │ a │
2435
+ # # │ --- │
2436
+ # # │ f64 │
2437
+ # # ╞═════╡
2438
+ # # │ 1.0 │
2439
+ # # └─────┘
2440
+ #
2441
+ # @example
2442
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
2443
+ # # =>
2444
+ # # shape: (1, 1)
2445
+ # # ┌─────┐
2446
+ # # │ a │
2447
+ # # │ --- │
2448
+ # # │ f64 │
2449
+ # # ╞═════╡
2450
+ # # │ 2.0 │
2451
+ # # └─────┘
2452
+ #
2453
+ # @example
2454
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
2455
+ # # =>
2456
+ # # shape: (1, 1)
2457
+ # # ┌─────┐
2458
+ # # │ a │
2459
+ # # │ --- │
2460
+ # # │ f64 │
2461
+ # # ╞═════╡
2462
+ # # │ 1.0 │
2463
+ # # └─────┘
2464
+ #
2465
+ # @example
2466
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
2467
+ # # =>
2468
+ # # shape: (1, 1)
2469
+ # # ┌─────┐
2470
+ # # │ a │
2471
+ # # │ --- │
2472
+ # # │ f64 │
2473
+ # # ╞═════╡
2474
+ # # │ 1.5 │
2475
+ # # └─────┘
2476
+ #
2477
+ # @example
2478
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
2479
+ # # =>
2480
+ # # shape: (1, 1)
2481
+ # # ┌─────┐
2482
+ # # │ a │
2483
+ # # │ --- │
2484
+ # # │ f64 │
2485
+ # # ╞═════╡
2486
+ # # │ 1.5 │
2487
+ # # └─────┘
2488
+ def quantile(quantile, interpolation: "nearest")
2489
+ wrap_expr(_rbexpr.quantile(quantile, interpolation))
2490
+ end
2491
+
2492
+ # Filter a single column.
2493
+ #
2494
+ # Mostly useful in an aggregation context. If you want to filter on a DataFrame
2495
+ # level, use `LazyFrame#filter`.
2496
+ #
2497
+ # @param predicate [Expr]
2498
+ # Boolean expression.
2499
+ #
2500
+ # @return [Expr]
2501
+ #
2502
+ # @example
2503
+ # df = Polars::DataFrame.new(
2504
+ # {
2505
+ # "group_col" => ["g1", "g1", "g2"],
2506
+ # "b" => [1, 2, 3]
2507
+ # }
2508
+ # )
2509
+ # (
2510
+ # df.groupby("group_col").agg(
2511
+ # [
2512
+ # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2513
+ # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
2514
+ # ]
2515
+ # )
2516
+ # ).sort("group_col")
2517
+ # # =>
2518
+ # # shape: (2, 3)
2519
+ # # ┌───────────┬──────┬─────┐
2520
+ # # │ group_col ┆ lt ┆ gte │
2521
+ # # │ --- ┆ --- ┆ --- │
2522
+ # # │ str ┆ i64 ┆ i64 │
2523
+ # # ╞═══════════╪══════╪═════╡
2524
+ # # │ g1 ┆ 1 ┆ 2 │
2525
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2526
+ # # │ g2 ┆ null ┆ 3 │
2527
+ # # └───────────┴──────┴─────┘
2528
+ def filter(predicate)
2529
+ wrap_expr(_rbexpr.filter(predicate._rbexpr))
2530
+ end
2531
+
2532
+ # Filter a single column.
2533
+ #
2534
+ # Alias for {#filter}.
2535
+ #
2536
+ # @param predicate [Expr]
2537
+ # Boolean expression.
2538
+ #
2539
+ # @return [Expr]
2540
+ #
2541
+ # @example
2542
+ # df = Polars::DataFrame.new(
2543
+ # {
2544
+ # "group_col" => ["g1", "g1", "g2"],
2545
+ # "b" => [1, 2, 3]
2546
+ # }
2547
+ # )
2548
+ # (
2549
+ # df.groupby("group_col").agg(
2550
+ # [
2551
+ # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2552
+ # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
2553
+ # ]
2554
+ # )
2555
+ # ).sort("group_col")
2556
+ # # =>
2557
+ # # shape: (2, 3)
2558
+ # # ┌───────────┬──────┬─────┐
2559
+ # # │ group_col ┆ lt ┆ gte │
2560
+ # # │ --- ┆ --- ┆ --- │
2561
+ # # │ str ┆ i64 ┆ i64 │
2562
+ # # ╞═══════════╪══════╪═════╡
2563
+ # # │ g1 ┆ 1 ┆ 2 │
2564
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2565
+ # # │ g2 ┆ null ┆ 3 │
2566
+ # # └───────────┴──────┴─────┘
2567
+ def where(predicate)
2568
+ filter(predicate)
2569
+ end
2570
+
2571
+ # Apply a custom Ruby function to a Series or sequence of Series.
2572
+ #
2573
+ # The output of this custom function must be a Series.
2574
+ # If you want to apply a custom function elementwise over single values, see
2575
+ # {#apply}. A use case for `map` is when you want to transform an
2576
+ # expression with a third-party library.
2577
+ #
2578
+ # Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
2579
+ #
2580
+ # @param return_dtype [Symbol]
2581
+ # Dtype of the output Series.
2582
+ # @param agg_list [Boolean]
2583
+ # Aggregate list.
2584
+ #
2585
+ # @return [Expr]
2586
+ #
2587
+ # @example
2588
+ # df = Polars::DataFrame.new(
2589
+ # {
2590
+ # "sine" => [0.0, 1.0, 0.0, -1.0],
2591
+ # "cosine" => [1.0, 0.0, -1.0, 0.0]
2592
+ # }
2593
+ # )
2594
+ # df.select(Polars.all.map { |x| x.to_numpy.argmax })
2595
+ # # =>
2596
+ # # shape: (1, 2)
2597
+ # # ┌──────┬────────┐
2598
+ # # │ sine ┆ cosine │
2599
+ # # │ --- ┆ --- │
2600
+ # # │ i64 ┆ i64 │
2601
+ # # ╞══════╪════════╡
2602
+ # # │ 1 ┆ 0 │
2603
+ # # └──────┴────────┘
2604
+ # def map(return_dtype: nil, agg_list: false, &f)
2605
+ # if !return_dtype.nil?
2606
+ # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2607
+ # end
2608
+ # wrap_expr(_rbexpr.map(f, return_dtype, agg_list))
2609
+ # end
2610
+
2611
+ # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
2612
+ #
2613
+ # Depending on the context it has the following behavior:
2614
+ #
2615
+ # * Selection
2616
+ # Expects the block to take a single value and return a single value.
2617
+ # Applies a Ruby function over each individual value in the column.
2618
+ # * GroupBy
2619
+ # Expects the block to take a Series and return a Series.
2620
+ # Applies a Ruby function over each group.
2621
+ #
2622
+ # Implementing logic using a Ruby function is almost always _significantly_
2623
+ # slower and more memory intensive than implementing the same logic using
2624
+ # the native expression API because:
2625
+ #
2626
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
2627
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
2628
+ # - Polars-native expressions can be parallelised (UDFs cannot).
2629
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
2630
+ #
2631
+ # Wherever possible you should strongly prefer the native expression API
2632
+ # to achieve the best performance.
2633
+ #
2634
+ # @param return_dtype [Symbol]
2635
+ # Dtype of the output Series.
2636
+ # If not set, polars will assume that
2637
+ # the dtype remains unchanged.
2638
+ #
2639
+ # @return [Expr]
2640
+ #
2641
+ # @example
2642
+ # df = Polars::DataFrame.new(
2643
+ # {
2644
+ # "a" => [1, 2, 3, 1],
2645
+ # "b" => ["a", "b", "c", "c"]
2646
+ # }
2647
+ # )
2648
+ #
2649
+ # @example In a selection context, the function is applied by row.
2650
+ # df.with_column(
2651
+ # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
2652
+ # )
2653
+ # # =>
2654
+ # # shape: (4, 3)
2655
+ # # ┌─────┬─────┬───────────┐
2656
+ # # │ a ┆ b ┆ a_times_2 │
2657
+ # # │ --- ┆ --- ┆ --- │
2658
+ # # │ i64 ┆ str ┆ i64 │
2659
+ # # ╞═════╪═════╪═══════════╡
2660
+ # # │ 1 ┆ a ┆ 2 │
2661
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2662
+ # # │ 2 ┆ b ┆ 4 │
2663
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2664
+ # # │ 3 ┆ c ┆ 6 │
2665
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2666
+ # # │ 1 ┆ c ┆ 2 │
2667
+ # # └─────┴─────┴───────────┘
2668
+ #
2669
+ # @example In a GroupBy context the function is applied by group:
2670
+ # df.lazy
2671
+ # .groupby("b", maintain_order: true)
2672
+ # .agg(
2673
+ # [
2674
+ # Polars.col("a").apply { |x| x.sum }
2675
+ # ]
2676
+ # )
2677
+ # .collect
2678
+ # # =>
2679
+ # # shape: (3, 2)
2680
+ # # ┌─────┬─────┐
2681
+ # # │ b ┆ a │
2682
+ # # │ --- ┆ --- │
2683
+ # # │ str ┆ i64 │
2684
+ # # ╞═════╪═════╡
2685
+ # # │ a ┆ 1 │
2686
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2687
+ # # │ b ┆ 2 │
2688
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2689
+ # # │ c ┆ 4 │
2690
+ # # └─────┴─────┘
2691
+ # def apply(return_dtype: nil, &f)
2692
+ # wrap_f = lambda do |x|
2693
+ # x.apply(return_dtype: return_dtype, &f)
2694
+ # end
2695
+ # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
2696
+ # end
2697
+
2698
+ # Explode a list or utf8 Series. This means that every item is expanded to a new
2699
+ # row.
2700
+ #
2701
+ # Alias for {#explode}.
2702
+ #
2703
+ # @return [Expr]
2704
+ #
2705
+ # @example
2706
+ # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2707
+ # df.select(Polars.col("foo").flatten)
2708
+ # # =>
2709
+ # # shape: (10, 1)
2710
+ # # ┌─────┐
2711
+ # # │ foo │
2712
+ # # │ --- │
2713
+ # # │ str │
2714
+ # # ╞═════╡
2715
+ # # │ h │
2716
+ # # ├╌╌╌╌╌┤
2717
+ # # │ e │
2718
+ # # ├╌╌╌╌╌┤
2719
+ # # │ l │
2720
+ # # ├╌╌╌╌╌┤
2721
+ # # │ l │
2722
+ # # ├╌╌╌╌╌┤
2723
+ # # │ ... │
2724
+ # # ├╌╌╌╌╌┤
2725
+ # # │ o │
2726
+ # # ├╌╌╌╌╌┤
2727
+ # # │ r │
2728
+ # # ├╌╌╌╌╌┤
2729
+ # # │ l │
2730
+ # # ├╌╌╌╌╌┤
2731
+ # # │ d │
2732
+ # # └─────┘
2733
+ def flatten
2734
+ wrap_expr(_rbexpr.explode)
2735
+ end
2736
+
2737
+ # Explode a list or utf8 Series.
2738
+ #
2739
+ # This means that every item is expanded to a new row.
2740
+ #
2741
+ # @return [Expr]
2742
+ #
2743
+ # @example
2744
+ # df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
2745
+ # df.select(Polars.col("b").explode)
2746
+ # # =>
2747
+ # # shape: (6, 1)
2748
+ # # ┌─────┐
2749
+ # # │ b │
2750
+ # # │ --- │
2751
+ # # │ i64 │
2752
+ # # ╞═════╡
2753
+ # # │ 1 │
2754
+ # # ├╌╌╌╌╌┤
2755
+ # # │ 2 │
2756
+ # # ├╌╌╌╌╌┤
2757
+ # # │ 3 │
2758
+ # # ├╌╌╌╌╌┤
2759
+ # # │ 4 │
2760
+ # # ├╌╌╌╌╌┤
2761
+ # # │ 5 │
2762
+ # # ├╌╌╌╌╌┤
2763
+ # # │ 6 │
2764
+ # # └─────┘
2765
+ def explode
2766
+ wrap_expr(_rbexpr.explode)
1061
2767
  end
1062
2768
 
2769
+ # Take every nth value in the Series and return as a new Series.
2770
+ #
2771
+ # @return [Expr]
2772
+ #
2773
+ # @example
2774
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2775
+ # df.select(Polars.col("foo").take_every(3))
2776
+ # # =>
2777
+ # # shape: (3, 1)
2778
+ # # ┌─────┐
2779
+ # # │ foo │
2780
+ # # │ --- │
2781
+ # # │ i64 │
2782
+ # # ╞═════╡
2783
+ # # │ 1 │
2784
+ # # ├╌╌╌╌╌┤
2785
+ # # │ 4 │
2786
+ # # ├╌╌╌╌╌┤
2787
+ # # │ 7 │
2788
+ # # └─────┘
1063
2789
  def take_every(n)
1064
2790
  wrap_expr(_rbexpr.take_every(n))
1065
2791
  end
1066
2792
 
2793
+ # Get the first `n` rows.
2794
+ #
2795
+ # @param n [Integer]
2796
+ # Number of rows to return.
2797
+ #
2798
+ # @return [Expr]
2799
+ #
2800
+ # @example
2801
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2802
+ # df.select(Polars.col("foo").head(3))
2803
+ # # =>
2804
+ # # shape: (3, 1)
2805
+ # # ┌─────┐
2806
+ # # │ foo │
2807
+ # # │ --- │
2808
+ # # │ i64 │
2809
+ # # ╞═════╡
2810
+ # # │ 1 │
2811
+ # # ├╌╌╌╌╌┤
2812
+ # # │ 2 │
2813
+ # # ├╌╌╌╌╌┤
2814
+ # # │ 3 │
2815
+ # # └─────┘
1067
2816
  def head(n = 10)
1068
2817
  wrap_expr(_rbexpr.head(n))
1069
2818
  end
1070
2819
 
2820
+ # Get the last `n` rows.
2821
+ #
2822
+ # @param n [Integer]
2823
+ # Number of rows to return.
2824
+ #
2825
+ # @return [Expr]
2826
+ #
2827
+ # @example
2828
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2829
+ # df.select(Polars.col("foo").tail(3))
2830
+ # # =>
2831
+ # # shape: (3, 1)
2832
+ # # ┌─────┐
2833
+ # # │ foo │
2834
+ # # │ --- │
2835
+ # # │ i64 │
2836
+ # # ╞═════╡
2837
+ # # │ 5 │
2838
+ # # ├╌╌╌╌╌┤
2839
+ # # │ 6 │
2840
+ # # ├╌╌╌╌╌┤
2841
+ # # │ 7 │
2842
+ # # └─────┘
1071
2843
  def tail(n = 10)
1072
2844
  wrap_expr(_rbexpr.tail(n))
1073
2845
  end
1074
2846
 
2847
+ # Get the first `n` rows.
2848
+ #
2849
+ # Alias for {#head}.
2850
+ #
2851
+ # @param n [Integer]
2852
+ # Number of rows to return.
2853
+ #
2854
+ # @return [Expr]
1075
2855
  def limit(n = 10)
1076
2856
  head(n)
1077
2857
  end
1078
2858
 
2859
+ # Raise expression to the power of exponent.
2860
+ #
2861
+ # @return [Expr]
2862
+ #
2863
+ # @example
2864
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
2865
+ # df.select(Polars.col("foo").pow(3))
2866
+ # # =>
2867
+ # # shape: (4, 1)
2868
+ # # ┌──────┐
2869
+ # # │ foo │
2870
+ # # │ --- │
2871
+ # # │ f64 │
2872
+ # # ╞══════╡
2873
+ # # │ 1.0 │
2874
+ # # ├╌╌╌╌╌╌┤
2875
+ # # │ 8.0 │
2876
+ # # ├╌╌╌╌╌╌┤
2877
+ # # │ 27.0 │
2878
+ # # ├╌╌╌╌╌╌┤
2879
+ # # │ 64.0 │
2880
+ # # └──────┘
1079
2881
  def pow(exponent)
1080
2882
  exponent = Utils.expr_to_lit_or_expr(exponent)
1081
2883
  wrap_expr(_rbexpr.pow(exponent._rbexpr))
1082
2884
  end
1083
2885
 
1084
- # def is_in
1085
- # end
1086
-
2886
+ # Check if elements of this expression are present in the other Series.
1087
2887
  #
1088
- def repeat_by(by)
1089
- by = Utils.expr_to_lit_or_expr(by, false)
1090
- wrap_expr(_rbexpr.repeat_by(by._rbexpr))
1091
- end
1092
-
1093
- # def is_between
1094
- # end
1095
-
1096
- # def _hash
1097
- # end
1098
-
2888
+ # @param other [Object]
2889
+ # Series or sequence of primitive type.
1099
2890
  #
1100
- def reinterpret(signed: false)
1101
- wrap_expr(_rbexpr.reinterpret(signed))
1102
- end
1103
-
1104
- # def _inspect
1105
- # end
1106
-
2891
+ # @return [Expr]
1107
2892
  #
1108
- def interpolate
1109
- wrap_expr(_rbexpr.interpolate)
1110
- end
1111
-
1112
- # def rolling_min
1113
- # end
1114
-
1115
- # def rolling_max
1116
- # end
2893
+ # @example
2894
+ # df = Polars::DataFrame.new(
2895
+ # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
2896
+ # )
2897
+ # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
2898
+ # # =>
2899
+ # # shape: (3, 1)
2900
+ # # ┌──────────┐
2901
+ # # │ contains │
2902
+ # # │ --- │
2903
+ # # │ bool │
2904
+ # # ╞══════════╡
2905
+ # # │ true │
2906
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2907
+ # # │ true │
2908
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2909
+ # # │ false │
2910
+ # # └──────────┘
2911
+ def is_in(other)
2912
+ if other.is_a?(Array)
2913
+ if other.length == 0
2914
+ other = Polars.lit(nil)
2915
+ else
2916
+ other = Polars.lit(Series.new(other))
2917
+ end
2918
+ else
2919
+ other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
2920
+ end
2921
+ wrap_expr(_rbexpr.is_in(other._rbexpr))
2922
+ end
1117
2923
 
1118
- # def rolling_mean
1119
- # end
2924
+ # Repeat the elements in this Series as specified in the given expression.
2925
+ #
2926
+ # The repeated elements are expanded into a `List`.
2927
+ #
2928
+ # @param by [Object]
2929
+ # Numeric column that determines how often the values will be repeated.
2930
+ # The column will be coerced to UInt32. Give this dtype to make the coercion a
2931
+ # no-op.
2932
+ #
2933
+ # @return [Expr]
2934
+ #
2935
+ # @example
2936
+ # df = Polars::DataFrame.new(
2937
+ # {
2938
+ # "a" => ["x", "y", "z"],
2939
+ # "n" => [1, 2, 3]
2940
+ # }
2941
+ # )
2942
+ # df.select(Polars.col("a").repeat_by("n"))
2943
+ # # =>
2944
+ # # shape: (3, 1)
2945
+ # # ┌─────────────────┐
2946
+ # # │ a │
2947
+ # # │ --- │
2948
+ # # │ list[str] │
2949
+ # # ╞═════════════════╡
2950
+ # # │ ["x"] │
2951
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2952
+ # # │ ["y", "y"] │
2953
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2954
+ # # │ ["z", "z", "z"] │
2955
+ # # └─────────────────┘
2956
+ def repeat_by(by)
2957
+ by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
2958
+ wrap_expr(_rbexpr.repeat_by(by._rbexpr))
2959
+ end
1120
2960
 
1121
- # def rolling_sum
1122
- # end
2961
+ # Check if this expression is between start and end.
2962
+ #
2963
+ # @param start [Object]
2964
+ # Lower bound as primitive type or datetime.
2965
+ # @param _end [Object]
2966
+ # Upper bound as primitive type or datetime.
2967
+ # @param include_bounds [Boolean, Array<Boolean>]
2968
+ # `false`: Exclude both start and end (default).
2969
+ # `true`: Include both start and end.
2970
+ # `[false, false]`: Exclude start and exclude end.
2971
+ # `[true, true]`: Include start and include end.
2972
+ # `[false, true]`: Exclude start and include end.
2973
+ # `[true, false]`: Include start and exclude end.
2974
+ #
2975
+ # @return [Expr]
2976
+ #
2977
+ # @example
2978
+ # df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
2979
+ # df.with_column(Polars.col("num").is_between(2, 4))
2980
+ # # =>
2981
+ # # shape: (5, 2)
2982
+ # # ┌─────┬────────────┐
2983
+ # # │ num ┆ is_between │
2984
+ # # │ --- ┆ --- │
2985
+ # # │ i64 ┆ bool │
2986
+ # # ╞═════╪════════════╡
2987
+ # # │ 1 ┆ false │
2988
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2989
+ # # │ 2 ┆ false │
2990
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2991
+ # # │ 3 ┆ true │
2992
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2993
+ # # │ 4 ┆ false │
2994
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2995
+ # # │ 5 ┆ false │
2996
+ # # └─────┴────────────┘
2997
def is_between(start, _end, include_bounds: false)
  # Normalize the flag into an [include_start, include_end] pair; anything
  # other than a bool or one of the four bool pairs is rejected.
  include_start, include_end =
    case include_bounds
    when false, [false, false] then [false, false]
    when true, [true, true] then [true, true]
    when [false, true] then [false, true]
    when [true, false] then [true, false]
    else
      raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
    end

  # Build each side of the range check with the matching strict/inclusive operator.
  lower = include_start ? (self >= start) : (self > start)
  upper = include_end ? (self <= _end) : (self < _end)
  (lower & upper).alias("is_between")
end
1123
3010
 
1124
- # def rolling_std
1125
- # end
3011
+ # Hash the elements in the selection.
3012
+ #
3013
+ # The hash value is of type `:u64`.
3014
+ #
3015
+ # @param seed [Integer]
3016
+ # Random seed parameter. Defaults to 0.
3017
+ # @param seed_1 [Integer]
3018
+ # Random seed parameter. Defaults to `seed` if not set.
3019
+ # @param seed_2 [Integer]
3020
+ # Random seed parameter. Defaults to `seed` if not set.
3021
+ # @param seed_3 [Integer]
3022
+ # Random seed parameter. Defaults to `seed` if not set.
3023
+ #
3024
+ # @return [Expr]
3025
+ #
3026
+ # @example
3027
+ # df = Polars::DataFrame.new(
3028
+ # {
3029
+ # "a" => [1, 2, nil],
3030
+ # "b" => ["x", nil, "z"]
3031
+ # }
3032
+ # )
3033
+ # df.with_column(Polars.all._hash(10, 20, 30, 40))
3034
+ # # =>
3035
+ # # shape: (3, 2)
3036
+ # # ┌──────────────────────┬──────────────────────┐
3037
+ # # │ a ┆ b │
3038
+ # # │ --- ┆ --- │
3039
+ # # │ u64 ┆ u64 │
3040
+ # # ╞══════════════════════╪══════════════════════╡
3041
+ # # │ 4629889412789719550 ┆ 6959506404929392568 │
3042
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
3043
+ # # │ 16386608652769605760 ┆ 11638928888656214026 │
3044
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
3045
+ # # │ 11638928888656214026 ┆ 11040941213715918520 │
3046
+ # # └──────────────────────┴──────────────────────┘
3047
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
  # Only a nil secondary seed falls back to `seed`; any other value is kept as given.
  fallback_seeds = [seed_1, seed_2, seed_3].map { |s| s.nil? ? seed : s }
  hashed = _rbexpr._hash(seed, *fallback_seeds)
  wrap_expr(hashed)
end
3054
+
3055
+ # Reinterpret the underlying bits as a signed/unsigned integer.
3056
+ #
3057
+ # This operation is only allowed for 64bit integers. For lower bits integers,
3058
+ # you can safely use the cast operation.
3059
+ #
3060
+ # @param signed [Boolean]
3061
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
3062
+ #
3063
+ # @return [Expr]
3064
+ #
3065
+ # @example
3066
+ # s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
3067
+ # df = Polars::DataFrame.new([s])
3068
+ # df.select(
3069
+ # [
3070
+ # Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
3071
+ # Polars.col("a").alias("original")
3072
+ # ]
3073
+ # )
3074
+ # # =>
3075
+ # # shape: (3, 2)
3076
+ # # ┌───────────────┬──────────┐
3077
+ # # │ reinterpreted ┆ original │
3078
+ # # │ --- ┆ --- │
3079
+ # # │ i64 ┆ u64 │
3080
+ # # ╞═══════════════╪══════════╡
3081
+ # # │ 1 ┆ 1 │
3082
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
3083
+ # # │ 1 ┆ 1 │
3084
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
3085
+ # # │ 2 ┆ 2 │
3086
+ # # └───────────────┴──────────┘
3087
def reinterpret(signed: false)
  # Forward the flag positionally to the underlying `_rbexpr` and re-wrap the result.
  reinterpreted = _rbexpr.reinterpret(signed)
  wrap_expr(reinterpreted)
end
1126
3090
 
1127
- # def rolling_var
1128
- # end
3091
+ # Print the value that this expression evaluates to and pass on the value.
3092
+ #
3093
+ # @return [Expr]
3094
+ #
3095
+ # @example
3096
+ # df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
3097
+ # df.select(Polars.col("foo").cumsum._inspect("value is: %s").alias("bar"))
3098
+ # # =>
3099
+ # # value is: shape: (3,)
3100
+ # # Series: 'foo' [i64]
3101
+ # # [
3102
+ # # 1
3103
+ # # 2
3104
+ # # 4
3105
+ # # ]
3106
+ # # shape: (3, 1)
3107
+ # # ┌─────┐
3108
+ # # │ bar │
3109
+ # # │ --- │
3110
+ # # │ i64 │
3111
+ # # ╞═════╡
3112
+ # # │ 1 │
3113
+ # # ├╌╌╌╌╌┤
3114
+ # # │ 2 │
3115
+ # # ├╌╌╌╌╌┤
3116
+ # # │ 4 │
3117
+ # # └─────┘
3118
+ # def _inspect(fmt = "%s")
3119
+ # inspect = lambda do |s|
3120
+ # puts(fmt % [s])
3121
+ # s
3122
+ # end
1129
3123
 
1130
- # def rolling_median
3124
+ # map(return_dtype: nil, agg_list: true, &inspect)
1131
3125
  # end
1132
3126
 
1133
- # def rolling_quantile
1134
- # end
3127
+ # Fill nulls with linear interpolation over missing values.
3128
+ #
3129
+ # Can also be used to regrid data to a new grid - see examples below.
3130
+ #
3131
+ # @return [Expr]
3132
+ #
3133
+ # @example Fill nulls with linear interpolation
3134
+ # df = Polars::DataFrame.new(
3135
+ # {
3136
+ # "a" => [1, nil, 3],
3137
+ # "b" => [1.0, Float::NAN, 3.0]
3138
+ # }
3139
+ # )
3140
+ # df.select(Polars.all.interpolate)
3141
+ # # =>
3142
+ # # shape: (3, 2)
3143
+ # # ┌─────┬─────┐
3144
+ # # │ a ┆ b │
3145
+ # # │ --- ┆ --- │
3146
+ # # │ i64 ┆ f64 │
3147
+ # # ╞═════╪═════╡
3148
+ # # │ 1 ┆ 1.0 │
3149
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3150
+ # # │ 2 ┆ NaN │
3151
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3152
+ # # │ 3 ┆ 3.0 │
3153
+ # # └─────┴─────┘
3154
def interpolate
  # Delegate to the underlying `_rbexpr` and wrap the result back up.
  interpolated = _rbexpr.interpolate
  wrap_expr(interpolated)
end
1135
3157
 
1136
- # def rolling_apply
3158
+ # Apply a rolling min (moving min) over the values in this array.
3159
+ #
3160
+ # A window of length `window_size` will traverse the array. The values that fill
3161
+ # this window will (optionally) be multiplied with the weights given by the
3162
+ # `weights` vector. The resulting values will be aggregated to their minimum.
3163
+ #
3164
+ # @param window_size [Integer]
3165
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3166
+ # size indicated by a timedelta or the following string language:
3167
+ #
3168
+ # - 1ns (1 nanosecond)
3169
+ # - 1us (1 microsecond)
3170
+ # - 1ms (1 millisecond)
3171
+ # - 1s (1 second)
3172
+ # - 1m (1 minute)
3173
+ # - 1h (1 hour)
3174
+ # - 1d (1 day)
3175
+ # - 1w (1 week)
3176
+ # - 1mo (1 calendar month)
3177
+ # - 1y (1 calendar year)
3178
+ # - 1i (1 index count)
3179
+ #
3180
+ # If a timedelta or the dynamic string language is used, the `by`
3181
+ # and `closed` arguments must also be set.
3182
+ # @param weights [Array]
3183
+ # An optional slice with the same length as the window that will be multiplied
3184
+ # elementwise with the values in the window.
3185
+ # @param min_periods [Integer]
3186
+ # The number of values in the window that should be non-null before computing
3187
+ # a result. If nil, it will be set equal to window size.
3188
+ # @param center [Boolean]
3189
+ # Set the labels at the center of the window
3190
+ # @param by [String]
3191
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3192
+ # set the column that will be used to determine the windows. This column must
3193
+ # be of dtype `{Date, Datetime}`
3194
+ # @param closed ["left", "right", "both", "none"]
3195
+ # Define whether the temporal window interval is closed or not.
3196
+ #
3197
+ # @note
3198
+ # This functionality is experimental and may change without it being considered a
3199
+ # breaking change.
3200
+ #
3201
+ # @note
3202
+ # If you want to compute multiple aggregation statistics over the same dynamic
3203
+ # window, consider using `groupby_rolling` this method can cache the window size
3204
+ # computation.
3205
+ #
3206
+ # @return [Expr]
3207
+ #
3208
+ # @example
3209
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3210
+ # df.select(
3211
+ # [
3212
+ # Polars.col("A").rolling_min(2)
3213
+ # ]
3214
+ # )
3215
+ # # =>
3216
+ # # shape: (6, 1)
3217
+ # # ┌──────┐
3218
+ # # │ A │
3219
+ # # │ --- │
3220
+ # # │ f64 │
3221
+ # # ╞══════╡
3222
+ # # │ null │
3223
+ # # ├╌╌╌╌╌╌┤
3224
+ # # │ 1.0 │
3225
+ # # ├╌╌╌╌╌╌┤
3226
+ # # │ 2.0 │
3227
+ # # ├╌╌╌╌╌╌┤
3228
+ # # │ 3.0 │
3229
+ # # ├╌╌╌╌╌╌┤
3230
+ # # │ 4.0 │
3231
+ # # ├╌╌╌╌╌╌┤
3232
+ # # │ 5.0 │
3233
+ # # └──────┘
3234
def rolling_min(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_min(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3251
+
3252
+ # Apply a rolling max (moving max) over the values in this array.
3253
+ #
3254
+ # A window of length `window_size` will traverse the array. The values that fill
3255
+ # this window will (optionally) be multiplied with the weights given by the
3256
+ # `weights` vector. The resulting values will be aggregated to their maximum.
3257
+ #
3258
+ # @param window_size [Integer]
3259
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3260
+ # size indicated by a timedelta or the following string language:
3261
+ #
3262
+ # - 1ns (1 nanosecond)
3263
+ # - 1us (1 microsecond)
3264
+ # - 1ms (1 millisecond)
3265
+ # - 1s (1 second)
3266
+ # - 1m (1 minute)
3267
+ # - 1h (1 hour)
3268
+ # - 1d (1 day)
3269
+ # - 1w (1 week)
3270
+ # - 1mo (1 calendar month)
3271
+ # - 1y (1 calendar year)
3272
+ # - 1i (1 index count)
3273
+ #
3274
+ # If a timedelta or the dynamic string language is used, the `by`
3275
+ # and `closed` arguments must also be set.
3276
+ # @param weights [Array]
3277
+ # An optional slice with the same length as the window that will be multiplied
3278
+ # elementwise with the values in the window.
3279
+ # @param min_periods [Integer]
3280
+ # The number of values in the window that should be non-null before computing
3281
+ # a result. If nil, it will be set equal to window size.
3282
+ # @param center [Boolean]
3283
+ # Set the labels at the center of the window
3284
+ # @param by [String]
3285
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3286
+ # set the column that will be used to determine the windows. This column must
3287
+ # be of dtype `{Date, Datetime}`
3288
+ # @param closed ["left", "right", "both", "none"]
3289
+ # Define whether the temporal window interval is closed or not.
3290
+ #
3291
+ # @note
3292
+ # This functionality is experimental and may change without it being considered a
3293
+ # breaking change.
3294
+ #
3295
+ # @note
3296
+ # If you want to compute multiple aggregation statistics over the same dynamic
3297
+ # window, consider using `groupby_rolling` this method can cache the window size
3298
+ # computation.
3299
+ #
3300
+ # @return [Expr]
3301
+ #
3302
+ # @example
3303
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3304
+ # df.select(
3305
+ # [
3306
+ # Polars.col("A").rolling_max(2)
3307
+ # ]
3308
+ # )
3309
+ # # =>
3310
+ # # shape: (6, 1)
3311
+ # # ┌──────┐
3312
+ # # │ A │
3313
+ # # │ --- │
3314
+ # # │ f64 │
3315
+ # # ╞══════╡
3316
+ # # │ null │
3317
+ # # ├╌╌╌╌╌╌┤
3318
+ # # │ 2.0 │
3319
+ # # ├╌╌╌╌╌╌┤
3320
+ # # │ 3.0 │
3321
+ # # ├╌╌╌╌╌╌┤
3322
+ # # │ 4.0 │
3323
+ # # ├╌╌╌╌╌╌┤
3324
+ # # │ 5.0 │
3325
+ # # ├╌╌╌╌╌╌┤
3326
+ # # │ 6.0 │
3327
+ # # └──────┘
3328
def rolling_max(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_max(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3345
+
3346
+ # Apply a rolling mean (moving mean) over the values in this array.
3347
+ #
3348
+ # A window of length `window_size` will traverse the array. The values that fill
3349
+ # this window will (optionally) be multiplied with the weights given by the
3350
+ # `weights` vector. The resulting values will be aggregated to their mean.
3351
+ #
3352
+ # @param window_size [Integer]
3353
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3354
+ # size indicated by a timedelta or the following string language:
3355
+ #
3356
+ # - 1ns (1 nanosecond)
3357
+ # - 1us (1 microsecond)
3358
+ # - 1ms (1 millisecond)
3359
+ # - 1s (1 second)
3360
+ # - 1m (1 minute)
3361
+ # - 1h (1 hour)
3362
+ # - 1d (1 day)
3363
+ # - 1w (1 week)
3364
+ # - 1mo (1 calendar month)
3365
+ # - 1y (1 calendar year)
3366
+ # - 1i (1 index count)
3367
+ #
3368
+ # If a timedelta or the dynamic string language is used, the `by`
3369
+ # and `closed` arguments must also be set.
3370
+ # @param weights [Array]
3371
+ # An optional slice with the same length as the window that will be multiplied
3372
+ # elementwise with the values in the window.
3373
+ # @param min_periods [Integer]
3374
+ # The number of values in the window that should be non-null before computing
3375
+ # a result. If nil, it will be set equal to window size.
3376
+ # @param center [Boolean]
3377
+ # Set the labels at the center of the window
3378
+ # @param by [String]
3379
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3380
+ # set the column that will be used to determine the windows. This column must
3381
+ # be of dtype `{Date, Datetime}`
3382
+ # @param closed ["left", "right", "both", "none"]
3383
+ # Define whether the temporal window interval is closed or not.
3384
+ #
3385
+ # @note
3386
+ # This functionality is experimental and may change without it being considered a
3387
+ # breaking change.
3388
+ #
3389
+ # @note
3390
+ # If you want to compute multiple aggregation statistics over the same dynamic
3391
+ # window, consider using `groupby_rolling` this method can cache the window size
3392
+ # computation.
3393
+ #
3394
+ # @return [Expr]
3395
+ #
3396
+ # @example
3397
+ # df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
3398
+ # df.select(
3399
+ # [
3400
+ # Polars.col("A").rolling_mean(2)
3401
+ # ]
3402
+ # )
3403
+ # # =>
3404
+ # # shape: (6, 1)
3405
+ # # ┌──────┐
3406
+ # # │ A │
3407
+ # # │ --- │
3408
+ # # │ f64 │
3409
+ # # ╞══════╡
3410
+ # # │ null │
3411
+ # # ├╌╌╌╌╌╌┤
3412
+ # # │ 4.5 │
3413
+ # # ├╌╌╌╌╌╌┤
3414
+ # # │ 7.0 │
3415
+ # # ├╌╌╌╌╌╌┤
3416
+ # # │ 4.0 │
3417
+ # # ├╌╌╌╌╌╌┤
3418
+ # # │ 9.0 │
3419
+ # # ├╌╌╌╌╌╌┤
3420
+ # # │ 13.0 │
3421
+ # # └──────┘
3422
def rolling_mean(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_mean(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3439
+
3440
+ # Apply a rolling sum (moving sum) over the values in this array.
3441
+ #
3442
+ # A window of length `window_size` will traverse the array. The values that fill
3443
+ # this window will (optionally) be multiplied with the weights given by the
3444
+ # `weight` vector. The resulting values will be aggregated to their sum.
3445
+ #
3446
+ # @param window_size [Integer]
3447
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3448
+ # size indicated by a timedelta or the following string language:
3449
+ #
3450
+ # - 1ns (1 nanosecond)
3451
+ # - 1us (1 microsecond)
3452
+ # - 1ms (1 millisecond)
3453
+ # - 1s (1 second)
3454
+ # - 1m (1 minute)
3455
+ # - 1h (1 hour)
3456
+ # - 1d (1 day)
3457
+ # - 1w (1 week)
3458
+ # - 1mo (1 calendar month)
3459
+ # - 1y (1 calendar year)
3460
+ # - 1i (1 index count)
3461
+ #
3462
+ # If a timedelta or the dynamic string language is used, the `by`
3463
+ # and `closed` arguments must also be set.
3464
+ # @param weights [Array]
3465
+ # An optional slice with the same length as the window that will be multiplied
3466
+ # elementwise with the values in the window.
3467
+ # @param min_periods [Integer]
3468
+ # The number of values in the window that should be non-null before computing
3469
+ # a result. If nil, it will be set equal to window size.
3470
+ # @param center [Boolean]
3471
+ # Set the labels at the center of the window
3472
+ # @param by [String]
3473
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3474
+ # set the column that will be used to determine the windows. This column must
3475
+ # be of dtype `{Date, Datetime}`
3476
+ # @param closed ["left", "right", "both", "none"]
3477
+ # Define whether the temporal window interval is closed or not.
3478
+ #
3479
+ # @note
3480
+ # This functionality is experimental and may change without it being considered a
3481
+ # breaking change.
3482
+ #
3483
+ # @note
3484
+ # If you want to compute multiple aggregation statistics over the same dynamic
3485
+ # window, consider using `groupby_rolling` this method can cache the window size
3486
+ # computation.
3487
+ #
3488
+ # @return [Expr]
3489
+ #
3490
+ # @example
3491
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3492
+ # df.select(
3493
+ # [
3494
+ # Polars.col("A").rolling_sum(2)
3495
+ # ]
3496
+ # )
3497
+ # # =>
3498
+ # # shape: (6, 1)
3499
+ # # ┌──────┐
3500
+ # # │ A │
3501
+ # # │ --- │
3502
+ # # │ f64 │
3503
+ # # ╞══════╡
3504
+ # # │ null │
3505
+ # # ├╌╌╌╌╌╌┤
3506
+ # # │ 3.0 │
3507
+ # # ├╌╌╌╌╌╌┤
3508
+ # # │ 5.0 │
3509
+ # # ├╌╌╌╌╌╌┤
3510
+ # # │ 7.0 │
3511
+ # # ├╌╌╌╌╌╌┤
3512
+ # # │ 9.0 │
3513
+ # # ├╌╌╌╌╌╌┤
3514
+ # # │ 11.0 │
3515
+ # # └──────┘
3516
def rolling_sum(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_sum(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3533
+
3534
+ # Compute a rolling standard deviation.
3535
+ #
3536
+ # A window of length `window_size` will traverse the array. The values that fill
3537
+ # this window will (optionally) be multiplied with the weights given by the
3538
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3539
+ #
3540
+ # @param window_size [Integer]
3541
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3542
+ # size indicated by a timedelta or the following string language:
3543
+ #
3544
+ # - 1ns (1 nanosecond)
3545
+ # - 1us (1 microsecond)
3546
+ # - 1ms (1 millisecond)
3547
+ # - 1s (1 second)
3548
+ # - 1m (1 minute)
3549
+ # - 1h (1 hour)
3550
+ # - 1d (1 day)
3551
+ # - 1w (1 week)
3552
+ # - 1mo (1 calendar month)
3553
+ # - 1y (1 calendar year)
3554
+ # - 1i (1 index count)
3555
+ #
3556
+ # If a timedelta or the dynamic string language is used, the `by`
3557
+ # and `closed` arguments must also be set.
3558
+ # @param weights [Array]
3559
+ # An optional slice with the same length as the window that will be multiplied
3560
+ # elementwise with the values in the window.
3561
+ # @param min_periods [Integer]
3562
+ # The number of values in the window that should be non-null before computing
3563
+ # a result. If nil, it will be set equal to window size.
3564
+ # @param center [Boolean]
3565
+ # Set the labels at the center of the window
3566
+ # @param by [String]
3567
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3568
+ # set the column that will be used to determine the windows. This column must
3569
+ # be of dtype `{Date, Datetime}`
3570
+ # @param closed ["left", "right", "both", "none"]
3571
+ # Define whether the temporal window interval is closed or not.
3572
+ #
3573
+ # @note
3574
+ # This functionality is experimental and may change without it being considered a
3575
+ # breaking change.
3576
+ #
3577
+ # @note
3578
+ # If you want to compute multiple aggregation statistics over the same dynamic
3579
+ # window, consider using `groupby_rolling` this method can cache the window size
3580
+ # computation.
3581
+ #
3582
+ # @return [Expr]
3583
+ #
3584
+ # @example
3585
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3586
+ # df.select(
3587
+ # [
3588
+ # Polars.col("A").rolling_std(3)
3589
+ # ]
3590
+ # )
3591
+ # # =>
3592
+ # # shape: (6, 1)
3593
+ # # ┌──────────┐
3594
+ # # │ A │
3595
+ # # │ --- │
3596
+ # # │ f64 │
3597
+ # # ╞══════════╡
3598
+ # # │ null │
3599
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3600
+ # # │ null │
3601
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3602
+ # # │ 1.0 │
3603
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3604
+ # # │ 1.0 │
3605
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3606
+ # # │ 1.527525 │
3607
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3608
+ # # │ 2.0 │
3609
+ # # └──────────┘
3610
def rolling_std(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_std(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3627
+
3628
+ # Compute a rolling variance.
3629
+ #
3630
+ # A window of length `window_size` will traverse the array. The values that fill
3631
+ # this window will (optionally) be multiplied with the weights given by the
3632
+ # `weights` vector. The resulting values will be aggregated to their variance.
3633
+ #
3634
+ # @param window_size [Integer]
3635
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3636
+ # size indicated by a timedelta or the following string language:
3637
+ #
3638
+ # - 1ns (1 nanosecond)
3639
+ # - 1us (1 microsecond)
3640
+ # - 1ms (1 millisecond)
3641
+ # - 1s (1 second)
3642
+ # - 1m (1 minute)
3643
+ # - 1h (1 hour)
3644
+ # - 1d (1 day)
3645
+ # - 1w (1 week)
3646
+ # - 1mo (1 calendar month)
3647
+ # - 1y (1 calendar year)
3648
+ # - 1i (1 index count)
3649
+ #
3650
+ # If a timedelta or the dynamic string language is used, the `by`
3651
+ # and `closed` arguments must also be set.
3652
+ # @param weights [Array]
3653
+ # An optional slice with the same length as the window that will be multiplied
3654
+ # elementwise with the values in the window.
3655
+ # @param min_periods [Integer]
3656
+ # The number of values in the window that should be non-null before computing
3657
+ # a result. If nil, it will be set equal to window size.
3658
+ # @param center [Boolean]
3659
+ # Set the labels at the center of the window
3660
+ # @param by [String]
3661
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3662
+ # set the column that will be used to determine the windows. This column must
3663
+ # be of dtype `{Date, Datetime}`
3664
+ # @param closed ["left", "right", "both", "none"]
3665
+ # Define whether the temporal window interval is closed or not.
3666
+ #
3667
+ # @note
3668
+ # This functionality is experimental and may change without it being considered a
3669
+ # breaking change.
3670
+ #
3671
+ # @note
3672
+ # If you want to compute multiple aggregation statistics over the same dynamic
3673
+ # window, consider using `groupby_rolling` this method can cache the window size
3674
+ # computation.
3675
+ #
3676
+ # @return [Expr]
3677
+ #
3678
+ # @example
3679
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3680
+ # df.select(
3681
+ # [
3682
+ # Polars.col("A").rolling_var(3)
3683
+ # ]
3684
+ # )
3685
+ # # =>
3686
+ # # shape: (6, 1)
3687
+ # # ┌──────────┐
3688
+ # # │ A │
3689
+ # # │ --- │
3690
+ # # │ f64 │
3691
+ # # ╞══════════╡
3692
+ # # │ null │
3693
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3694
+ # # │ null │
3695
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3696
+ # # │ 1.0 │
3697
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3698
+ # # │ 1.0 │
3699
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3700
+ # # │ 2.333333 │
3701
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3702
+ # # │ 4.0 │
3703
+ # # └──────────┘
3704
def rolling_var(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_var(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3721
+
3722
+ # Compute a rolling median.
3723
+ #
3724
+ # @param window_size [Integer]
3725
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3726
+ # size indicated by a timedelta or the following string language:
3727
+ #
3728
+ # - 1ns (1 nanosecond)
3729
+ # - 1us (1 microsecond)
3730
+ # - 1ms (1 millisecond)
3731
+ # - 1s (1 second)
3732
+ # - 1m (1 minute)
3733
+ # - 1h (1 hour)
3734
+ # - 1d (1 day)
3735
+ # - 1w (1 week)
3736
+ # - 1mo (1 calendar month)
3737
+ # - 1y (1 calendar year)
3738
+ # - 1i (1 index count)
3739
+ #
3740
+ # If a timedelta or the dynamic string language is used, the `by`
3741
+ # and `closed` arguments must also be set.
3742
+ # @param weights [Array]
3743
+ # An optional slice with the same length as the window that will be multiplied
3744
+ # elementwise with the values in the window.
3745
+ # @param min_periods [Integer]
3746
+ # The number of values in the window that should be non-null before computing
3747
+ # a result. If nil, it will be set equal to window size.
3748
+ # @param center [Boolean]
3749
+ # Set the labels at the center of the window
3750
+ # @param by [String]
3751
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3752
+ # set the column that will be used to determine the windows. This column must
3753
+ # be of dtype `{Date, Datetime}`
3754
+ # @param closed ["left", "right", "both", "none"]
3755
+ # Define whether the temporal window interval is closed or not.
3756
+ #
3757
+ # @note
3758
+ # This functionality is experimental and may change without it being considered a
3759
+ # breaking change.
3760
+ #
3761
+ # @note
3762
+ # If you want to compute multiple aggregation statistics over the same dynamic
3763
+ # window, consider using `groupby_rolling` this method can cache the window size
3764
+ # computation.
3765
+ #
3766
+ # @return [Expr]
3767
+ #
3768
+ # @example
3769
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3770
+ # df.select(
3771
+ # [
3772
+ # Polars.col("A").rolling_median(3)
3773
+ # ]
3774
+ # )
3775
+ # # =>
3776
+ # # shape: (6, 1)
3777
+ # # ┌──────┐
3778
+ # # │ A │
3779
+ # # │ --- │
3780
+ # # │ f64 │
3781
+ # # ╞══════╡
3782
+ # # │ null │
3783
+ # # ├╌╌╌╌╌╌┤
3784
+ # # │ null │
3785
+ # # ├╌╌╌╌╌╌┤
3786
+ # # │ 2.0 │
3787
+ # # ├╌╌╌╌╌╌┤
3788
+ # # │ 3.0 │
3789
+ # # ├╌╌╌╌╌╌┤
3790
+ # # │ 4.0 │
3791
+ # # ├╌╌╌╌╌╌┤
3792
+ # # │ 6.0 │
3793
+ # # └──────┘
3794
def rolling_median(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  # Normalize the window arguments through the shared helper before delegating.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_median(size, weights, periods, center, by, closed)
  wrap_expr(rolled)
end
3811
+
3812
+ # Compute a rolling quantile.
3813
+ #
3814
+ # @param quantile [Float]
3815
+ # Quantile between 0.0 and 1.0.
3816
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3817
+ # Interpolation method.
3818
+ # @param window_size [Integer]
3819
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3820
+ # size indicated by a timedelta or the following string language:
3821
+ #
3822
+ # - 1ns (1 nanosecond)
3823
+ # - 1us (1 microsecond)
3824
+ # - 1ms (1 millisecond)
3825
+ # - 1s (1 second)
3826
+ # - 1m (1 minute)
3827
+ # - 1h (1 hour)
3828
+ # - 1d (1 day)
3829
+ # - 1w (1 week)
3830
+ # - 1mo (1 calendar month)
3831
+ # - 1y (1 calendar year)
3832
+ # - 1i (1 index count)
3833
+ #
3834
+ # If a timedelta or the dynamic string language is used, the `by`
3835
+ # and `closed` arguments must also be set.
3836
+ # @param weights [Array]
3837
+ # An optional slice with the same length as the window that will be multiplied
3838
+ # elementwise with the values in the window.
3839
+ # @param min_periods [Integer]
3840
+ # The number of values in the window that should be non-null before computing
3841
+ # a result. If nil, it will be set equal to window size.
3842
+ # @param center [Boolean]
3843
+ # Set the labels at the center of the window
3844
+ # @param by [String]
3845
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
3846
+ # set the column that will be used to determine the windows. This column must
3847
+ # be of dtype `{Date, Datetime}`
3848
+ # @param closed ["left", "right", "both", "none"]
3849
+ # Define whether the temporal window interval is closed or not.
3850
+ #
3851
+ # @note
3852
+ # This functionality is experimental and may change without it being considered a
3853
+ # breaking change.
3854
+ #
3855
+ # @note
3856
+ # If you want to compute multiple aggregation statistics over the same dynamic
3857
+ # window, consider using `groupby_rolling` this method can cache the window size
3858
+ # computation.
3859
+ #
3860
+ # @return [Expr]
3861
+ #
3862
+ # @example
3863
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3864
+ # df.select(
3865
+ # [
3866
+ # Polars.col("A").rolling_quantile(0.33, window_size: 3)
3867
+ # ]
3868
+ # )
3869
+ # # =>
3870
+ # # shape: (6, 1)
3871
+ # # ┌──────┐
3872
+ # # │ A │
3873
+ # # │ --- │
3874
+ # # │ f64 │
3875
+ # # ╞══════╡
3876
+ # # │ null │
3877
+ # # ├╌╌╌╌╌╌┤
3878
+ # # │ null │
3879
+ # # ├╌╌╌╌╌╌┤
3880
+ # # │ 1.0 │
3881
+ # # ├╌╌╌╌╌╌┤
3882
+ # # │ 2.0 │
3883
+ # # ├╌╌╌╌╌╌┤
3884
+ # # │ 3.0 │
3885
+ # # ├╌╌╌╌╌╌┤
3886
+ # # │ 4.0 │
3887
+ # # └──────┘
3888
def rolling_quantile(
  quantile,
  interpolation: "nearest",
  window_size: 2,
  weights: nil,
  min_periods: nil,
  center: false,
  by: nil,
  closed: "left"
)
  # Normalize the window arguments through the shared helper, then pass the
  # quantile and interpolation ahead of the usual rolling arguments.
  size, periods = _prepare_rolling_window_args(window_size, min_periods)
  rolled = _rbexpr.rolling_quantile(
    quantile, interpolation, size, weights, periods, center, by, closed
  )
  wrap_expr(rolled)
end
3907
+
3908
+ # Apply a custom rolling window function.
3909
+ #
3910
+ # Prefer the specific rolling window functions over this one, as they are faster.
3911
+ #
3912
+ # Prefer:
3913
+ # * rolling_min
3914
+ # * rolling_max
3915
+ # * rolling_mean
3916
+ # * rolling_sum
3917
+ #
3918
+ # @param window_size [Integer]
3919
+ # The length of the window.
3920
+ # @param weights [Object]
3921
+ # An optional slice with the same length as the window that will be multiplied
3922
+ # elementwise with the values in the window.
3923
+ # @param min_periods [Integer]
3924
+ # The number of values in the window that should be non-null before computing
3925
+ # a result. If nil, it will be set equal to window size.
3926
+ # @param center [Boolean]
3927
+ # Set the labels at the center of the window
3928
+ #
3929
+ # @return [Expr]
3930
+ #
3931
+ # @example
3932
+ # df = Polars::DataFrame.new(
3933
+ # {
3934
+ # "A" => [1.0, 2.0, 9.0, 2.0, 13.0]
3935
+ # }
3936
+ # )
3937
+ # df.select(
3938
+ # [
3939
+ # Polars.col("A").rolling_apply(window_size: 3) { |s| s.std }
3940
+ # ]
3941
+ # )
3942
+ # # =>
3943
+ # # shape: (5, 1)
3944
+ # # ┌──────────┐
3945
+ # # │ A │
3946
+ # # │ --- │
3947
+ # # │ f64 │
3948
+ # # ╞══════════╡
3949
+ # # │ null │
3950
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3951
+ # # │ null │
3952
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3953
+ # # │ 4.358899 │
3954
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3955
+ # # │ 4.041452 │
3956
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3957
+ # # │ 5.567764 │
3958
+ # # └──────────┘
3959
+ # def rolling_apply(
3960
+ # window_size:,
3961
+ # weights: nil,
3962
+ # min_periods: nil,
3963
+ # center: false,
3964
+ # &function
3965
+ # )
3966
+ # if min_periods.nil?
3967
+ # min_periods = window_size
3968
+ # end
3969
+ # wrap_expr(
3970
+ # _rbexpr.rolling_apply(
3971
+ # function, window_size, weights, min_periods, center
3972
+ # )
3973
+ # )
1137
3974
  # end
1138
3975
 
3976
+ # Compute a rolling skew.
3977
+ #
3978
+ # @param window_size [Integer]
3979
+ # Integer size of the rolling window.
3980
+ # @param bias [Boolean]
3981
+ # If false, the calculations are corrected for statistical bias.
1139
3982
  #
3983
+ # @return [Expr]
1140
3984
  def rolling_skew(window_size, bias: true)
1141
3985
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
1142
3986
  end
1143
3987
 
3988
+ # Compute absolute values.
3989
+ #
3990
+ # @return [Expr]
3991
+ #
3992
+ # @example
3993
+ # df = Polars::DataFrame.new(
3994
+ # {
3995
+ # "A" => [-1.0, 0.0, 1.0, 2.0]
3996
+ # }
3997
+ # )
3998
+ # df.select(Polars.col("A").abs)
3999
+ # # =>
4000
+ # # shape: (4, 1)
4001
+ # # ┌─────┐
4002
+ # # │ A │
4003
+ # # │ --- │
4004
+ # # │ f64 │
4005
+ # # ╞═════╡
4006
+ # # │ 1.0 │
4007
+ # # ├╌╌╌╌╌┤
4008
+ # # │ 0.0 │
4009
+ # # ├╌╌╌╌╌┤
4010
+ # # │ 1.0 │
4011
+ # # ├╌╌╌╌╌┤
4012
+ # # │ 2.0 │
4013
+ # # └─────┘
1144
4014
  def abs
1145
4015
  wrap_expr(_rbexpr.abs)
1146
4016
  end
1147
4017
 
4018
+ # Get the index values that would sort this column.
4019
+ #
4020
+ # Alias for {#arg_sort}.
4021
+ #
4022
+ # @param reverse [Boolean]
4023
+ # Sort in reverse (descending) order.
4024
+ # @param nulls_last [Boolean]
4025
+ # Place null values last instead of first.
4026
+ #
4027
+ # @return [Expr]
4028
+ #
4029
+ # @example
4030
+ # df = Polars::DataFrame.new(
4031
+ # {
4032
+ # "a" => [20, 10, 30]
4033
+ # }
4034
+ # )
4035
+ # df.select(Polars.col("a").argsort)
4036
+ # # =>
4037
+ # # shape: (3, 1)
4038
+ # # ┌─────┐
4039
+ # # │ a │
4040
+ # # │ --- │
4041
+ # # │ u32 │
4042
+ # # ╞═════╡
4043
+ # # │ 1 │
4044
+ # # ├╌╌╌╌╌┤
4045
+ # # │ 0 │
4046
+ # # ├╌╌╌╌╌┤
4047
+ # # │ 2 │
4048
+ # # └─────┘
1148
4049
  def argsort(reverse: false, nulls_last: false)
1149
4050
  arg_sort(reverse: reverse, nulls_last: nulls_last)
1150
4051
  end
1151
4052
 
4053
+ # Assign ranks to data, dealing with ties appropriately.
4054
+ #
4055
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
4056
+ # The method used to assign ranks to tied elements.
4057
+ # The following methods are available:
4058
+ #
4059
+ # - 'average' : The average of the ranks that would have been assigned to
4060
+ # all the tied values is assigned to each value.
4061
+ # - 'min' : The minimum of the ranks that would have been assigned to all
4062
+ # the tied values is assigned to each value. (This is also referred to
4063
+ # as "competition" ranking.)
4064
+ # - 'max' : The maximum of the ranks that would have been assigned to all
4065
+ # the tied values is assigned to each value.
4066
+ # - 'dense' : Like 'min', but the rank of the next highest element is
4067
+ # assigned the rank immediately after those assigned to the tied
4068
+ # elements.
4069
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
4070
+ # the order that the values occur in the Series.
4071
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
4072
+ # on the order that the values occur in the Series.
4073
+ # @param reverse [Boolean]
4074
+ # Reverse the operation.
4075
+ #
4076
+ # @return [Expr]
4077
+ #
4078
+ # @example The 'average' method:
4079
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
4080
+ # df.select(Polars.col("a").rank)
4081
+ # # =>
4082
+ # # shape: (5, 1)
4083
+ # # ┌─────┐
4084
+ # # │ a │
4085
+ # # │ --- │
4086
+ # # │ f32 │
4087
+ # # ╞═════╡
4088
+ # # │ 3.0 │
4089
+ # # ├╌╌╌╌╌┤
4090
+ # # │ 4.5 │
4091
+ # # ├╌╌╌╌╌┤
4092
+ # # │ 1.5 │
4093
+ # # ├╌╌╌╌╌┤
4094
+ # # │ 1.5 │
4095
+ # # ├╌╌╌╌╌┤
4096
+ # # │ 4.5 │
4097
+ # # └─────┘
4098
+ #
4099
+ # @example The 'ordinal' method:
4100
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
4101
+ # df.select(Polars.col("a").rank(method: "ordinal"))
4102
+ # # =>
4103
+ # # shape: (5, 1)
4104
+ # # ┌─────┐
4105
+ # # │ a │
4106
+ # # │ --- │
4107
+ # # │ u32 │
4108
+ # # ╞═════╡
4109
+ # # │ 3 │
4110
+ # # ├╌╌╌╌╌┤
4111
+ # # │ 4 │
4112
+ # # ├╌╌╌╌╌┤
4113
+ # # │ 1 │
4114
+ # # ├╌╌╌╌╌┤
4115
+ # # │ 2 │
4116
+ # # ├╌╌╌╌╌┤
4117
+ # # │ 5 │
4118
+ # # └─────┘
1152
4119
  def rank(method: "average", reverse: false)
1153
4120
  wrap_expr(_rbexpr.rank(method, reverse))
1154
4121
  end
1155
4122
 
4123
+ # Calculate the n-th discrete difference.
4124
+ #
4125
+ # @param n [Integer]
4126
+ # Number of slots to shift.
4127
+ # @param null_behavior ["ignore", "drop"]
4128
+ # How to handle null values.
4129
+ #
4130
+ # @return [Expr]
4131
+ #
4132
+ # @example
4133
+ # df = Polars::DataFrame.new(
4134
+ # {
4135
+ # "a" => [20, 10, 30]
4136
+ # }
4137
+ # )
4138
+ # df.select(Polars.col("a").diff)
4139
+ # # =>
4140
+ # # shape: (3, 1)
4141
+ # # ┌──────┐
4142
+ # # │ a │
4143
+ # # │ --- │
4144
+ # # │ i64 │
4145
+ # # ╞══════╡
4146
+ # # │ null │
4147
+ # # ├╌╌╌╌╌╌┤
4148
+ # # │ -10 │
4149
+ # # ├╌╌╌╌╌╌┤
4150
+ # # │ 20 │
4151
+ # # └──────┘
1156
4152
  def diff(n: 1, null_behavior: "ignore")
1157
4153
  wrap_expr(_rbexpr.diff(n, null_behavior))
1158
4154
  end
1159
4155
 
4156
+ # Computes percentage change between values.
4157
+ #
4158
+ # Percentage change (as fraction) between current element and most-recent
4159
+ # non-null element at least `n` period(s) before the current element.
4160
+ #
4161
+ # Computes the change from the previous row by default.
4162
+ #
4163
+ # @param n [Integer]
4164
+ # Periods to shift for forming percent change.
4165
+ #
4166
+ # @return [Expr]
4167
+ #
4168
+ # @example
4169
+ # df = Polars::DataFrame.new(
4170
+ # {
4171
+ # "a" => [10, 11, 12, nil, 12]
4172
+ # }
4173
+ # )
4174
+ # df.with_column(Polars.col("a").pct_change.alias("pct_change"))
4175
+ # # =>
4176
+ # # shape: (5, 2)
4177
+ # # ┌──────┬────────────┐
4178
+ # # │ a ┆ pct_change │
4179
+ # # │ --- ┆ --- │
4180
+ # # │ i64 ┆ f64 │
4181
+ # # ╞══════╪════════════╡
4182
+ # # │ 10 ┆ null │
4183
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4184
+ # # │ 11 ┆ 0.1 │
4185
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4186
+ # # │ 12 ┆ 0.090909 │
4187
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4188
+ # # │ null ┆ 0.0 │
4189
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
4190
+ # # │ 12 ┆ 0.0 │
4191
+ # # └──────┴────────────┘
1160
4192
  def pct_change(n: 1)
1161
4193
  wrap_expr(_rbexpr.pct_change(n))
1162
4194
  end
1163
4195
 
4196
+ # Compute the sample skewness of a data set.
4197
+ #
4198
+ # For normally distributed data, the skewness should be about zero. For
4199
+ # unimodal continuous distributions, a skewness value greater than zero means
4200
+ # that there is more weight in the right tail of the distribution. The
4201
+ # function `skewtest` can be used to determine if the skewness value
4202
+ # is close enough to zero, statistically speaking.
4203
+ #
4204
+ # @param bias [Boolean]
4205
+ # If false, the calculations are corrected for statistical bias.
4206
+ #
4207
+ # @return [Expr]
4208
+ #
4209
+ # @example
4210
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4211
+ # df.select(Polars.col("a").skew)
4212
+ # # =>
4213
+ # # shape: (1, 1)
4214
+ # # ┌──────────┐
4215
+ # # │ a │
4216
+ # # │ --- │
4217
+ # # │ f64 │
4218
+ # # ╞══════════╡
4219
+ # # │ 0.343622 │
4220
+ # # └──────────┘
1164
4221
  def skew(bias: true)
1165
4222
  wrap_expr(_rbexpr.skew(bias))
1166
4223
  end
1167
4224
 
4225
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
4226
+ #
4227
+ # Kurtosis is the fourth central moment divided by the square of the
4228
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
4229
+ # the result to give 0.0 for a normal distribution.
4230
+ # If bias is false then the kurtosis is calculated using k statistics to
4231
+ # eliminate bias coming from biased moment estimators.
4232
+ #
4233
+ # @param fisher [Boolean]
4234
+ # If true, Fisher's definition is used (normal ==> 0.0). If false,
4235
+ # Pearson's definition is used (normal ==> 3.0).
4236
+ # @param bias [Boolean]
4237
+ # If false, the calculations are corrected for statistical bias.
4238
+ #
4239
+ # @return [Expr]
4240
+ #
4241
+ # @example
4242
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4243
+ # df.select(Polars.col("a").kurtosis)
4244
+ # # =>
4245
+ # # shape: (1, 1)
4246
+ # # ┌───────────┐
4247
+ # # │ a │
4248
+ # # │ --- │
4249
+ # # │ f64 │
4250
+ # # ╞═══════════╡
4251
+ # # │ -1.153061 │
4252
+ # # └───────────┘
1168
4253
  def kurtosis(fisher: true, bias: true)
1169
4254
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
1170
4255
  end
1171
4256
 
4257
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
4258
+ #
4259
+ # Only works for numerical types.
4260
+ #
4261
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4262
+ # expression. See `when` for more information.
4263
+ #
4264
+ # @param min_val [Numeric]
4265
+ # Minimum value.
4266
+ # @param max_val [Numeric]
4267
+ # Maximum value.
4268
+ #
4269
+ # @return [Expr]
4270
+ #
4271
+ # @example
4272
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4273
+ # df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
4274
+ # # =>
4275
+ # # shape: (4, 2)
4276
+ # # ┌──────┬─────────────┐
4277
+ # # │ foo ┆ foo_clipped │
4278
+ # # │ --- ┆ --- │
4279
+ # # │ i64 ┆ i64 │
4280
+ # # ╞══════╪═════════════╡
4281
+ # # │ -50 ┆ 1 │
4282
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4283
+ # # │ 5 ┆ 5 │
4284
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4285
+ # # │ null ┆ null │
4286
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4287
+ # # │ 50 ┆ 10 │
4288
+ # # └──────┴─────────────┘
1172
4289
  def clip(min_val, max_val)
1173
4290
  wrap_expr(_rbexpr.clip(min_val, max_val))
1174
4291
  end
1175
4292
 
4293
+ # Clip (limit) the values in an array to a `min` boundary.
4294
+ #
4295
+ # Only works for numerical types.
4296
+ #
4297
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4298
+ # expression. See `when` for more information.
4299
+ #
4300
+ # @param min_val [Numeric]
4301
+ # Minimum value.
4302
+ #
4303
+ # @return [Expr]
4304
+ #
4305
+ # @example
4306
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4307
+ # df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
4308
+ # # =>
4309
+ # # shape: (4, 2)
4310
+ # # ┌──────┬─────────────┐
4311
+ # # │ foo ┆ foo_clipped │
4312
+ # # │ --- ┆ --- │
4313
+ # # │ i64 ┆ i64 │
4314
+ # # ╞══════╪═════════════╡
4315
+ # # │ -50 ┆ 0 │
4316
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4317
+ # # │ 5 ┆ 5 │
4318
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4319
+ # # │ null ┆ null │
4320
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4321
+ # # │ 50 ┆ 50 │
4322
+ # # └──────┴─────────────┘
1176
4323
  def clip_min(min_val)
1177
4324
  wrap_expr(_rbexpr.clip_min(min_val))
1178
4325
  end
1179
4326
 
4327
+ # Clip (limit) the values in an array to a `max` boundary.
4328
+ #
4329
+ # Only works for numerical types.
4330
+ #
4331
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4332
+ # expression. See `when` for more information.
4333
+ #
4334
+ # @param max_val [Numeric]
4335
+ # Maximum value.
4336
+ #
4337
+ # @return [Expr]
4338
+ #
4339
+ # @example
4340
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4341
+ # df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
4342
+ # # =>
4343
+ # # shape: (4, 2)
4344
+ # # ┌──────┬─────────────┐
4345
+ # # │ foo ┆ foo_clipped │
4346
+ # # │ --- ┆ --- │
4347
+ # # │ i64 ┆ i64 │
4348
+ # # ╞══════╪═════════════╡
4349
+ # # │ -50 ┆ -50 │
4350
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4351
+ # # │ 5 ┆ 0 │
4352
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4353
+ # # │ null ┆ null │
4354
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4355
+ # # │ 50 ┆ 0 │
4356
+ # # └──────┴─────────────┘
1180
4357
  def clip_max(max_val)
1181
4358
  wrap_expr(_rbexpr.clip_max(max_val))
1182
4359
  end
1183
4360
 
4361
+ # Calculate the lower bound.
4362
+ #
4363
+ # Returns a unit Series with the lowest value possible for the dtype of this
4364
+ # expression.
4365
+ #
4366
+ # @return [Expr]
4367
+ #
4368
+ # @example
4369
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4370
+ # df.select(Polars.col("a").lower_bound)
4371
+ # # =>
4372
+ # # shape: (1, 1)
4373
+ # # ┌──────────────────────┐
4374
+ # # │ a │
4375
+ # # │ --- │
4376
+ # # │ i64 │
4377
+ # # ╞══════════════════════╡
4378
+ # # │ -9223372036854775808 │
4379
+ # # └──────────────────────┘
1184
4380
  def lower_bound
1185
4381
  wrap_expr(_rbexpr.lower_bound)
1186
4382
  end
1187
4383
 
4384
+ # Calculate the upper bound.
4385
+ #
4386
+ # Returns a unit Series with the highest value possible for the dtype of this
4387
+ # expression.
4388
+ #
4389
+ # @return [Expr]
4390
+ #
4391
+ # @example
4392
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4393
+ # df.select(Polars.col("a").upper_bound)
4394
+ # # =>
4395
+ # # shape: (1, 1)
4396
+ # # ┌─────────────────────┐
4397
+ # # │ a │
4398
+ # # │ --- │
4399
+ # # │ i64 │
4400
+ # # ╞═════════════════════╡
4401
+ # # │ 9223372036854775807 │
4402
+ # # └─────────────────────┘
1188
4403
  def upper_bound
1189
4404
  wrap_expr(_rbexpr.upper_bound)
1190
4405
  end
1191
4406
 
4407
+ # Compute the element-wise indication of the sign.
4408
+ #
4409
+ # @return [Expr]
4410
+ #
4411
+ # @example
4412
+ # df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
4413
+ # df.select(Polars.col("a").sign)
4414
+ # # =>
4415
+ # # shape: (5, 1)
4416
+ # # ┌──────┐
4417
+ # # │ a │
4418
+ # # │ --- │
4419
+ # # │ i64 │
4420
+ # # ╞══════╡
4421
+ # # │ -1 │
4422
+ # # ├╌╌╌╌╌╌┤
4423
+ # # │ 0 │
4424
+ # # ├╌╌╌╌╌╌┤
4425
+ # # │ 0 │
4426
+ # # ├╌╌╌╌╌╌┤
4427
+ # # │ 1 │
4428
+ # # ├╌╌╌╌╌╌┤
4429
+ # # │ null │
4430
+ # # └──────┘
1192
4431
  def sign
1193
4432
  wrap_expr(_rbexpr.sign)
1194
4433
  end
1195
4434
 
4435
+ # Compute the element-wise value for the sine.
4436
+ #
4437
+ # @return [Expr]
4438
+ #
4439
+ # @example
4440
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4441
+ # df.select(Polars.col("a").sin)
4442
+ # # =>
4443
+ # # shape: (1, 1)
4444
+ # # ┌─────┐
4445
+ # # │ a │
4446
+ # # │ --- │
4447
+ # # │ f64 │
4448
+ # # ╞═════╡
4449
+ # # │ 0.0 │
4450
+ # # └─────┘
1196
4451
  def sin
1197
4452
  wrap_expr(_rbexpr.sin)
1198
4453
  end
1199
4454
 
4455
+ # Compute the element-wise value for the cosine.
4456
+ #
4457
+ # @return [Expr]
4458
+ #
4459
+ # @example
4460
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4461
+ # df.select(Polars.col("a").cos)
4462
+ # # =>
4463
+ # # shape: (1, 1)
4464
+ # # ┌─────┐
4465
+ # # │ a │
4466
+ # # │ --- │
4467
+ # # │ f64 │
4468
+ # # ╞═════╡
4469
+ # # │ 1.0 │
4470
+ # # └─────┘
1200
4471
  def cos
1201
4472
  wrap_expr(_rbexpr.cos)
1202
4473
  end
1203
4474
 
4475
+ # Compute the element-wise value for the tangent.
4476
+ #
4477
+ # @return [Expr]
4478
+ #
4479
+ # @example
4480
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4481
+ # df.select(Polars.col("a").tan)
4482
+ # # =>
4483
+ # # shape: (1, 1)
4484
+ # # ┌──────────┐
4485
+ # # │ a │
4486
+ # # │ --- │
4487
+ # # │ f64 │
4488
+ # # ╞══════════╡
4489
+ # # │ 1.557408 │
4490
+ # # └──────────┘
1204
4491
  def tan
1205
4492
  wrap_expr(_rbexpr.tan)
1206
4493
  end
1207
4494
 
4495
+ # Compute the element-wise value for the inverse sine.
4496
+ #
4497
+ # @return [Expr]
4498
+ #
4499
+ # @example
4500
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4501
+ # df.select(Polars.col("a").arcsin)
4502
+ # # =>
4503
+ # # shape: (1, 1)
4504
+ # # ┌──────────┐
4505
+ # # │ a │
4506
+ # # │ --- │
4507
+ # # │ f64 │
4508
+ # # ╞══════════╡
4509
+ # # │ 1.570796 │
4510
+ # # └──────────┘
1208
4511
  def arcsin
1209
4512
  wrap_expr(_rbexpr.arcsin)
1210
4513
  end
1211
4514
 
4515
+ # Compute the element-wise value for the inverse cosine.
4516
+ #
4517
+ # @return [Expr]
4518
+ #
4519
+ # @example
4520
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4521
+ # df.select(Polars.col("a").arccos)
4522
+ # # =>
4523
+ # # shape: (1, 1)
4524
+ # # ┌──────────┐
4525
+ # # │ a │
4526
+ # # │ --- │
4527
+ # # │ f64 │
4528
+ # # ╞══════════╡
4529
+ # # │ 1.570796 │
4530
+ # # └──────────┘
1212
4531
  def arccos
1213
4532
  wrap_expr(_rbexpr.arccos)
1214
4533
  end
1215
4534
 
4535
+ # Compute the element-wise value for the inverse tangent.
4536
+ #
4537
+ # @return [Expr]
4538
+ #
4539
+ # @example
4540
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4541
+ # df.select(Polars.col("a").arctan)
4542
+ # # =>
4543
+ # # shape: (1, 1)
4544
+ # # ┌──────────┐
4545
+ # # │ a │
4546
+ # # │ --- │
4547
+ # # │ f64 │
4548
+ # # ╞══════════╡
4549
+ # # │ 0.785398 │
4550
+ # # └──────────┘
1216
4551
  def arctan
1217
4552
  wrap_expr(_rbexpr.arctan)
1218
4553
  end
1219
4554
 
4555
+ # Compute the element-wise value for the hyperbolic sine.
4556
+ #
4557
+ # @return [Expr]
4558
+ #
4559
+ # @example
4560
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4561
+ # df.select(Polars.col("a").sinh)
4562
+ # # =>
4563
+ # # shape: (1, 1)
4564
+ # # ┌──────────┐
4565
+ # # │ a │
4566
+ # # │ --- │
4567
+ # # │ f64 │
4568
+ # # ╞══════════╡
4569
+ # # │ 1.175201 │
4570
+ # # └──────────┘
1220
4571
  def sinh
1221
4572
  wrap_expr(_rbexpr.sinh)
1222
4573
  end
1223
4574
 
4575
+ # Compute the element-wise value for the hyperbolic cosine.
4576
+ #
4577
+ # @return [Expr]
4578
+ #
4579
+ # @example
4580
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4581
+ # df.select(Polars.col("a").cosh)
4582
+ # # =>
4583
+ # # shape: (1, 1)
4584
+ # # ┌──────────┐
4585
+ # # │ a │
4586
+ # # │ --- │
4587
+ # # │ f64 │
4588
+ # # ╞══════════╡
4589
+ # # │ 1.543081 │
4590
+ # # └──────────┘
1224
4591
  def cosh
1225
4592
  wrap_expr(_rbexpr.cosh)
1226
4593
  end
1227
4594
 
4595
+ # Compute the element-wise value for the hyperbolic tangent.
4596
+ #
4597
+ # @return [Expr]
4598
+ #
4599
+ # @example
4600
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4601
+ # df.select(Polars.col("a").tanh)
4602
+ # # =>
4603
+ # # shape: (1, 1)
4604
+ # # ┌──────────┐
4605
+ # # │ a │
4606
+ # # │ --- │
4607
+ # # │ f64 │
4608
+ # # ╞══════════╡
4609
+ # # │ 0.761594 │
4610
+ # # └──────────┘
1228
4611
  def tanh
1229
4612
  wrap_expr(_rbexpr.tanh)
1230
4613
  end
1231
4614
 
4615
+ # Compute the element-wise value for the inverse hyperbolic sine.
4616
+ #
4617
+ # @return [Expr]
4618
+ #
4619
+ # @example
4620
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4621
+ # df.select(Polars.col("a").arcsinh)
4622
+ # # =>
4623
+ # # shape: (1, 1)
4624
+ # # ┌──────────┐
4625
+ # # │ a │
4626
+ # # │ --- │
4627
+ # # │ f64 │
4628
+ # # ╞══════════╡
4629
+ # # │ 0.881374 │
4630
+ # # └──────────┘
1232
4631
  def arcsinh
1233
4632
  wrap_expr(_rbexpr.arcsinh)
1234
4633
  end
1235
4634
 
4635
+ # Compute the element-wise value for the inverse hyperbolic cosine.
4636
+ #
4637
+ # @return [Expr]
4638
+ #
4639
+ # @example
4640
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4641
+ # df.select(Polars.col("a").arccosh)
4642
+ # # =>
4643
+ # # shape: (1, 1)
4644
+ # # ┌─────┐
4645
+ # # │ a │
4646
+ # # │ --- │
4647
+ # # │ f64 │
4648
+ # # ╞═════╡
4649
+ # # │ 0.0 │
4650
+ # # └─────┘
1236
4651
  def arccosh
1237
4652
  wrap_expr(_rbexpr.arccosh)
1238
4653
  end
1239
4654
 
4655
+ # Compute the element-wise value for the inverse hyperbolic tangent.
4656
+ #
4657
+ # @return [Expr]
4658
+ #
4659
+ # @example
4660
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4661
+ # df.select(Polars.col("a").arctanh)
4662
+ # # =>
4663
+ # # shape: (1, 1)
4664
+ # # ┌─────┐
4665
+ # # │ a │
4666
+ # # │ --- │
4667
+ # # │ f64 │
4668
+ # # ╞═════╡
4669
+ # # │ inf │
4670
+ # # └─────┘
1240
4671
  def arctanh
1241
4672
  wrap_expr(_rbexpr.arctanh)
1242
4673
  end
1243
4674
 
4675
+ # Reshape this Expr to a flat Series or a Series of Lists.
4676
+ #
4677
+ # @param dims [Array]
4678
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
4679
+ # dimension is inferred.
4680
+ #
4681
+ # @return [Expr]
4682
+ #
4683
+ # @example
4684
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
4685
+ # df.select(Polars.col("foo").reshape([3, 3]))
4686
+ # # =>
4687
+ # # shape: (3, 1)
4688
+ # # ┌───────────┐
4689
+ # # │ foo │
4690
+ # # │ --- │
4691
+ # # │ list[i64] │
4692
+ # # ╞═══════════╡
4693
+ # # │ [1, 2, 3] │
4694
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4695
+ # # │ [4, 5, 6] │
4696
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4697
+ # # │ [7, 8, 9] │
4698
+ # # └───────────┘
1244
4699
  def reshape(dims)
1245
4700
  wrap_expr(_rbexpr.reshape(dims))
1246
4701
  end
1247
4702
 
4703
+ # Shuffle the contents of this expr.
4704
+ #
4705
+ # @param seed [Integer]
4706
+ # Seed for the random number generator. If set to nil (default), a random
4707
+ # seed is generated using `rand`.
4708
+ #
4709
+ # @return [Expr]
4710
+ #
4711
+ # @example
4712
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4713
+ # df.select(Polars.col("a").shuffle(seed: 1))
4714
+ # # =>
4715
+ # # shape: (3, 1)
4716
+ # # ┌─────┐
4717
+ # # │ a │
4718
+ # # │ --- │
4719
+ # # │ i64 │
4720
+ # # ╞═════╡
4721
+ # # │ 2 │
4722
+ # # ├╌╌╌╌╌┤
4723
+ # # │ 1 │
4724
+ # # ├╌╌╌╌╌┤
4725
+ # # │ 3 │
4726
+ # # └─────┘
1248
4727
  def shuffle(seed: nil)
1249
4728
  if seed.nil?
1250
4729
  seed = rand(10000)
@@ -1252,74 +4731,514 @@ module Polars
1252
4731
  wrap_expr(_rbexpr.shuffle(seed))
1253
4732
  end
1254
4733
 
1255
- # def sample
1256
- # end
1257
-
1258
- # def ewm_mean
1259
- # end
4734
+ # Sample from this expression.
4735
+ #
4736
+ # @param frac [Float]
4737
+ # Fraction of items to return. Cannot be used with `n`.
4738
+ # @param with_replacement [Boolean]
4739
+ # Allow values to be sampled more than once.
4740
+ # @param shuffle [Boolean]
4741
+ # Shuffle the order of sampled data points.
4742
+ # @param seed [Integer]
4743
+ # Seed for the random number generator. If set to nil (default), a random
4744
+ # seed is used.
4745
+ # @param n [Integer]
4746
+ # Number of items to return. Cannot be used with `frac`.
4747
+ #
4748
+ # @return [Expr]
4749
+ #
4750
+ # @example
4751
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4752
+ # df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
4753
+ # # =>
4754
+ # # shape: (3, 1)
4755
+ # # ┌─────┐
4756
+ # # │ a │
4757
+ # # │ --- │
4758
+ # # │ i64 │
4759
+ # # ╞═════╡
4760
+ # # │ 3 │
4761
+ # # ├╌╌╌╌╌┤
4762
+ # # │ 1 │
4763
+ # # ├╌╌╌╌╌┤
4764
+ # # │ 1 │
4765
+ # # └─────┘
4766
+ def sample(
4767
+ frac: nil,
4768
+ with_replacement: true,
4769
+ shuffle: false,
4770
+ seed: nil,
4771
+ n: nil
4772
+ )
4773
+ if !n.nil? && !frac.nil?
4774
+ raise ArgumentError, "cannot specify both `n` and `frac`"
4775
+ end
1260
4776
 
1261
- # def ewm_std
1262
- # end
4777
+ if !n.nil? && frac.nil?
4778
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4779
+ end
1263
4780
 
1264
- # def ewm_var
1265
- # end
4781
+ if frac.nil?
4782
+ frac = 1.0
4783
+ end
4784
+ wrap_expr(
4785
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4786
+ )
4787
+ end
1266
4788
 
4789
+ # Exponentially-weighted moving average.
4790
+ #
4791
+ # @return [Expr]
4792
+ #
4793
+ # @example
4794
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4795
+ # df.select(Polars.col("a").ewm_mean(com: 1))
4796
+ # # =>
4797
+ # # shape: (3, 1)
4798
+ # # ┌──────────┐
4799
+ # # │ a │
4800
+ # # │ --- │
4801
+ # # │ f64 │
4802
+ # # ╞══════════╡
4803
+ # # │ 1.0 │
4804
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4805
+ # # │ 1.666667 │
4806
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4807
+ # # │ 2.428571 │
4808
+ # # └──────────┘
4809
+ def ewm_mean(
4810
+ com: nil,
4811
+ span: nil,
4812
+ half_life: nil,
4813
+ alpha: nil,
4814
+ adjust: true,
4815
+ min_periods: 1
4816
+ )
4817
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4818
+ wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
4819
+ end
4820
+
4821
+ # Exponentially-weighted moving standard deviation.
4822
+ #
4823
+ # @return [Expr]
4824
+ #
4825
+ # @example
4826
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4827
+ # df.select(Polars.col("a").ewm_std(com: 1))
4828
+ # # =>
4829
+ # # shape: (3, 1)
4830
+ # # ┌──────────┐
4831
+ # # │ a │
4832
+ # # │ --- │
4833
+ # # │ f64 │
4834
+ # # ╞══════════╡
4835
+ # # │ 0.0 │
4836
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4837
+ # # │ 0.707107 │
4838
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4839
+ # # │ 0.963624 │
4840
+ # # └──────────┘
4841
+ def ewm_std(
4842
+ com: nil,
4843
+ span: nil,
4844
+ half_life: nil,
4845
+ alpha: nil,
4846
+ adjust: true,
4847
+ bias: false,
4848
+ min_periods: 1
4849
+ )
4850
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4851
+ wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
4852
+ end
4853
+
4854
+ # Exponentially-weighted moving variance.
4855
+ #
4856
+ # @return [Expr]
4857
+ #
4858
+ # @example
4859
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4860
+ # df.select(Polars.col("a").ewm_var(com: 1))
4861
+ # # =>
4862
+ # # shape: (3, 1)
4863
+ # # ┌──────────┐
4864
+ # # │ a │
4865
+ # # │ --- │
4866
+ # # │ f64 │
4867
+ # # ╞══════════╡
4868
+ # # │ 0.0 │
4869
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4870
+ # # │ 0.5 │
4871
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4872
+ # # │ 0.928571 │
4873
+ # # └──────────┘
4874
+ def ewm_var(
4875
+ com: nil,
4876
+ span: nil,
4877
+ half_life: nil,
4878
+ alpha: nil,
4879
+ adjust: true,
4880
+ bias: false,
4881
+ min_periods: 1
4882
+ )
4883
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4884
+ wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
4885
+ end
4886
+
4887
+ # Extend the Series with a given number of values.
4888
+ #
4889
+ # @param value [Object]
4890
+ # The value to extend the Series with. This value may be nil to fill with
4891
+ # nulls.
4892
+ # @param n [Integer]
4893
+ # The number of values to extend.
4894
+ #
4895
+ # @return [Expr]
1267
4896
  #
4897
+ # @example
4898
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4899
+ # df.select(Polars.col("values").extend_constant(99, 2))
4900
+ # # =>
4901
+ # # shape: (5, 1)
4902
+ # # ┌────────┐
4903
+ # # │ values │
4904
+ # # │ --- │
4905
+ # # │ i64 │
4906
+ # # ╞════════╡
4907
+ # # │ 1 │
4908
+ # # ├╌╌╌╌╌╌╌╌┤
4909
+ # # │ 2 │
4910
+ # # ├╌╌╌╌╌╌╌╌┤
4911
+ # # │ 3 │
4912
+ # # ├╌╌╌╌╌╌╌╌┤
4913
+ # # │ 99 │
4914
+ # # ├╌╌╌╌╌╌╌╌┤
4915
+ # # │ 99 │
4916
+ # # └────────┘
1268
4917
  def extend_constant(value, n)
1269
4918
  wrap_expr(_rbexpr.extend_constant(value, n))
1270
4919
  end
1271
4920
 
4921
+ # Count all unique values and create a struct mapping value to count.
4922
+ #
4923
+ # @param multithreaded [Boolean]
4924
+ # Better to turn this off in the aggregation context, as it can lead to
4925
+ # contention.
4926
+ # @param sort [Boolean]
4927
+ # Ensure the output is sorted from most values to least.
4928
+ #
4929
+ # @return [Expr]
4930
+ #
4931
+ # @example
4932
+ # df = Polars::DataFrame.new(
4933
+ # {
4934
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4935
+ # }
4936
+ # )
4937
+ # df.select(
4938
+ # [
4939
+ # Polars.col("id").value_counts(sort: true),
4940
+ # ]
4941
+ # )
4942
+ # # =>
4943
+ # # shape: (3, 1)
4944
+ # # ┌───────────┐
4945
+ # # │ id │
4946
+ # # │ --- │
4947
+ # # │ struct[2] │
4948
+ # # ╞═══════════╡
4949
+ # # │ {"c",3} │
4950
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4951
+ # # │ {"b",2} │
4952
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4953
+ # # │ {"a",1} │
4954
+ # # └───────────┘
1272
4955
  def value_counts(multithreaded: false, sort: false)
1273
4956
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
1274
4957
  end
1275
4958
 
4959
+ # Return a count of the unique values in the order of appearance.
4960
+ #
4961
+ # This method differs from `value_counts` in that it does not return the
4962
+ # values, only the counts, and might be faster.
4963
+ #
4964
+ # @return [Expr]
4965
+ #
4966
+ # @example
4967
+ # df = Polars::DataFrame.new(
4968
+ # {
4969
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4970
+ # }
4971
+ # )
4972
+ # df.select(
4973
+ # [
4974
+ # Polars.col("id").unique_counts
4975
+ # ]
4976
+ # )
4977
+ # # =>
4978
+ # # shape: (3, 1)
4979
+ # # ┌─────┐
4980
+ # # │ id │
4981
+ # # │ --- │
4982
+ # # │ u32 │
4983
+ # # ╞═════╡
4984
+ # # │ 1 │
4985
+ # # ├╌╌╌╌╌┤
4986
+ # # │ 2 │
4987
+ # # ├╌╌╌╌╌┤
4988
+ # # │ 3 │
4989
+ # # └─────┘
1276
4990
  def unique_counts
1277
4991
  wrap_expr(_rbexpr.unique_counts)
1278
4992
  end
1279
4993
 
4994
+ # Compute the logarithm to a given base.
4995
+ #
4996
+ # @param base [Float]
4997
+ # Given base, defaults to `e`.
4998
+ #
4999
+ # @return [Expr]
5000
+ #
5001
+ # @example
5002
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
5003
+ # df.select(Polars.col("a").log(2))
5004
+ # # =>
5005
+ # # shape: (3, 1)
5006
+ # # ┌──────────┐
5007
+ # # │ a │
5008
+ # # │ --- │
5009
+ # # │ f64 │
5010
+ # # ╞══════════╡
5011
+ # # │ 0.0 │
5012
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
5013
+ # # │ 1.0 │
5014
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
5015
+ # # │ 1.584963 │
5016
+ # # └──────────┘
1280
5017
  def log(base = Math::E)
1281
5018
  wrap_expr(_rbexpr.log(base))
1282
5019
  end
1283
5020
 
1284
- def entropy(base: 2, normalize: false)
5021
+ # Computes the entropy.
5022
+ #
5023
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
5024
+ #
5025
+ # @param base [Float]
5026
+ # Given base, defaults to `2`.
5027
+ # @param normalize [Boolean]
5028
+ # Normalize pk if it doesn't sum to 1.
5029
+ #
5030
+ # @return [Expr]
5031
+ #
5032
+ # @example
5033
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
5034
+ # df.select(Polars.col("a").entropy(base: 2))
5035
+ # # =>
5036
+ # # shape: (1, 1)
5037
+ # # ┌──────────┐
5038
+ # # │ a │
5039
+ # # │ --- │
5040
+ # # │ f64 │
5041
+ # # ╞══════════╡
5042
+ # # │ 1.459148 │
5043
+ # # └──────────┘
5044
+ #
5045
+ # @example
5046
+ # df.select(Polars.col("a").entropy(base: 2, normalize: false))
5047
+ # # =>
5048
+ # # shape: (1, 1)
5049
+ # # ┌───────────┐
5050
+ # # │ a │
5051
+ # # │ --- │
5052
+ # # │ f64 │
5053
+ # # ╞═══════════╡
5054
+ # # │ -6.754888 │
5055
+ # # └───────────┘
5056
+ def entropy(base: 2, normalize: true)
1285
5057
  wrap_expr(_rbexpr.entropy(base, normalize))
1286
5058
  end
1287
5059
 
1288
- # def cumulative_eval
1289
- # end
1290
-
1291
- # def set_sorted
5060
+ # Run an expression over a sliding window that increases `1` slot every iteration.
5061
+ #
5062
+ # @param expr [Expr]
5063
+ # Expression to evaluate
5064
+ # @param min_periods [Integer]
5065
+ # Number of valid values there should be in the window before the expression
5066
+ # is evaluated. valid values = `length - null_count`
5067
+ # @param parallel [Boolean]
5068
+ # Run in parallel. Don't do this in a groupby or another operation that
5069
+ # already has much parallelization.
5070
+ #
5071
+ # @return [Expr]
5072
+ #
5073
+ # @note
5074
+ # This functionality is experimental and may change without it being considered a
5075
+ # breaking change.
5076
+ #
5077
+ # @note
5078
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
5079
+ # for operations that visit all elements.
5080
+ #
5081
+ # @example
5082
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
5083
+ # df.select(
5084
+ # [
5085
+ # Polars.col("values").cumulative_eval(
5086
+ # Polars.element.first - Polars.element.last ** 2
5087
+ # )
5088
+ # ]
5089
+ # )
5090
+ # # =>
5091
+ # # shape: (5, 1)
5092
+ # # ┌────────┐
5093
+ # # │ values │
5094
+ # # │ --- │
5095
+ # # │ f64 │
5096
+ # # ╞════════╡
5097
+ # # │ 0.0 │
5098
+ # # ├╌╌╌╌╌╌╌╌┤
5099
+ # # │ -3.0 │
5100
+ # # ├╌╌╌╌╌╌╌╌┤
5101
+ # # │ -8.0 │
5102
+ # # ├╌╌╌╌╌╌╌╌┤
5103
+ # # │ -15.0 │
5104
+ # # ├╌╌╌╌╌╌╌╌┤
5105
+ # # │ -24.0 │
5106
+ # # └────────┘
5107
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
5108
+ wrap_expr(
5109
+ _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
5110
+ )
5111
+ end
5112
+
5113
+ # Flags the expression as 'sorted'.
5114
+ #
5115
+ # Enables downstream code to use fast paths for sorted arrays.
5116
+ #
5117
+ # @param reverse [Boolean]
5118
+ # If the `Series` order is reversed, e.g. descending.
5119
+ #
5120
+ # @return [Expr]
5121
+ #
5122
+ # @note
5123
+ # This can lead to incorrect results if this `Series` is not sorted!!
5124
+ # Use with care!
5125
+ #
5126
+ # @example
5127
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
5128
+ # df.select(Polars.col("values").set_sorted.max)
5129
+ # # =>
5130
+ # # shape: (1, 1)
5131
+ # # ┌────────┐
5132
+ # # │ values │
5133
+ # # │ --- │
5134
+ # # │ i64 │
5135
+ # # ╞════════╡
5136
+ # # │ 3 │
5137
+ # # └────────┘
5138
+ # def set_sorted(reverse: false)
5139
+ # map { |s| s.set_sorted(reverse) }
1292
5140
  # end
1293
5141
 
5142
+ # Aggregate to list.
5143
+ #
5144
+ # @return [Expr]
1294
5145
  #
5146
+ # @example
5147
+ # df = Polars::DataFrame.new(
5148
+ # {
5149
+ # "a" => [1, 2, 3],
5150
+ # "b" => [4, 5, 6]
5151
+ # }
5152
+ # )
5153
+ # df.select(Polars.all.list)
5154
+ # # =>
5155
+ # # shape: (1, 2)
5156
+ # # ┌───────────┬───────────┐
5157
+ # # │ a ┆ b │
5158
+ # # │ --- ┆ --- │
5159
+ # # │ list[i64] ┆ list[i64] │
5160
+ # # ╞═══════════╪═══════════╡
5161
+ # # │ [1, 2, 3] ┆ [4, 5, 6] │
5162
+ # # └───────────┴───────────┘
1295
5163
  def list
1296
5164
  wrap_expr(_rbexpr.list)
1297
5165
  end
1298
5166
 
5167
+ # Shrink numeric columns to the minimal required datatype.
5168
+ #
5169
+ # Shrink to the dtype needed to fit the extrema of this `Series`.
5170
+ # This can be used to reduce memory pressure.
5171
+ #
5172
+ # @return [Expr]
5173
+ #
5174
+ # @example
5175
+ # Polars::DataFrame.new(
5176
+ # {
5177
+ # "a" => [1, 2, 3],
5178
+ # "b" => [1, 2, 2 << 32],
5179
+ # "c" => [-1, 2, 1 << 30],
5180
+ # "d" => [-112, 2, 112],
5181
+ # "e" => [-112, 2, 129],
5182
+ # "f" => ["a", "b", "c"],
5183
+ # "g" => [0.1, 1.32, 0.12],
5184
+ # "h" => [true, nil, false]
5185
+ # }
5186
+ # ).select(Polars.all.shrink_dtype)
5187
+ # # =>
5188
+ # # shape: (3, 8)
5189
+ # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
5190
+ # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
5191
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
5192
+ # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
5193
+ # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
5194
+ # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
5195
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
5196
+ # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
5197
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
5198
+ # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
5199
+ # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
1299
5200
  def shrink_dtype
1300
5201
  wrap_expr(_rbexpr.shrink_dtype)
1301
5202
  end
1302
5203
 
5204
+ # Create an object namespace of all list related methods.
5205
+ #
5206
+ # @return [ListExpr]
1303
5207
  def arr
1304
5208
  ListExpr.new(self)
1305
5209
  end
1306
5210
 
5211
+ # Create an object namespace of all categorical related methods.
5212
+ #
5213
+ # @return [CatExpr]
1307
5214
  def cat
1308
5215
  CatExpr.new(self)
1309
5216
  end
1310
5217
 
5218
+ # Create an object namespace of all datetime related methods.
5219
+ #
5220
+ # @return [DateTimeExpr]
1311
5221
  def dt
1312
5222
  DateTimeExpr.new(self)
1313
5223
  end
1314
5224
 
5225
+ # Create an object namespace of all meta related expression methods.
5226
+ #
5227
+ # @return [MetaExpr]
1315
5228
  def meta
1316
5229
  MetaExpr.new(self)
1317
5230
  end
1318
5231
 
5232
+ # Create an object namespace of all string related methods.
5233
+ #
5234
+ # @return [StringExpr]
1319
5235
  def str
1320
5236
  StringExpr.new(self)
1321
5237
  end
1322
5238
 
5239
+ # Create an object namespace of all struct related methods.
5240
+ #
5241
+ # @return [StructExpr]
1323
5242
  def struct
1324
5243
  StructExpr.new(self)
1325
5244
  end
@@ -1337,5 +5256,51 @@ module Polars
1337
5256
  def _to_expr(other)
1338
5257
  other.is_a?(Expr) ? other : Utils.lit(other)
1339
5258
  end
5259
+
5260
+ def _prepare_alpha(com, span, half_life, alpha)
5261
+ if [com, span, half_life, alpha].count { |v| !v.nil? } > 1
5262
+ raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
5263
+ end
5264
+
5265
+ if !com.nil?
5266
+ if com < 0.0
5267
+ raise ArgumentError, "Require 'com' >= 0 (found #{com})"
5268
+ end
5269
+ alpha = 1.0 / (1.0 + com)
5270
+
5271
+ elsif !span.nil?
5272
+ if span < 1.0
5273
+ raise ArgumentError, "Require 'span' >= 1 (found #{span})"
5274
+ end
5275
+ alpha = 2.0 / (span + 1.0)
5276
+
5277
+ elsif !half_life.nil?
5278
+ if half_life <= 0.0
5279
+ raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})"
5280
+ end
5281
+ alpha = 1.0 - Math.exp(-Math.log(2.0) / half_life)
5282
+
5283
+ elsif alpha.nil?
5284
+ raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
5285
+
5286
+ elsif alpha <= 0 || alpha > 1
5287
+ raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
5288
+ end
5289
+
5290
+ alpha
5291
+ end
5292
+
5293
+ def _prepare_rolling_window_args(window_size, min_periods)
5294
+ if window_size.is_a?(Integer)
5295
+ if min_periods.nil?
5296
+ min_periods = window_size
5297
+ end
5298
+ window_size = "#{window_size}i"
5299
+ end
5300
+ if min_periods.nil?
5301
+ min_periods = 1
5302
+ end
5303
+ [window_size, min_periods]
5304
+ end
1340
5305
  end
1341
5306
  end