polars-df 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/expr.rb
CHANGED
@@ -138,8 +138,45 @@ module Polars
|
|
138
138
|
Utils.lit(0) - self
|
139
139
|
end
|
140
140
|
|
141
|
-
#
|
142
|
-
#
|
141
|
+
# Cast to physical representation of the logical dtype.
|
142
|
+
#
|
143
|
+
# - `:date` -> `:i32`
|
144
|
+
# - `:datetime` -> `:i64`
|
145
|
+
# - `:time` -> `:i64`
|
146
|
+
# - `:duration` -> `:i64`
|
147
|
+
# - `:cat` -> `:u32`
|
148
|
+
# - Other data types will be left unchanged.
|
149
|
+
#
|
150
|
+
# @return [Expr]
|
151
|
+
#
|
152
|
+
# @example
|
153
|
+
# Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
|
154
|
+
# [
|
155
|
+
# Polars.col("vals").cast(:cat),
|
156
|
+
# Polars.col("vals")
|
157
|
+
# .cast(:cat)
|
158
|
+
# .to_physical
|
159
|
+
# .alias("vals_physical")
|
160
|
+
# ]
|
161
|
+
# )
|
162
|
+
# # =>
|
163
|
+
# # shape: (4, 2)
|
164
|
+
# # ┌──────┬───────────────┐
|
165
|
+
# # │ vals ┆ vals_physical │
|
166
|
+
# # │ --- ┆ --- │
|
167
|
+
# # │ cat ┆ u32 │
|
168
|
+
# # ╞══════╪═══════════════╡
|
169
|
+
# # │ a ┆ 0 │
|
170
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
171
|
+
# # │ x ┆ 1 │
|
172
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
173
|
+
# # │ null ┆ null │
|
174
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
175
|
+
# # │ a ┆ 0 │
|
176
|
+
# # └──────┴───────────────┘
|
177
|
+
def to_physical
|
178
|
+
wrap_expr(_rbexpr.to_physical)
|
179
|
+
end
|
143
180
|
|
144
181
|
# Check if any boolean value in a Boolean column is `true`.
|
145
182
|
#
|
@@ -258,13 +295,82 @@ module Polars
|
|
258
295
|
wrap_expr(_rbexpr.exp)
|
259
296
|
end
|
260
297
|
|
298
|
+
# Rename the output of an expression.
|
299
|
+
#
|
300
|
+
# @param name [String]
|
301
|
+
# New name.
|
302
|
+
#
|
303
|
+
# @return [Expr]
|
304
|
+
#
|
305
|
+
# @example
|
306
|
+
# df = Polars::DataFrame.new(
|
307
|
+
# {
|
308
|
+
# "a" => [1, 2, 3],
|
309
|
+
# "b" => ["a", "b", nil]
|
310
|
+
# }
|
311
|
+
# )
|
312
|
+
# df.select(
|
313
|
+
# [
|
314
|
+
# Polars.col("a").alias("bar"),
|
315
|
+
# Polars.col("b").alias("foo")
|
316
|
+
# ]
|
317
|
+
# )
|
318
|
+
# # =>
|
319
|
+
# # shape: (3, 2)
|
320
|
+
# # ┌─────┬──────┐
|
321
|
+
# # │ bar ┆ foo │
|
322
|
+
# # │ --- ┆ --- │
|
323
|
+
# # │ i64 ┆ str │
|
324
|
+
# # ╞═════╪══════╡
|
325
|
+
# # │ 1 ┆ a │
|
326
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
327
|
+
# # │ 2 ┆ b │
|
328
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
329
|
+
# # │ 3 ┆ null │
|
330
|
+
# # └─────┴──────┘
|
261
331
|
def alias(name)
|
262
332
|
wrap_expr(_rbexpr._alias(name))
|
263
333
|
end
|
264
334
|
|
265
335
|
# TODO support symbols for exclude
|
266
336
|
|
337
|
+
# Exclude certain columns from a wildcard/regex selection.
|
338
|
+
#
|
339
|
+
# You may also use regexes in the exclude list. They must start with `^` and end
|
340
|
+
# with `$`.
|
341
|
+
#
|
342
|
+
# @param columns [Object]
|
343
|
+
# Column(s) to exclude from selection.
|
344
|
+
# This can be:
|
267
345
|
#
|
346
|
+
# - a column name, or multiple column names
|
347
|
+
# - a regular expression starting with `^` and ending with `$`
|
348
|
+
# - a dtype or multiple dtypes
|
349
|
+
#
|
350
|
+
# @return [Expr]
|
351
|
+
#
|
352
|
+
# @example
|
353
|
+
# df = Polars::DataFrame.new(
|
354
|
+
# {
|
355
|
+
# "aa" => [1, 2, 3],
|
356
|
+
# "ba" => ["a", "b", nil],
|
357
|
+
# "cc" => [nil, 2.5, 1.5]
|
358
|
+
# }
|
359
|
+
# )
|
360
|
+
# df.select(Polars.all.exclude("ba"))
|
361
|
+
# # =>
|
362
|
+
# # shape: (3, 2)
|
363
|
+
# # ┌─────┬──────┐
|
364
|
+
# # │ aa ┆ cc │
|
365
|
+
# # │ --- ┆ --- │
|
366
|
+
# # │ i64 ┆ f64 │
|
367
|
+
# # ╞═════╪══════╡
|
368
|
+
# # │ 1 ┆ null │
|
369
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
370
|
+
# # │ 2 ┆ 2.5 │
|
371
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
372
|
+
# # │ 3 ┆ 1.5 │
|
373
|
+
# # └─────┴──────┘
|
268
374
|
def exclude(columns)
|
269
375
|
if columns.is_a?(String)
|
270
376
|
columns = [columns]
|
@@ -285,14 +391,43 @@ module Polars
|
|
285
391
|
end
|
286
392
|
end
|
287
393
|
|
394
|
+
# Keep the original root name of the expression.
|
395
|
+
#
|
396
|
+
# @return [Expr]
|
397
|
+
#
|
398
|
+
# @example
|
399
|
+
# df = Polars::DataFrame.new(
|
400
|
+
# {
|
401
|
+
# "a" => [1, 2],
|
402
|
+
# "b" => [3, 4]
|
403
|
+
# }
|
404
|
+
# )
|
405
|
+
# df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
|
406
|
+
# # =>
|
407
|
+
# # shape: (2, 2)
|
408
|
+
# # ┌─────┬─────┐
|
409
|
+
# # │ a ┆ b │
|
410
|
+
# # │ --- ┆ --- │
|
411
|
+
# # │ i64 ┆ i64 │
|
412
|
+
# # ╞═════╪═════╡
|
413
|
+
# # │ 9 ┆ 3 │
|
414
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
415
|
+
# # │ 18 ┆ 4 │
|
416
|
+
# # └─────┴─────┘
|
288
417
|
def keep_name
|
289
418
|
wrap_expr(_rbexpr.keep_name)
|
290
419
|
end
|
291
420
|
|
421
|
+
# Add a prefix to the root column name of the expression.
|
422
|
+
#
|
423
|
+
# @return [Expr]
|
292
424
|
def prefix(prefix)
|
293
425
|
wrap_expr(_rbexpr.prefix(prefix))
|
294
426
|
end
|
295
427
|
|
428
|
+
# Add a suffix to the root column name of the expression.
|
429
|
+
#
|
430
|
+
# @return [Expr]
|
296
431
|
def suffix(suffix)
|
297
432
|
wrap_expr(_rbexpr.suffix(suffix))
|
298
433
|
end
|
@@ -464,14 +599,112 @@ module Polars
|
|
464
599
|
wrap_expr(_rbexpr.is_infinite)
|
465
600
|
end
|
466
601
|
|
602
|
+
# Returns a boolean Series indicating which values are NaN.
|
603
|
+
#
|
604
|
+
# @note
|
605
|
+
# Floating point `NaN` (Not A Number) should not be confused
|
606
|
+
# with missing data represented as `nil`.
|
607
|
+
#
|
608
|
+
# @return [Expr]
|
609
|
+
#
|
610
|
+
# @example
|
611
|
+
# df = Polars::DataFrame.new(
|
612
|
+
# {
|
613
|
+
# "a" => [1, 2, nil, 1, 5],
|
614
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
615
|
+
# }
|
616
|
+
# )
|
617
|
+
# df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
|
618
|
+
# # =>
|
619
|
+
# # shape: (5, 3)
|
620
|
+
# # ┌──────┬─────┬─────────┐
|
621
|
+
# # │ a ┆ b ┆ b_isnan │
|
622
|
+
# # │ --- ┆ --- ┆ --- │
|
623
|
+
# # │ i64 ┆ f64 ┆ bool │
|
624
|
+
# # ╞══════╪═════╪═════════╡
|
625
|
+
# # │ 1 ┆ 1.0 ┆ false │
|
626
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
627
|
+
# # │ 2 ┆ 2.0 ┆ false │
|
628
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
629
|
+
# # │ null ┆ NaN ┆ true │
|
630
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
631
|
+
# # │ 1 ┆ 1.0 ┆ false │
|
632
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
633
|
+
# # │ 5 ┆ 5.0 ┆ false │
|
634
|
+
# # └──────┴─────┴─────────┘
|
467
635
|
def is_nan
|
468
636
|
wrap_expr(_rbexpr.is_nan)
|
469
637
|
end
|
470
638
|
|
639
|
+
# Returns a boolean Series indicating which values are not NaN.
|
640
|
+
#
|
641
|
+
# @note
|
642
|
+
# Floating point `NaN` (Not A Number) should not be confused
|
643
|
+
# with missing data represented as `nil`.
|
644
|
+
#
|
645
|
+
# @return [Expr]
|
646
|
+
#
|
647
|
+
# @example
|
648
|
+
# df = Polars::DataFrame.new(
|
649
|
+
# {
|
650
|
+
# "a" => [1, 2, nil, 1, 5],
|
651
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
652
|
+
# }
|
653
|
+
# )
|
654
|
+
# df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
|
655
|
+
# # =>
|
656
|
+
# # shape: (5, 3)
|
657
|
+
# # ┌──────┬─────┬──────────────┐
|
658
|
+
# # │ a ┆ b ┆ b_is_not_nan │
|
659
|
+
# # │ --- ┆ --- ┆ --- │
|
660
|
+
# # │ i64 ┆ f64 ┆ bool │
|
661
|
+
# # ╞══════╪═════╪══════════════╡
|
662
|
+
# # │ 1 ┆ 1.0 ┆ true │
|
663
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
664
|
+
# # │ 2 ┆ 2.0 ┆ true │
|
665
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
666
|
+
# # │ null ┆ NaN ┆ false │
|
667
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
668
|
+
# # │ 1 ┆ 1.0 ┆ true │
|
669
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
670
|
+
# # │ 5 ┆ 5.0 ┆ true │
|
671
|
+
# # └──────┴─────┴──────────────┘
|
471
672
|
def is_not_nan
|
472
673
|
wrap_expr(_rbexpr.is_not_nan)
|
473
674
|
end
|
474
675
|
|
676
|
+
# Get the group indexes of the group by operation.
|
677
|
+
#
|
678
|
+
# Should be used in aggregation context only.
|
679
|
+
#
|
680
|
+
# @return [Expr]
|
681
|
+
#
|
682
|
+
# @example
|
683
|
+
# df = Polars::DataFrame.new(
|
684
|
+
# {
|
685
|
+
# "group" => [
|
686
|
+
# "one",
|
687
|
+
# "one",
|
688
|
+
# "one",
|
689
|
+
# "two",
|
690
|
+
# "two",
|
691
|
+
# "two"
|
692
|
+
# ],
|
693
|
+
# "value" => [94, 95, 96, 97, 97, 99]
|
694
|
+
# }
|
695
|
+
# )
|
696
|
+
# df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
697
|
+
# # =>
|
698
|
+
# # shape: (2, 2)
|
699
|
+
# # ┌───────┬───────────┐
|
700
|
+
# # │ group ┆ value │
|
701
|
+
# # │ --- ┆ --- │
|
702
|
+
# # │ str ┆ list[u32] │
|
703
|
+
# # ╞═══════╪═══════════╡
|
704
|
+
# # │ one ┆ [0, 1, 2] │
|
705
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
706
|
+
# # │ two ┆ [3, 4, 5] │
|
707
|
+
# # └───────┴───────────┘
|
475
708
|
def agg_groups
|
476
709
|
wrap_expr(_rbexpr.agg_groups)
|
477
710
|
end
|
@@ -557,6 +790,36 @@ module Polars
|
|
557
790
|
wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
|
558
791
|
end
|
559
792
|
|
793
|
+
# Append expressions.
|
794
|
+
#
|
795
|
+
# This is done by adding the chunks of `other` to this `Series`.
|
796
|
+
#
|
797
|
+
# @param other [Expr]
|
798
|
+
# Expression to append.
|
799
|
+
# @param upcast [Boolean]
|
800
|
+
# Cast both `Series` to the same supertype.
|
801
|
+
#
|
802
|
+
# @return [Expr]
|
803
|
+
#
|
804
|
+
# @example
|
805
|
+
# df = Polars::DataFrame.new(
|
806
|
+
# {
|
807
|
+
# "a" => [8, 9, 10],
|
808
|
+
# "b" => [nil, 4, 4]
|
809
|
+
# }
|
810
|
+
# )
|
811
|
+
# df.select(Polars.all.head(1).append(Polars.all.tail(1)))
|
812
|
+
# # =>
|
813
|
+
# # shape: (2, 2)
|
814
|
+
# # ┌─────┬──────┐
|
815
|
+
# # │ a ┆ b │
|
816
|
+
# # │ --- ┆ --- │
|
817
|
+
# # │ i64 ┆ i64 │
|
818
|
+
# # ╞═════╪══════╡
|
819
|
+
# # │ 8 ┆ null │
|
820
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
821
|
+
# # │ 10 ┆ 4 │
|
822
|
+
# # └─────┴──────┘
|
560
823
|
def append(other, upcast: true)
|
561
824
|
other = Utils.expr_to_lit_or_expr(other)
|
562
825
|
wrap_expr(_rbexpr.append(other._rbexpr, upcast))
|
@@ -567,7 +830,7 @@ module Polars
|
|
567
830
|
# @return [Expr]
|
568
831
|
#
|
569
832
|
# @example Create a Series with 3 nulls, append column a then rechunk
|
570
|
-
# df = Polars::DataFrame.new({"a"
|
833
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
571
834
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
572
835
|
# # =>
|
573
836
|
# # shape: (6, 1)
|
@@ -650,22 +913,182 @@ module Polars
|
|
650
913
|
wrap_expr(_rbexpr.drop_nans)
|
651
914
|
end
|
652
915
|
|
916
|
+
# Get an array with the cumulative sum computed at every element.
|
917
|
+
#
|
918
|
+
# @param reverse [Boolean]
|
919
|
+
# Reverse the operation.
|
920
|
+
#
|
921
|
+
# @return [Expr]
|
922
|
+
#
|
923
|
+
# @note
|
924
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
925
|
+
# `:i64` before summing to prevent overflow issues.
|
926
|
+
#
|
927
|
+
# @example
|
928
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
929
|
+
# df.select(
|
930
|
+
# [
|
931
|
+
# Polars.col("a").cumsum,
|
932
|
+
# Polars.col("a").cumsum(reverse: true).alias("a_reverse")
|
933
|
+
# ]
|
934
|
+
# )
|
935
|
+
# # =>
|
936
|
+
# # shape: (4, 2)
|
937
|
+
# # ┌─────┬───────────┐
|
938
|
+
# # │ a ┆ a_reverse │
|
939
|
+
# # │ --- ┆ --- │
|
940
|
+
# # │ i64 ┆ i64 │
|
941
|
+
# # ╞═════╪═══════════╡
|
942
|
+
# # │ 1 ┆ 10 │
|
943
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
944
|
+
# # │ 3 ┆ 9 │
|
945
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
946
|
+
# # │ 6 ┆ 7 │
|
947
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
948
|
+
# # │ 10 ┆ 4 │
|
949
|
+
# # └─────┴───────────┘
|
653
950
|
def cumsum(reverse: false)
|
654
951
|
wrap_expr(_rbexpr.cumsum(reverse))
|
655
952
|
end
|
656
953
|
|
954
|
+
# Get an array with the cumulative product computed at every element.
|
955
|
+
#
|
956
|
+
# @param reverse [Boolean]
|
957
|
+
# Reverse the operation.
|
958
|
+
#
|
959
|
+
# @return [Expr]
|
960
|
+
#
|
961
|
+
# @note
|
962
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
963
|
+
# `:i64` before multiplying to prevent overflow issues.
|
964
|
+
#
|
965
|
+
# @example
|
966
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
967
|
+
# df.select(
|
968
|
+
# [
|
969
|
+
# Polars.col("a").cumprod,
|
970
|
+
# Polars.col("a").cumprod(reverse: true).alias("a_reverse")
|
971
|
+
# ]
|
972
|
+
# )
|
973
|
+
# # =>
|
974
|
+
# # shape: (4, 2)
|
975
|
+
# # ┌─────┬───────────┐
|
976
|
+
# # │ a ┆ a_reverse │
|
977
|
+
# # │ --- ┆ --- │
|
978
|
+
# # │ i64 ┆ i64 │
|
979
|
+
# # ╞═════╪═══════════╡
|
980
|
+
# # │ 1 ┆ 24 │
|
981
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
982
|
+
# # │ 2 ┆ 24 │
|
983
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
984
|
+
# # │ 6 ┆ 12 │
|
985
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
986
|
+
# # │ 24 ┆ 4 │
|
987
|
+
# # └─────┴───────────┘
|
657
988
|
def cumprod(reverse: false)
|
658
989
|
wrap_expr(_rbexpr.cumprod(reverse))
|
659
990
|
end
|
660
991
|
|
992
|
+
# Get an array with the cumulative min computed at every element.
|
993
|
+
#
|
994
|
+
# @param reverse [Boolean]
|
995
|
+
# Reverse the operation.
|
996
|
+
#
|
997
|
+
# @return [Expr]
|
998
|
+
#
|
999
|
+
# @example
|
1000
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1001
|
+
# df.select(
|
1002
|
+
# [
|
1003
|
+
# Polars.col("a").cummin,
|
1004
|
+
# Polars.col("a").cummin(reverse: true).alias("a_reverse")
|
1005
|
+
# ]
|
1006
|
+
# )
|
1007
|
+
# # =>
|
1008
|
+
# # shape: (4, 2)
|
1009
|
+
# # ┌─────┬───────────┐
|
1010
|
+
# # │ a ┆ a_reverse │
|
1011
|
+
# # │ --- ┆ --- │
|
1012
|
+
# # │ i64 ┆ i64 │
|
1013
|
+
# # ╞═════╪═══════════╡
|
1014
|
+
# # │ 1 ┆ 1 │
|
1015
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1016
|
+
# # │ 1 ┆ 2 │
|
1017
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1018
|
+
# # │ 1 ┆ 3 │
|
1019
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1020
|
+
# # │ 1 ┆ 4 │
|
1021
|
+
# # └─────┴───────────┘
|
661
1022
|
def cummin(reverse: false)
|
662
1023
|
wrap_expr(_rbexpr.cummin(reverse))
|
663
1024
|
end
|
664
1025
|
|
1026
|
+
# Get an array with the cumulative max computed at every element.
|
1027
|
+
#
|
1028
|
+
# @param reverse [Boolean]
|
1029
|
+
# Reverse the operation.
|
1030
|
+
#
|
1031
|
+
# @return [Expr]
|
1032
|
+
#
|
1033
|
+
# @example
|
1034
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1035
|
+
# df.select(
|
1036
|
+
# [
|
1037
|
+
# Polars.col("a").cummax,
|
1038
|
+
# Polars.col("a").cummax(reverse: true).alias("a_reverse")
|
1039
|
+
# ]
|
1040
|
+
# )
|
1041
|
+
# # =>
|
1042
|
+
# # shape: (4, 2)
|
1043
|
+
# # ┌─────┬───────────┐
|
1044
|
+
# # │ a ┆ a_reverse │
|
1045
|
+
# # │ --- ┆ --- │
|
1046
|
+
# # │ i64 ┆ i64 │
|
1047
|
+
# # ╞═════╪═══════════╡
|
1048
|
+
# # │ 1 ┆ 4 │
|
1049
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1050
|
+
# # │ 2 ┆ 4 │
|
1051
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1052
|
+
# # │ 3 ┆ 4 │
|
1053
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1054
|
+
# # │ 4 ┆ 4 │
|
1055
|
+
# # └─────┴───────────┘
|
665
1056
|
def cummax(reverse: false)
|
666
1057
|
wrap_expr(_rbexpr.cummax(reverse))
|
667
1058
|
end
|
668
1059
|
|
1060
|
+
# Get an array with the cumulative count computed at every element.
|
1061
|
+
#
|
1062
|
+
# Counts from 0 to `len - 1`.
|
1063
|
+
#
|
1064
|
+
# @param reverse [Boolean]
|
1065
|
+
# Reverse the operation.
|
1066
|
+
#
|
1067
|
+
# @return [Expr]
|
1068
|
+
#
|
1069
|
+
# @example
|
1070
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1071
|
+
# df.select(
|
1072
|
+
# [
|
1073
|
+
# Polars.col("a").cumcount,
|
1074
|
+
# Polars.col("a").cumcount(reverse: true).alias("a_reverse")
|
1075
|
+
# ]
|
1076
|
+
# )
|
1077
|
+
# # =>
|
1078
|
+
# # shape: (4, 2)
|
1079
|
+
# # ┌─────┬───────────┐
|
1080
|
+
# # │ a ┆ a_reverse │
|
1081
|
+
# # │ --- ┆ --- │
|
1082
|
+
# # │ u32 ┆ u32 │
|
1083
|
+
# # ╞═════╪═══════════╡
|
1084
|
+
# # │ 0 ┆ 3 │
|
1085
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1086
|
+
# # │ 1 ┆ 2 │
|
1087
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1088
|
+
# # │ 2 ┆ 1 │
|
1089
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1090
|
+
# # │ 3 ┆ 0 │
|
1091
|
+
# # └─────┴───────────┘
|
669
1092
|
def cumcount(reverse: false)
|
670
1093
|
wrap_expr(_rbexpr.cumcount(reverse))
|
671
1094
|
end
|
@@ -755,6 +1178,30 @@ module Polars
|
|
755
1178
|
wrap_expr(_rbexpr.round(decimals))
|
756
1179
|
end
|
757
1180
|
|
1181
|
+
# Compute the dot/inner product between two Expressions.
|
1182
|
+
#
|
1183
|
+
# @param other [Expr]
|
1184
|
+
# Expression to compute dot product with.
|
1185
|
+
#
|
1186
|
+
# @return [Expr]
|
1187
|
+
#
|
1188
|
+
# @example
|
1189
|
+
# df = Polars::DataFrame.new(
|
1190
|
+
# {
|
1191
|
+
# "a" => [1, 3, 5],
|
1192
|
+
# "b" => [2, 4, 6]
|
1193
|
+
# }
|
1194
|
+
# )
|
1195
|
+
# df.select(Polars.col("a").dot(Polars.col("b")))
|
1196
|
+
# # =>
|
1197
|
+
# # shape: (1, 1)
|
1198
|
+
# # ┌─────┐
|
1199
|
+
# # │ a │
|
1200
|
+
# # │ --- │
|
1201
|
+
# # │ i64 │
|
1202
|
+
# # ╞═════╡
|
1203
|
+
# # │ 44 │
|
1204
|
+
# # └─────┘
|
758
1205
|
def dot(other)
|
759
1206
|
other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
|
760
1207
|
wrap_expr(_rbexpr.dot(other._rbexpr))
|
@@ -789,19 +1236,206 @@ module Polars
|
|
789
1236
|
wrap_expr(_rbexpr.mode)
|
790
1237
|
end
|
791
1238
|
|
1239
|
+
# Cast between data types.
|
1240
|
+
#
|
1241
|
+
# @param dtype [Symbol]
|
1242
|
+
# DataType to cast to.
|
1243
|
+
# @param strict [Boolean]
|
1244
|
+
# Throw an error if a cast could not be done.
|
1245
|
+
# For instance, due to an overflow.
|
1246
|
+
#
|
1247
|
+
# @return [Expr]
|
1248
|
+
#
|
1249
|
+
# @example
|
1250
|
+
# df = Polars::DataFrame.new(
|
1251
|
+
# {
|
1252
|
+
# "a" => [1, 2, 3],
|
1253
|
+
# "b" => ["4", "5", "6"]
|
1254
|
+
# }
|
1255
|
+
# )
|
1256
|
+
# df.with_columns(
|
1257
|
+
# [
|
1258
|
+
# Polars.col("a").cast(:f64),
|
1259
|
+
# Polars.col("b").cast(:i32)
|
1260
|
+
# ]
|
1261
|
+
# )
|
1262
|
+
# # =>
|
1263
|
+
# # shape: (3, 2)
|
1264
|
+
# # ┌─────┬─────┐
|
1265
|
+
# # │ a ┆ b │
|
1266
|
+
# # │ --- ┆ --- │
|
1267
|
+
# # │ f64 ┆ i32 │
|
1268
|
+
# # ╞═════╪═════╡
|
1269
|
+
# # │ 1.0 ┆ 4 │
|
1270
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1271
|
+
# # │ 2.0 ┆ 5 │
|
1272
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1273
|
+
# # │ 3.0 ┆ 6 │
|
1274
|
+
# # └─────┴─────┘
|
792
1275
|
def cast(dtype, strict: true)
|
793
1276
|
dtype = Utils.rb_type_to_dtype(dtype)
|
794
1277
|
wrap_expr(_rbexpr.cast(dtype, strict))
|
795
1278
|
end
|
796
1279
|
|
1280
|
+
# Sort this column. In a projection/selection context, the whole column is sorted.
|
1281
|
+
#
|
1282
|
+
# If used in a groupby context, the groups are sorted.
|
1283
|
+
#
|
1284
|
+
# @param reverse [Boolean]
|
1285
|
+
# false -> order from small to large.
|
1286
|
+
# true -> order from large to small.
|
1287
|
+
# @param nulls_last [Boolean]
|
1288
|
+
# If true, nulls are considered to be larger than any valid value.
|
1289
|
+
#
|
1290
|
+
# @return [Expr]
|
1291
|
+
#
|
1292
|
+
# @example
|
1293
|
+
# df = Polars::DataFrame.new(
|
1294
|
+
# {
|
1295
|
+
# "group" => [
|
1296
|
+
# "one",
|
1297
|
+
# "one",
|
1298
|
+
# "one",
|
1299
|
+
# "two",
|
1300
|
+
# "two",
|
1301
|
+
# "two"
|
1302
|
+
# ],
|
1303
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1304
|
+
# }
|
1305
|
+
# )
|
1306
|
+
# df.select(Polars.col("value").sort)
|
1307
|
+
# # =>
|
1308
|
+
# # shape: (6, 1)
|
1309
|
+
# # ┌───────┐
|
1310
|
+
# # │ value │
|
1311
|
+
# # │ --- │
|
1312
|
+
# # │ i64 │
|
1313
|
+
# # ╞═══════╡
|
1314
|
+
# # │ 1 │
|
1315
|
+
# # ├╌╌╌╌╌╌╌┤
|
1316
|
+
# # │ 2 │
|
1317
|
+
# # ├╌╌╌╌╌╌╌┤
|
1318
|
+
# # │ 3 │
|
1319
|
+
# # ├╌╌╌╌╌╌╌┤
|
1320
|
+
# # │ 4 │
|
1321
|
+
# # ├╌╌╌╌╌╌╌┤
|
1322
|
+
# # │ 98 │
|
1323
|
+
# # ├╌╌╌╌╌╌╌┤
|
1324
|
+
# # │ 99 │
|
1325
|
+
# # └───────┘
|
1326
|
+
#
|
1327
|
+
# @example
|
1328
|
+
# df.select(Polars.col("value").sort)
|
1329
|
+
# # =>
|
1330
|
+
# # shape: (6, 1)
|
1331
|
+
# # ┌───────┐
|
1332
|
+
# # │ value │
|
1333
|
+
# # │ --- │
|
1334
|
+
# # │ i64 │
|
1335
|
+
# # ╞═══════╡
|
1336
|
+
# # │ 1 │
|
1337
|
+
# # ├╌╌╌╌╌╌╌┤
|
1338
|
+
# # │ 2 │
|
1339
|
+
# # ├╌╌╌╌╌╌╌┤
|
1340
|
+
# # │ 3 │
|
1341
|
+
# # ├╌╌╌╌╌╌╌┤
|
1342
|
+
# # │ 4 │
|
1343
|
+
# # ├╌╌╌╌╌╌╌┤
|
1344
|
+
# # │ 98 │
|
1345
|
+
# # ├╌╌╌╌╌╌╌┤
|
1346
|
+
# # │ 99 │
|
1347
|
+
# # └───────┘
|
1348
|
+
#
|
1349
|
+
# @example
|
1350
|
+
# df.groupby("group").agg(Polars.col("value").sort)
|
1351
|
+
# # =>
|
1352
|
+
# # shape: (2, 2)
|
1353
|
+
# # ┌───────┬────────────┐
|
1354
|
+
# # │ group ┆ value │
|
1355
|
+
# # │ --- ┆ --- │
|
1356
|
+
# # │ str ┆ list[i64] │
|
1357
|
+
# # ╞═══════╪════════════╡
|
1358
|
+
# # │ two ┆ [3, 4, 99] │
|
1359
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
1360
|
+
# # │ one ┆ [1, 2, 98] │
|
1361
|
+
# # └───────┴────────────┘
|
797
1362
|
def sort(reverse: false, nulls_last: false)
|
798
1363
|
wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
|
799
1364
|
end
|
800
1365
|
|
1366
|
+
# Return the `k` largest elements.
|
1367
|
+
#
|
1368
|
+
# If `reverse: true`, the smallest elements will be given.
|
1369
|
+
#
|
1370
|
+
# @param k [Integer]
|
1371
|
+
# Number of elements to return.
|
1372
|
+
# @param reverse [Boolean]
|
1373
|
+
# Return the smallest elements.
|
1374
|
+
#
|
1375
|
+
# @return [Expr]
|
1376
|
+
#
|
1377
|
+
# @example
|
1378
|
+
# df = Polars::DataFrame.new(
|
1379
|
+
# {
|
1380
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1381
|
+
# }
|
1382
|
+
# )
|
1383
|
+
# df.select(
|
1384
|
+
# [
|
1385
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1386
|
+
# Polars.col("value").top_k(reverse: true).alias("bottom_k")
|
1387
|
+
# ]
|
1388
|
+
# )
|
1389
|
+
# # =>
|
1390
|
+
# # shape: (5, 2)
|
1391
|
+
# # ┌───────┬──────────┐
|
1392
|
+
# # │ top_k ┆ bottom_k │
|
1393
|
+
# # │ --- ┆ --- │
|
1394
|
+
# # │ i64 ┆ i64 │
|
1395
|
+
# # ╞═══════╪══════════╡
|
1396
|
+
# # │ 99 ┆ 1 │
|
1397
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1398
|
+
# # │ 98 ┆ 2 │
|
1399
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1400
|
+
# # │ 4 ┆ 3 │
|
1401
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1402
|
+
# # │ 3 ┆ 4 │
|
1403
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1404
|
+
# # │ 2 ┆ 98 │
|
1405
|
+
# # └───────┴──────────┘
|
801
1406
|
def top_k(k: 5, reverse: false)
|
802
1407
|
wrap_expr(_rbexpr.top_k(k, reverse))
|
803
1408
|
end
|
804
1409
|
|
1410
|
+
# Get the index values that would sort this column.
|
1411
|
+
#
|
1412
|
+
# @param reverse [Boolean]
|
1413
|
+
# Sort in reverse (descending) order.
|
1414
|
+
# @param nulls_last [Boolean]
|
1415
|
+
# Place null values last instead of first.
|
1416
|
+
#
|
1417
|
+
# @return [Expr]
|
1418
|
+
#
|
1419
|
+
# @example
|
1420
|
+
# df = Polars::DataFrame.new(
|
1421
|
+
# {
|
1422
|
+
# "a" => [20, 10, 30]
|
1423
|
+
# }
|
1424
|
+
# )
|
1425
|
+
# df.select(Polars.col("a").arg_sort)
|
1426
|
+
# # =>
|
1427
|
+
# # shape: (3, 1)
|
1428
|
+
# # ┌─────┐
|
1429
|
+
# # │ a │
|
1430
|
+
# # │ --- │
|
1431
|
+
# # │ u32 │
|
1432
|
+
# # ╞═════╡
|
1433
|
+
# # │ 1 │
|
1434
|
+
# # ├╌╌╌╌╌┤
|
1435
|
+
# # │ 0 │
|
1436
|
+
# # ├╌╌╌╌╌┤
|
1437
|
+
# # │ 2 │
|
1438
|
+
# # └─────┘
|
805
1439
|
def arg_sort(reverse: false, nulls_last: false)
|
806
1440
|
wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
|
807
1441
|
end
|
@@ -854,15 +1488,91 @@ module Polars
|
|
854
1488
|
wrap_expr(_rbexpr.arg_min)
|
855
1489
|
end
|
856
1490
|
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
1491
|
+
# Find indices where elements should be inserted to maintain order.
|
1492
|
+
#
|
1493
|
+
# @param element [Object]
|
1494
|
+
# Expression or scalar value.
|
1495
|
+
#
|
1496
|
+
# @return [Expr]
|
1497
|
+
#
|
1498
|
+
# @example
|
1499
|
+
# df = Polars::DataFrame.new(
|
1500
|
+
# {
|
1501
|
+
# "values" => [1, 2, 3, 5]
|
1502
|
+
# }
|
1503
|
+
# )
|
1504
|
+
# df.select(
|
1505
|
+
# [
|
1506
|
+
# Polars.col("values").search_sorted(0).alias("zero"),
|
1507
|
+
# Polars.col("values").search_sorted(3).alias("three"),
|
1508
|
+
# Polars.col("values").search_sorted(6).alias("six")
|
1509
|
+
# ]
|
1510
|
+
# )
|
1511
|
+
# # =>
|
1512
|
+
# # shape: (1, 3)
|
1513
|
+
# # ┌──────┬───────┬─────┐
|
1514
|
+
# # │ zero ┆ three ┆ six │
|
1515
|
+
# # │ --- ┆ --- ┆ --- │
|
1516
|
+
# # │ u32 ┆ u32 ┆ u32 │
|
1517
|
+
# # ╞══════╪═══════╪═════╡
|
1518
|
+
# # │ 0 ┆ 2 ┆ 4 │
|
1519
|
+
# # └──────┴───────┴─────┘
|
1520
|
+
def search_sorted(element)
|
1521
|
+
element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
|
1522
|
+
wrap_expr(_rbexpr.search_sorted(element._rbexpr))
|
1523
|
+
end
|
1524
|
+
|
1525
|
+
# Sort this column by the ordering of another column, or multiple other columns.
|
1526
|
+
#
|
1527
|
+
# In a projection/selection context, the whole column is sorted.
|
1528
|
+
# If used in a groupby context, the groups are sorted.
|
1529
|
+
#
|
1530
|
+
# @param by [Object]
|
1531
|
+
# The column(s) used for sorting.
|
1532
|
+
# @param reverse [Boolean]
|
1533
|
+
# false -> order from small to large.
|
1534
|
+
# true -> order from large to small.
|
1535
|
+
#
|
1536
|
+
# @return [Expr]
|
1537
|
+
#
|
1538
|
+
# @example
|
1539
|
+
# df = Polars::DataFrame.new(
|
1540
|
+
# {
|
1541
|
+
# "group" => [
|
1542
|
+
# "one",
|
1543
|
+
# "one",
|
1544
|
+
# "one",
|
1545
|
+
# "two",
|
1546
|
+
# "two",
|
1547
|
+
# "two"
|
1548
|
+
# ],
|
1549
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1550
|
+
# }
|
1551
|
+
# )
|
1552
|
+
# df.select(Polars.col("group").sort_by("value"))
|
1553
|
+
# # =>
|
1554
|
+
# # shape: (6, 1)
|
1555
|
+
# # ┌───────┐
|
1556
|
+
# # │ group │
|
1557
|
+
# # │ --- │
|
1558
|
+
# # │ str │
|
1559
|
+
# # ╞═══════╡
|
1560
|
+
# # │ one │
|
1561
|
+
# # ├╌╌╌╌╌╌╌┤
|
1562
|
+
# # │ one │
|
1563
|
+
# # ├╌╌╌╌╌╌╌┤
|
1564
|
+
# # │ two │
|
1565
|
+
# # ├╌╌╌╌╌╌╌┤
|
1566
|
+
# # │ two │
|
1567
|
+
# # ├╌╌╌╌╌╌╌┤
|
1568
|
+
# # │ one │
|
1569
|
+
# # ├╌╌╌╌╌╌╌┤
|
1570
|
+
# # │ two │
|
1571
|
+
# # └───────┘
|
1572
|
+
def sort_by(by, reverse: false)
|
1573
|
+
if !by.is_a?(Array)
|
1574
|
+
by = [by]
|
1575
|
+
end
|
866
1576
|
if !reverse.is_a?(Array)
|
867
1577
|
reverse = [reverse]
|
868
1578
|
end
|
@@ -871,6 +1581,39 @@ module Polars
|
|
871
1581
|
wrap_expr(_rbexpr.sort_by(by, reverse))
|
872
1582
|
end
|
873
1583
|
|
1584
|
+
# Take values by index.
|
1585
|
+
#
|
1586
|
+
# @param indices [Expr]
|
1587
|
+
# An expression that leads to a `:u32` dtyped Series.
|
1588
|
+
#
|
1589
|
+
# @return [Expr]
|
1590
|
+
#
|
1591
|
+
# @example
|
1592
|
+
# df = Polars::DataFrame.new(
|
1593
|
+
# {
|
1594
|
+
# "group" => [
|
1595
|
+
# "one",
|
1596
|
+
# "one",
|
1597
|
+
# "one",
|
1598
|
+
# "two",
|
1599
|
+
# "two",
|
1600
|
+
# "two"
|
1601
|
+
# ],
|
1602
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1603
|
+
# }
|
1604
|
+
# )
|
1605
|
+
# df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
|
1606
|
+
# # =>
|
1607
|
+
# # shape: (2, 2)
|
1608
|
+
# # ┌───────┬───────┐
|
1609
|
+
# # │ group ┆ value │
|
1610
|
+
# # │ --- ┆ --- │
|
1611
|
+
# # │ str ┆ i64 │
|
1612
|
+
# # ╞═══════╪═══════╡
|
1613
|
+
# # │ one ┆ 98 │
|
1614
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1615
|
+
# # │ two ┆ 99 │
|
1616
|
+
# # └───────┴───────┘
|
874
1617
|
def take(indices)
|
875
1618
|
if indices.is_a?(Array)
|
876
1619
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
@@ -909,11 +1652,105 @@ module Polars
|
|
909
1652
|
wrap_expr(_rbexpr.shift(periods))
|
910
1653
|
end
|
911
1654
|
|
1655
|
+
# Shift the values by a given period and fill the resulting null values.
|
1656
|
+
#
|
1657
|
+
# @param periods [Integer]
|
1658
|
+
# Number of places to shift (may be negative).
|
1659
|
+
# @param fill_value [Object]
|
1660
|
+
# Fill nil values with the result of this expression.
|
1661
|
+
#
|
1662
|
+
# @return [Expr]
|
1663
|
+
#
|
1664
|
+
# @example
|
1665
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
1666
|
+
# df.select(Polars.col("foo").shift_and_fill(1, "a"))
|
1667
|
+
# # =>
|
1668
|
+
# # shape: (4, 1)
|
1669
|
+
# # ┌─────┐
|
1670
|
+
# # │ foo │
|
1671
|
+
# # │ --- │
|
1672
|
+
# # │ str │
|
1673
|
+
# # ╞═════╡
|
1674
|
+
# # │ a │
|
1675
|
+
# # ├╌╌╌╌╌┤
|
1676
|
+
# # │ 1 │
|
1677
|
+
# # ├╌╌╌╌╌┤
|
1678
|
+
# # │ 2 │
|
1679
|
+
# # ├╌╌╌╌╌┤
|
1680
|
+
# # │ 3 │
|
1681
|
+
# # └─────┘
|
912
1682
|
def shift_and_fill(periods, fill_value)
|
913
1683
|
fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
|
914
1684
|
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
915
1685
|
end
|
916
1686
|
|
1687
|
+
# Fill null values using the specified value or strategy.
|
1688
|
+
#
|
1689
|
+
# To interpolate over null values, see `interpolate`.
|
1690
|
+
#
|
1691
|
+
# @param value [Object]
|
1692
|
+
# Value used to fill null values.
|
1693
|
+
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
|
1694
|
+
# Strategy used to fill null values.
|
1695
|
+
# @param limit [Integer]
|
1696
|
+
# Number of consecutive null values to fill when using the 'forward' or
|
1697
|
+
# 'backward' strategy.
|
1698
|
+
#
|
1699
|
+
# @return [Expr]
|
1700
|
+
#
|
1701
|
+
# @example
|
1702
|
+
# df = Polars::DataFrame.new(
|
1703
|
+
# {
|
1704
|
+
# "a" => [1, 2, nil],
|
1705
|
+
# "b" => [4, nil, 6]
|
1706
|
+
# }
|
1707
|
+
# )
|
1708
|
+
# df.fill_null(strategy: "zero")
|
1709
|
+
# # =>
|
1710
|
+
# # shape: (3, 2)
|
1711
|
+
# # ┌─────┬─────┐
|
1712
|
+
# # │ a ┆ b │
|
1713
|
+
# # │ --- ┆ --- │
|
1714
|
+
# # │ i64 ┆ i64 │
|
1715
|
+
# # ╞═════╪═════╡
|
1716
|
+
# # │ 1 ┆ 4 │
|
1717
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1718
|
+
# # │ 2 ┆ 0 │
|
1719
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1720
|
+
# # │ 0 ┆ 6 │
|
1721
|
+
# # └─────┴─────┘
|
1722
|
+
#
|
1723
|
+
# @example
|
1724
|
+
# df.fill_null(99)
|
1725
|
+
# # =>
|
1726
|
+
# # shape: (3, 2)
|
1727
|
+
# # ┌─────┬─────┐
|
1728
|
+
# # │ a ┆ b │
|
1729
|
+
# # │ --- ┆ --- │
|
1730
|
+
# # │ i64 ┆ i64 │
|
1731
|
+
# # ╞═════╪═════╡
|
1732
|
+
# # │ 1 ┆ 4 │
|
1733
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1734
|
+
# # │ 2 ┆ 99 │
|
1735
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1736
|
+
# # │ 99 ┆ 6 │
|
1737
|
+
# # └─────┴─────┘
|
1738
|
+
#
|
1739
|
+
# @example
|
1740
|
+
# df.fill_null(strategy: "forward")
|
1741
|
+
# # =>
|
1742
|
+
# # shape: (3, 2)
|
1743
|
+
# # ┌─────┬─────┐
|
1744
|
+
# # │ a ┆ b │
|
1745
|
+
# # │ --- ┆ --- │
|
1746
|
+
# # │ i64 ┆ i64 │
|
1747
|
+
# # ╞═════╪═════╡
|
1748
|
+
# # │ 1 ┆ 4 │
|
1749
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1750
|
+
# # │ 2 ┆ 4 │
|
1751
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1752
|
+
# # │ 2 ┆ 6 │
|
1753
|
+
# # └─────┴─────┘
|
917
1754
|
def fill_null(value = nil, strategy: nil, limit: nil)
|
918
1755
|
if !value.nil? && !strategy.nil?
|
919
1756
|
raise ArgumentError, "cannot specify both 'value' and 'strategy'."
|
@@ -931,75 +1768,426 @@ module Polars
|
|
931
1768
|
end
|
932
1769
|
end
|
933
1770
|
|
1771
|
+
# Fill floating point NaN value with a fill value.
|
1772
|
+
#
|
1773
|
+
# @return [Expr]
|
1774
|
+
#
|
1775
|
+
# @example
|
1776
|
+
# df = Polars::DataFrame.new(
|
1777
|
+
# {
|
1778
|
+
# "a" => [1.0, nil, Float::NAN],
|
1779
|
+
# "b" => [4.0, Float::NAN, 6]
|
1780
|
+
# }
|
1781
|
+
# )
|
1782
|
+
# df.fill_nan("zero")
|
1783
|
+
# # =>
|
1784
|
+
# # shape: (3, 2)
|
1785
|
+
# # ┌──────┬──────┐
|
1786
|
+
# # │ a ┆ b │
|
1787
|
+
# # │ --- ┆ --- │
|
1788
|
+
# # │ str ┆ str │
|
1789
|
+
# # ╞══════╪══════╡
|
1790
|
+
# # │ 1.0 ┆ 4.0 │
|
1791
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1792
|
+
# # │ null ┆ zero │
|
1793
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1794
|
+
# # │ zero ┆ 6.0 │
|
1795
|
+
# # └──────┴──────┘
|
934
1796
|
def fill_nan(fill_value)
|
935
1797
|
fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
|
936
1798
|
wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
|
937
1799
|
end
|
938
1800
|
|
1801
|
+
# Fill missing values with the latest seen values.
|
1802
|
+
#
|
1803
|
+
# @param limit [Integer]
|
1804
|
+
# The number of consecutive null values to forward fill.
|
1805
|
+
#
|
1806
|
+
# @return [Expr]
|
1807
|
+
#
|
1808
|
+
# @example
|
1809
|
+
# df = Polars::DataFrame.new(
|
1810
|
+
# {
|
1811
|
+
# "a" => [1, 2, nil],
|
1812
|
+
# "b" => [4, nil, 6]
|
1813
|
+
# }
|
1814
|
+
# )
|
1815
|
+
# df.select(Polars.all.forward_fill)
|
1816
|
+
# # =>
|
1817
|
+
# # shape: (3, 2)
|
1818
|
+
# # ┌─────┬─────┐
|
1819
|
+
# # │ a ┆ b │
|
1820
|
+
# # │ --- ┆ --- │
|
1821
|
+
# # │ i64 ┆ i64 │
|
1822
|
+
# # ╞═════╪═════╡
|
1823
|
+
# # │ 1 ┆ 4 │
|
1824
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1825
|
+
# # │ 2 ┆ 4 │
|
1826
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1827
|
+
# # │ 2 ┆ 6 │
|
1828
|
+
# # └─────┴─────┘
|
939
1829
|
def forward_fill(limit: nil)
|
940
1830
|
wrap_expr(_rbexpr.forward_fill(limit))
|
941
1831
|
end
|
942
1832
|
|
1833
|
+
# Fill missing values with the next to be seen values.
|
1834
|
+
#
|
1835
|
+
# @param limit [Integer]
|
1836
|
+
# The number of consecutive null values to backward fill.
|
1837
|
+
#
|
1838
|
+
# @return [Expr]
|
1839
|
+
#
|
1840
|
+
# @example
|
1841
|
+
# df = Polars::DataFrame.new(
|
1842
|
+
# {
|
1843
|
+
# "a" => [1, 2, nil],
|
1844
|
+
# "b" => [4, nil, 6]
|
1845
|
+
# }
|
1846
|
+
# )
|
1847
|
+
# df.select(Polars.all.backward_fill)
|
1848
|
+
# # =>
|
1849
|
+
# # shape: (3, 2)
|
1850
|
+
# # ┌──────┬─────┐
|
1851
|
+
# # │ a ┆ b │
|
1852
|
+
# # │ --- ┆ --- │
|
1853
|
+
# # │ i64 ┆ i64 │
|
1854
|
+
# # ╞══════╪═════╡
|
1855
|
+
# # │ 1 ┆ 4 │
|
1856
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1857
|
+
# # │ 2 ┆ 6 │
|
1858
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1859
|
+
# # │ null ┆ 6 │
|
1860
|
+
# # └──────┴─────┘
|
943
1861
|
def backward_fill(limit: nil)
|
944
1862
|
wrap_expr(_rbexpr.backward_fill(limit))
|
945
1863
|
end
|
946
1864
|
|
1865
|
+
# Reverse the selection.
|
1866
|
+
#
|
1867
|
+
# @return [Expr]
|
947
1868
|
def reverse
|
948
1869
|
wrap_expr(_rbexpr.reverse)
|
949
1870
|
end
|
950
1871
|
|
1872
|
+
# Get standard deviation.
|
1873
|
+
#
|
1874
|
+
# @param ddof [Integer]
|
1875
|
+
# Degrees of freedom.
|
1876
|
+
#
|
1877
|
+
# @return [Expr]
|
1878
|
+
#
|
1879
|
+
# @example
|
1880
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
1881
|
+
# df.select(Polars.col("a").std)
|
1882
|
+
# # =>
|
1883
|
+
# # shape: (1, 1)
|
1884
|
+
# # ┌─────┐
|
1885
|
+
# # │ a │
|
1886
|
+
# # │ --- │
|
1887
|
+
# # │ f64 │
|
1888
|
+
# # ╞═════╡
|
1889
|
+
# # │ 1.0 │
|
1890
|
+
# # └─────┘
|
951
1891
|
def std(ddof: 1)
|
952
1892
|
wrap_expr(_rbexpr.std(ddof))
|
953
1893
|
end
|
954
1894
|
|
1895
|
+
# Get variance.
|
1896
|
+
#
|
1897
|
+
# @param ddof [Integer]
|
1898
|
+
# Degrees of freedom.
|
1899
|
+
#
|
1900
|
+
# @return [Expr]
|
1901
|
+
#
|
1902
|
+
# @example
|
1903
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
1904
|
+
# df.select(Polars.col("a").var)
|
1905
|
+
# # =>
|
1906
|
+
# # shape: (1, 1)
|
1907
|
+
# # ┌─────┐
|
1908
|
+
# # │ a │
|
1909
|
+
# # │ --- │
|
1910
|
+
# # │ f64 │
|
1911
|
+
# # ╞═════╡
|
1912
|
+
# # │ 1.0 │
|
1913
|
+
# # └─────┘
|
955
1914
|
def var(ddof: 1)
|
956
1915
|
wrap_expr(_rbexpr.var(ddof))
|
957
1916
|
end
|
958
1917
|
|
1918
|
+
# Get maximum value.
|
1919
|
+
#
|
1920
|
+
# @return [Expr]
|
1921
|
+
#
|
1922
|
+
# @example
|
1923
|
+
# df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
|
1924
|
+
# df.select(Polars.col("a").max)
|
1925
|
+
# # =>
|
1926
|
+
# # shape: (1, 1)
|
1927
|
+
# # ┌─────┐
|
1928
|
+
# # │ a │
|
1929
|
+
# # │ --- │
|
1930
|
+
# # │ f64 │
|
1931
|
+
# # ╞═════╡
|
1932
|
+
# # │ 1.0 │
|
1933
|
+
# # └─────┘
|
959
1934
|
def max
|
960
1935
|
wrap_expr(_rbexpr.max)
|
961
1936
|
end
|
962
1937
|
|
1938
|
+
# Get minimum value.
|
1939
|
+
#
|
1940
|
+
# @return [Expr]
|
1941
|
+
#
|
1942
|
+
# @example
|
1943
|
+
# df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
|
1944
|
+
# df.select(Polars.col("a").min)
|
1945
|
+
# # =>
|
1946
|
+
# # shape: (1, 1)
|
1947
|
+
# # ┌──────┐
|
1948
|
+
# # │ a │
|
1949
|
+
# # │ --- │
|
1950
|
+
# # │ f64 │
|
1951
|
+
# # ╞══════╡
|
1952
|
+
# # │ -1.0 │
|
1953
|
+
# # └──────┘
|
963
1954
|
def min
|
964
1955
|
wrap_expr(_rbexpr.min)
|
965
1956
|
end
|
966
1957
|
|
1958
|
+
# Get maximum value, but propagate/poison encountered NaN values.
|
1959
|
+
#
|
1960
|
+
# @return [Expr]
|
1961
|
+
#
|
1962
|
+
# @example
|
1963
|
+
# df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
|
1964
|
+
# df.select(Polars.col("a").nan_max)
|
1965
|
+
# # =>
|
1966
|
+
# # shape: (1, 1)
|
1967
|
+
# # ┌─────┐
|
1968
|
+
# # │ a │
|
1969
|
+
# # │ --- │
|
1970
|
+
# # │ f64 │
|
1971
|
+
# # ╞═════╡
|
1972
|
+
# # │ NaN │
|
1973
|
+
# # └─────┘
|
967
1974
|
def nan_max
|
968
1975
|
wrap_expr(_rbexpr.nan_max)
|
969
1976
|
end
|
970
1977
|
|
1978
|
+
# Get minimum value, but propagate/poison encountered NaN values.
|
1979
|
+
#
|
1980
|
+
# @return [Expr]
|
1981
|
+
#
|
1982
|
+
# @example
|
1983
|
+
# df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
|
1984
|
+
# df.select(Polars.col("a").nan_min)
|
1985
|
+
# # =>
|
1986
|
+
# # shape: (1, 1)
|
1987
|
+
# # ┌─────┐
|
1988
|
+
# # │ a │
|
1989
|
+
# # │ --- │
|
1990
|
+
# # │ f64 │
|
1991
|
+
# # ╞═════╡
|
1992
|
+
# # │ NaN │
|
1993
|
+
# # └─────┘
|
971
1994
|
def nan_min
|
972
1995
|
wrap_expr(_rbexpr.nan_min)
|
973
1996
|
end
|
974
1997
|
|
1998
|
+
# Get sum value.
|
1999
|
+
#
|
2000
|
+
# @return [Expr]
|
2001
|
+
#
|
2002
|
+
# @note
|
2003
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
2004
|
+
# `:i64` before summing to prevent overflow issues.
|
2005
|
+
#
|
2006
|
+
# @example
|
2007
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2008
|
+
# df.select(Polars.col("a").sum)
|
2009
|
+
# # =>
|
2010
|
+
# # shape: (1, 1)
|
2011
|
+
# # ┌─────┐
|
2012
|
+
# # │ a │
|
2013
|
+
# # │ --- │
|
2014
|
+
# # │ i64 │
|
2015
|
+
# # ╞═════╡
|
2016
|
+
# # │ 0 │
|
2017
|
+
# # └─────┘
|
975
2018
|
def sum
|
976
2019
|
wrap_expr(_rbexpr.sum)
|
977
2020
|
end
|
978
2021
|
|
979
|
-
|
980
|
-
|
2022
|
+
# Get mean value.
|
2023
|
+
#
|
2024
|
+
# @return [Expr]
|
2025
|
+
#
|
2026
|
+
# @example
|
2027
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2028
|
+
# df.select(Polars.col("a").mean)
|
2029
|
+
# # =>
|
2030
|
+
# # shape: (1, 1)
|
2031
|
+
# # ┌─────┐
|
2032
|
+
# # │ a │
|
2033
|
+
# # │ --- │
|
2034
|
+
# # │ f64 │
|
2035
|
+
# # ╞═════╡
|
2036
|
+
# # │ 0.0 │
|
2037
|
+
# # └─────┘
|
2038
|
+
def mean
|
2039
|
+
wrap_expr(_rbexpr.mean)
|
981
2040
|
end
|
982
2041
|
|
2042
|
+
# Get median value using linear interpolation.
|
2043
|
+
#
|
2044
|
+
# @return [Expr]
|
2045
|
+
#
|
2046
|
+
# @example
|
2047
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2048
|
+
# df.select(Polars.col("a").median)
|
2049
|
+
# # =>
|
2050
|
+
# # shape: (1, 1)
|
2051
|
+
# # ┌─────┐
|
2052
|
+
# # │ a │
|
2053
|
+
# # │ --- │
|
2054
|
+
# # │ f64 │
|
2055
|
+
# # ╞═════╡
|
2056
|
+
# # │ 0.0 │
|
2057
|
+
# # └─────┘
|
983
2058
|
def median
|
984
2059
|
wrap_expr(_rbexpr.median)
|
985
2060
|
end
|
986
2061
|
|
2062
|
+
# Compute the product of an expression.
|
2063
|
+
#
|
2064
|
+
# @return [Expr]
|
2065
|
+
#
|
2066
|
+
# @example
|
2067
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
2068
|
+
# df.select(Polars.col("a").product)
|
2069
|
+
# # =>
|
2070
|
+
# # shape: (1, 1)
|
2071
|
+
# # ┌─────┐
|
2072
|
+
# # │ a │
|
2073
|
+
# # │ --- │
|
2074
|
+
# # │ i64 │
|
2075
|
+
# # ╞═════╡
|
2076
|
+
# # │ 6 │
|
2077
|
+
# # └─────┘
|
987
2078
|
def product
|
988
2079
|
wrap_expr(_rbexpr.product)
|
989
2080
|
end
|
990
2081
|
|
2082
|
+
# Count unique values.
|
2083
|
+
#
|
2084
|
+
# @return [Expr]
|
2085
|
+
#
|
2086
|
+
# @example
|
2087
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2088
|
+
# df.select(Polars.col("a").n_unique)
|
2089
|
+
# # =>
|
2090
|
+
# # shape: (1, 1)
|
2091
|
+
# # ┌─────┐
|
2092
|
+
# # │ a │
|
2093
|
+
# # │ --- │
|
2094
|
+
# # │ u32 │
|
2095
|
+
# # ╞═════╡
|
2096
|
+
# # │ 2 │
|
2097
|
+
# # └─────┘
|
991
2098
|
def n_unique
|
992
2099
|
wrap_expr(_rbexpr.n_unique)
|
993
2100
|
end
|
994
2101
|
|
2102
|
+
# Count null values.
|
2103
|
+
#
|
2104
|
+
# @return [Expr]
|
2105
|
+
#
|
2106
|
+
# @example
|
2107
|
+
# df = Polars::DataFrame.new(
|
2108
|
+
# {
|
2109
|
+
# "a" => [nil, 1, nil],
|
2110
|
+
# "b" => [1, 2, 3]
|
2111
|
+
# }
|
2112
|
+
# )
|
2113
|
+
# df.select(Polars.all.null_count)
|
2114
|
+
# # =>
|
2115
|
+
# # shape: (1, 2)
|
2116
|
+
# # ┌─────┬─────┐
|
2117
|
+
# # │ a ┆ b │
|
2118
|
+
# # │ --- ┆ --- │
|
2119
|
+
# # │ u32 ┆ u32 │
|
2120
|
+
# # ╞═════╪═════╡
|
2121
|
+
# # │ 2 ┆ 0 │
|
2122
|
+
# # └─────┴─────┘
|
995
2123
|
def null_count
|
996
2124
|
wrap_expr(_rbexpr.null_count)
|
997
2125
|
end
|
998
2126
|
|
2127
|
+
# Get index of first unique value.
|
2128
|
+
#
|
2129
|
+
# @return [Expr]
|
2130
|
+
#
|
2131
|
+
# @example
|
2132
|
+
# df = Polars::DataFrame.new(
|
2133
|
+
# {
|
2134
|
+
# "a" => [8, 9, 10],
|
2135
|
+
# "b" => [nil, 4, 4]
|
2136
|
+
# }
|
2137
|
+
# )
|
2138
|
+
# df.select(Polars.col("a").arg_unique)
|
2139
|
+
# # =>
|
2140
|
+
# # shape: (3, 1)
|
2141
|
+
# # ┌─────┐
|
2142
|
+
# # │ a │
|
2143
|
+
# # │ --- │
|
2144
|
+
# # │ u32 │
|
2145
|
+
# # ╞═════╡
|
2146
|
+
# # │ 0 │
|
2147
|
+
# # ├╌╌╌╌╌┤
|
2148
|
+
# # │ 1 │
|
2149
|
+
# # ├╌╌╌╌╌┤
|
2150
|
+
# # │ 2 │
|
2151
|
+
# # └─────┘
|
2152
|
+
#
|
2153
|
+
# @example
|
2154
|
+
# df.select(Polars.col("b").arg_unique)
|
2155
|
+
# # =>
|
2156
|
+
# # shape: (2, 1)
|
2157
|
+
# # ┌─────┐
|
2158
|
+
# # │ b │
|
2159
|
+
# # │ --- │
|
2160
|
+
# # │ u32 │
|
2161
|
+
# # ╞═════╡
|
2162
|
+
# # │ 0 │
|
2163
|
+
# # ├╌╌╌╌╌┤
|
2164
|
+
# # │ 1 │
|
2165
|
+
# # └─────┘
|
999
2166
|
def arg_unique
|
1000
2167
|
wrap_expr(_rbexpr.arg_unique)
|
1001
2168
|
end
|
1002
2169
|
|
2170
|
+
# Get unique values of this expression.
|
2171
|
+
#
|
2172
|
+
# @param maintain_order [Boolean]
|
2173
|
+
# Maintain order of data. This requires more work.
|
2174
|
+
#
|
2175
|
+
# @return [Expr]
|
2176
|
+
#
|
2177
|
+
# @example
|
2178
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2179
|
+
# df.select(Polars.col("a").unique(maintain_order: true))
|
2180
|
+
# # =>
|
2181
|
+
# # shape: (2, 1)
|
2182
|
+
# # ┌─────┐
|
2183
|
+
# # │ a │
|
2184
|
+
# # │ --- │
|
2185
|
+
# # │ i64 │
|
2186
|
+
# # ╞═════╡
|
2187
|
+
# # │ 1 │
|
2188
|
+
# # ├╌╌╌╌╌┤
|
2189
|
+
# # │ 2 │
|
2190
|
+
# # └─────┘
|
1003
2191
|
def unique(maintain_order: false)
|
1004
2192
|
if maintain_order
|
1005
2193
|
wrap_expr(_rbexpr.unique_stable)
|
@@ -1008,95 +2196,743 @@ module Polars
|
|
1008
2196
|
end
|
1009
2197
|
end
|
1010
2198
|
|
2199
|
+
# Get the first value.
|
2200
|
+
#
|
2201
|
+
# @return [Expr]
|
2202
|
+
#
|
2203
|
+
# @example
|
2204
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2205
|
+
# df.select(Polars.col("a").first)
|
2206
|
+
# # =>
|
2207
|
+
# # shape: (1, 1)
|
2208
|
+
# # ┌─────┐
|
2209
|
+
# # │ a │
|
2210
|
+
# # │ --- │
|
2211
|
+
# # │ i64 │
|
2212
|
+
# # ╞═════╡
|
2213
|
+
# # │ 1 │
|
2214
|
+
# # └─────┘
|
1011
2215
|
def first
|
1012
2216
|
wrap_expr(_rbexpr.first)
|
1013
2217
|
end
|
1014
2218
|
|
2219
|
+
# Get the last value.
|
2220
|
+
#
|
2221
|
+
# @return [Expr]
|
2222
|
+
#
|
2223
|
+
# @example
|
2224
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2225
|
+
# df.select(Polars.col("a").last)
|
2226
|
+
# # =>
|
2227
|
+
# # shape: (1, 1)
|
2228
|
+
# # ┌─────┐
|
2229
|
+
# # │ a │
|
2230
|
+
# # │ --- │
|
2231
|
+
# # │ i64 │
|
2232
|
+
# # ╞═════╡
|
2233
|
+
# # │ 2 │
|
2234
|
+
# # └─────┘
|
1015
2235
|
def last
|
1016
2236
|
wrap_expr(_rbexpr.last)
|
1017
2237
|
end
|
1018
2238
|
|
2239
|
+
# Apply window function over a subgroup.
|
2240
|
+
#
|
2241
|
+
# This is similar to a groupby + aggregation + self join.
|
2242
|
+
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2243
|
+
#
|
2244
|
+
# @param expr [Object]
|
2245
|
+
# Column(s) to group by.
|
2246
|
+
#
|
2247
|
+
# @return [Expr]
|
2248
|
+
#
|
2249
|
+
# @example
|
2250
|
+
# df = Polars::DataFrame.new(
|
2251
|
+
# {
|
2252
|
+
# "groups" => ["g1", "g1", "g2"],
|
2253
|
+
# "values" => [1, 2, 3]
|
2254
|
+
# }
|
2255
|
+
# )
|
2256
|
+
# df.with_column(
|
2257
|
+
# Polars.col("values").max.over("groups").alias("max_by_group")
|
2258
|
+
# )
|
2259
|
+
# # =>
|
2260
|
+
# # shape: (3, 3)
|
2261
|
+
# # ┌────────┬────────┬──────────────┐
|
2262
|
+
# # │ groups ┆ values ┆ max_by_group │
|
2263
|
+
# # │ --- ┆ --- ┆ --- │
|
2264
|
+
# # │ str ┆ i64 ┆ i64 │
|
2265
|
+
# # ╞════════╪════════╪══════════════╡
|
2266
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2267
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2268
|
+
# # │ g1 ┆ 2 ┆ 2 │
|
2269
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2270
|
+
# # │ g2 ┆ 3 ┆ 3 │
|
2271
|
+
# # └────────┴────────┴──────────────┘
|
2272
|
+
#
|
2273
|
+
# @example
|
2274
|
+
# df = Polars::DataFrame.new(
|
2275
|
+
# {
|
2276
|
+
# "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
|
2277
|
+
# "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
|
2278
|
+
# }
|
2279
|
+
# )
|
2280
|
+
# df.lazy
|
2281
|
+
# .select([Polars.col("groups").sum.over("groups")])
|
2282
|
+
# .collect
|
2283
|
+
# # =>
|
2284
|
+
# # shape: (9, 1)
|
2285
|
+
# # ┌────────┐
|
2286
|
+
# # │ groups │
|
2287
|
+
# # │ --- │
|
2288
|
+
# # │ i64 │
|
2289
|
+
# # ╞════════╡
|
2290
|
+
# # │ 4 │
|
2291
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2292
|
+
# # │ 4 │
|
2293
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2294
|
+
# # │ 6 │
|
2295
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2296
|
+
# # │ 6 │
|
2297
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2298
|
+
# # │ ... │
|
2299
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2300
|
+
# # │ 6 │
|
2301
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2302
|
+
# # │ 6 │
|
2303
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2304
|
+
# # │ 6 │
|
2305
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2306
|
+
# # │ 4 │
|
2307
|
+
# # └────────┘
|
1019
2308
|
def over(expr)
|
1020
2309
|
rbexprs = Utils.selection_to_rbexpr_list(expr)
|
1021
2310
|
wrap_expr(_rbexpr.over(rbexprs))
|
1022
2311
|
end
|
1023
2312
|
|
2313
|
+
# Get mask of unique values.
|
2314
|
+
#
|
2315
|
+
# @return [Expr]
|
2316
|
+
#
|
2317
|
+
# @example
|
2318
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2319
|
+
# df.select(Polars.col("a").is_unique)
|
2320
|
+
# # =>
|
2321
|
+
# # shape: (3, 1)
|
2322
|
+
# # ┌───────┐
|
2323
|
+
# # │ a │
|
2324
|
+
# # │ --- │
|
2325
|
+
# # │ bool │
|
2326
|
+
# # ╞═══════╡
|
2327
|
+
# # │ false │
|
2328
|
+
# # ├╌╌╌╌╌╌╌┤
|
2329
|
+
# # │ false │
|
2330
|
+
# # ├╌╌╌╌╌╌╌┤
|
2331
|
+
# # │ true │
|
2332
|
+
# # └───────┘
|
1024
2333
|
def is_unique
|
1025
2334
|
wrap_expr(_rbexpr.is_unique)
|
1026
2335
|
end
|
1027
2336
|
|
2337
|
+
# Get a mask of the first unique value.
|
2338
|
+
#
|
2339
|
+
# @return [Expr]
|
2340
|
+
#
|
2341
|
+
# @example
|
2342
|
+
# df = Polars::DataFrame.new(
|
2343
|
+
# {
|
2344
|
+
# "num" => [1, 2, 3, 1, 5]
|
2345
|
+
# }
|
2346
|
+
# )
|
2347
|
+
# df.with_column(Polars.col("num").is_first.alias("is_first"))
|
2348
|
+
# # =>
|
2349
|
+
# # shape: (5, 2)
|
2350
|
+
# # ┌─────┬──────────┐
|
2351
|
+
# # │ num ┆ is_first │
|
2352
|
+
# # │ --- ┆ --- │
|
2353
|
+
# # │ i64 ┆ bool │
|
2354
|
+
# # ╞═════╪══════════╡
|
2355
|
+
# # │ 1 ┆ true │
|
2356
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2357
|
+
# # │ 2 ┆ true │
|
2358
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2359
|
+
# # │ 3 ┆ true │
|
2360
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2361
|
+
# # │ 1 ┆ false │
|
2362
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2363
|
+
# # │ 5 ┆ true │
|
2364
|
+
# # └─────┴──────────┘
|
1028
2365
|
def is_first
|
1029
2366
|
wrap_expr(_rbexpr.is_first)
|
1030
2367
|
end
|
1031
2368
|
|
2369
|
+
# Get mask of duplicated values.
|
2370
|
+
#
|
2371
|
+
# @return [Expr]
|
2372
|
+
#
|
2373
|
+
# @example
|
2374
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2375
|
+
# df.select(Polars.col("a").is_duplicated)
|
2376
|
+
# # =>
|
2377
|
+
# # shape: (3, 1)
|
2378
|
+
# # ┌───────┐
|
2379
|
+
# # │ a │
|
2380
|
+
# # │ --- │
|
2381
|
+
# # │ bool │
|
2382
|
+
# # ╞═══════╡
|
2383
|
+
# # │ true │
|
2384
|
+
# # ├╌╌╌╌╌╌╌┤
|
2385
|
+
# # │ true │
|
2386
|
+
# # ├╌╌╌╌╌╌╌┤
|
2387
|
+
# # │ false │
|
2388
|
+
# # └───────┘
|
1032
2389
|
def is_duplicated
|
1033
2390
|
wrap_expr(_rbexpr.is_duplicated)
|
1034
2391
|
end
|
1035
2392
|
|
2393
|
+
# Get quantile value.
|
2394
|
+
#
|
2395
|
+
# @param quantile [Float]
|
2396
|
+
# Quantile between 0.0 and 1.0.
|
2397
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
2398
|
+
# Interpolation method.
|
2399
|
+
#
|
2400
|
+
# @return [Expr]
|
2401
|
+
#
|
2402
|
+
# @example
|
2403
|
+
# df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
|
2404
|
+
# df.select(Polars.col("a").quantile(0.3))
|
2405
|
+
# # =>
|
2406
|
+
# # shape: (1, 1)
|
2407
|
+
# # ┌─────┐
|
2408
|
+
# # │ a │
|
2409
|
+
# # │ --- │
|
2410
|
+
# # │ f64 │
|
2411
|
+
# # ╞═════╡
|
2412
|
+
# # │ 1.0 │
|
2413
|
+
# # └─────┘
|
2414
|
+
#
|
2415
|
+
# @example
|
2416
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
|
2417
|
+
# # =>
|
2418
|
+
# # shape: (1, 1)
|
2419
|
+
# # ┌─────┐
|
2420
|
+
# # │ a │
|
2421
|
+
# # │ --- │
|
2422
|
+
# # │ f64 │
|
2423
|
+
# # ╞═════╡
|
2424
|
+
# # │ 2.0 │
|
2425
|
+
# # └─────┘
|
2426
|
+
#
|
2427
|
+
# @example
|
2428
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
|
2429
|
+
# # =>
|
2430
|
+
# # shape: (1, 1)
|
2431
|
+
# # ┌─────┐
|
2432
|
+
# # │ a │
|
2433
|
+
# # │ --- │
|
2434
|
+
# # │ f64 │
|
2435
|
+
# # ╞═════╡
|
2436
|
+
# # │ 1.0 │
|
2437
|
+
# # └─────┘
|
2438
|
+
#
|
2439
|
+
# @example
|
2440
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
|
2441
|
+
# # =>
|
2442
|
+
# # shape: (1, 1)
|
2443
|
+
# # ┌─────┐
|
2444
|
+
# # │ a │
|
2445
|
+
# # │ --- │
|
2446
|
+
# # │ f64 │
|
2447
|
+
# # ╞═════╡
|
2448
|
+
# # │ 1.5 │
|
2449
|
+
# # └─────┘
|
2450
|
+
#
|
2451
|
+
# @example
|
2452
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
|
2453
|
+
# # =>
|
2454
|
+
# # shape: (1, 1)
|
2455
|
+
# # ┌─────┐
|
2456
|
+
# # │ a │
|
2457
|
+
# # │ --- │
|
2458
|
+
# # │ f64 │
|
2459
|
+
# # ╞═════╡
|
2460
|
+
# # │ 1.5 │
|
2461
|
+
# # └─────┘
|
1036
2462
|
def quantile(quantile, interpolation: "nearest")
|
1037
2463
|
wrap_expr(_rbexpr.quantile(quantile, interpolation))
|
1038
2464
|
end
|
1039
2465
|
|
1040
|
-
|
2466
|
+
# Filter a single column.
|
2467
|
+
#
|
2468
|
+
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
2469
|
+
# level, use `LazyFrame#filter`.
|
2470
|
+
#
|
2471
|
+
# @param predicate [Expr]
|
2472
|
+
# Boolean expression.
|
2473
|
+
#
|
2474
|
+
# @return [Expr]
|
2475
|
+
#
|
2476
|
+
# @example
|
2477
|
+
# df = Polars::DataFrame.new(
|
2478
|
+
# {
|
2479
|
+
# "group_col" => ["g1", "g1", "g2"],
|
2480
|
+
# "b" => [1, 2, 3]
|
2481
|
+
# }
|
2482
|
+
# )
|
2483
|
+
# (
|
2484
|
+
# df.groupby("group_col").agg(
|
2485
|
+
# [
|
2486
|
+
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2487
|
+
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
2488
|
+
# ]
|
2489
|
+
# )
|
2490
|
+
# ).sort("group_col")
|
2491
|
+
# # =>
|
2492
|
+
# # shape: (2, 3)
|
2493
|
+
# # ┌───────────┬──────┬─────┐
|
2494
|
+
# # │ group_col ┆ lt ┆ gte │
|
2495
|
+
# # │ --- ┆ --- ┆ --- │
|
2496
|
+
# # │ str ┆ i64 ┆ i64 │
|
2497
|
+
# # ╞═══════════╪══════╪═════╡
|
2498
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2499
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
2500
|
+
# # │ g2 ┆ null ┆ 3 │
|
2501
|
+
# # └───────────┴──────┴─────┘
|
2502
|
+
def filter(predicate)
|
1041
2503
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
1042
2504
|
end
|
1043
2505
|
|
2506
|
+
# Filter a single column.
|
2507
|
+
#
|
2508
|
+
# Alias for {#filter}.
|
2509
|
+
#
|
2510
|
+
# @param predicate [Expr]
|
2511
|
+
# Boolean expression.
|
2512
|
+
#
|
2513
|
+
# @return [Expr]
|
2514
|
+
#
|
2515
|
+
# @example
|
2516
|
+
# df = Polars::DataFrame.new(
|
2517
|
+
# {
|
2518
|
+
# "group_col" => ["g1", "g1", "g2"],
|
2519
|
+
# "b" => [1, 2, 3]
|
2520
|
+
# }
|
2521
|
+
# )
|
2522
|
+
# (
|
2523
|
+
# df.groupby("group_col").agg(
|
2524
|
+
# [
|
2525
|
+
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2526
|
+
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
2527
|
+
# ]
|
2528
|
+
# )
|
2529
|
+
# ).sort("group_col")
|
2530
|
+
# # =>
|
2531
|
+
# # shape: (2, 3)
|
2532
|
+
# # ┌───────────┬──────┬─────┐
|
2533
|
+
# # │ group_col ┆ lt ┆ gte │
|
2534
|
+
# # │ --- ┆ --- ┆ --- │
|
2535
|
+
# # │ str ┆ i64 ┆ i64 │
|
2536
|
+
# # ╞═══════════╪══════╪═════╡
|
2537
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2538
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
2539
|
+
# # │ g2 ┆ null ┆ 3 │
|
2540
|
+
# # └───────────┴──────┴─────┘
|
1044
2541
|
def where(predicate)
|
1045
2542
|
filter(predicate)
|
1046
2543
|
end
|
1047
2544
|
|
1048
|
-
#
|
2545
|
+
# Apply a custom Ruby function to a Series or sequence of Series.
|
2546
|
+
#
|
2547
|
+
# The output of this custom function must be a Series.
|
2548
|
+
# If you want to apply a custom function elementwise over single values, see
|
2549
|
+
# {#apply}. A use case for `map` is when you want to transform an
|
2550
|
+
# expression with a third-party library.
|
2551
|
+
#
|
2552
|
+
# Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
|
2553
|
+
#
|
2554
|
+
# @param return_dtype [Symbol]
|
2555
|
+
# Dtype of the output Series.
|
2556
|
+
# @param agg_list [Boolean]
|
2557
|
+
# Aggregate list.
|
2558
|
+
#
|
2559
|
+
# @return [Expr]
|
2560
|
+
#
|
2561
|
+
# @example
|
2562
|
+
# df = Polars::DataFrame.new(
|
2563
|
+
# {
|
2564
|
+
# "sine" => [0.0, 1.0, 0.0, -1.0],
|
2565
|
+
# "cosine" => [1.0, 0.0, -1.0, 0.0]
|
2566
|
+
# }
|
2567
|
+
# )
|
2568
|
+
# df.select(Polars.all.map { |x| x.to_numpy.argmax })
|
2569
|
+
# # =>
|
2570
|
+
# # shape: (1, 2)
|
2571
|
+
# # ┌──────┬────────┐
|
2572
|
+
# # │ sine ┆ cosine │
|
2573
|
+
# # │ --- ┆ --- │
|
2574
|
+
# # │ i64 ┆ i64 │
|
2575
|
+
# # ╞══════╪════════╡
|
2576
|
+
# # │ 1 ┆ 0 │
|
2577
|
+
# # └──────┴────────┘
|
2578
|
+
# def map(return_dtype: nil, agg_list: false, &block)
|
2579
|
+
# if !return_dtype.nil?
|
2580
|
+
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2581
|
+
# end
|
2582
|
+
# wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
|
1049
2583
|
# end
|
1050
2584
|
|
1051
2585
|
# def apply
|
1052
2586
|
# end
|
1053
2587
|
|
2588
|
+
# Explode a list or utf8 Series. This means that every item is expanded to a new
|
2589
|
+
# row.
|
2590
|
+
#
|
2591
|
+
# Alias for {#explode}.
|
2592
|
+
#
|
2593
|
+
# @return [Expr]
|
1054
2594
|
#
|
2595
|
+
# @example
|
2596
|
+
# df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
|
2597
|
+
# df.select(Polars.col("foo").flatten)
|
2598
|
+
# # =>
|
2599
|
+
# # shape: (10, 1)
|
2600
|
+
# # ┌─────┐
|
2601
|
+
# # │ foo │
|
2602
|
+
# # │ --- │
|
2603
|
+
# # │ str │
|
2604
|
+
# # ╞═════╡
|
2605
|
+
# # │ h │
|
2606
|
+
# # ├╌╌╌╌╌┤
|
2607
|
+
# # │ e │
|
2608
|
+
# # ├╌╌╌╌╌┤
|
2609
|
+
# # │ l │
|
2610
|
+
# # ├╌╌╌╌╌┤
|
2611
|
+
# # │ l │
|
2612
|
+
# # ├╌╌╌╌╌┤
|
2613
|
+
# # │ ... │
|
2614
|
+
# # ├╌╌╌╌╌┤
|
2615
|
+
# # │ o │
|
2616
|
+
# # ├╌╌╌╌╌┤
|
2617
|
+
# # │ r │
|
2618
|
+
# # ├╌╌╌╌╌┤
|
2619
|
+
# # │ l │
|
2620
|
+
# # ├╌╌╌╌╌┤
|
2621
|
+
# # │ d │
|
2622
|
+
# # └─────┘
|
1055
2623
|
def flatten
|
1056
2624
|
wrap_expr(_rbexpr.explode)
|
1057
2625
|
end
|
1058
2626
|
|
2627
|
+
# Explode a list or utf8 Series.
|
2628
|
+
#
|
2629
|
+
# This means that every item is expanded to a new row.
|
2630
|
+
#
|
2631
|
+
# @return [Expr]
|
2632
|
+
#
|
2633
|
+
# @example
|
2634
|
+
# df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
|
2635
|
+
# df.select(Polars.col("b").explode)
|
2636
|
+
# # =>
|
2637
|
+
# # shape: (6, 1)
|
2638
|
+
# # ┌─────┐
|
2639
|
+
# # │ b │
|
2640
|
+
# # │ --- │
|
2641
|
+
# # │ i64 │
|
2642
|
+
# # ╞═════╡
|
2643
|
+
# # │ 1 │
|
2644
|
+
# # ├╌╌╌╌╌┤
|
2645
|
+
# # │ 2 │
|
2646
|
+
# # ├╌╌╌╌╌┤
|
2647
|
+
# # │ 3 │
|
2648
|
+
# # ├╌╌╌╌╌┤
|
2649
|
+
# # │ 4 │
|
2650
|
+
# # ├╌╌╌╌╌┤
|
2651
|
+
# # │ 5 │
|
2652
|
+
# # ├╌╌╌╌╌┤
|
2653
|
+
# # │ 6 │
|
2654
|
+
# # └─────┘
|
1059
2655
|
def explode
|
1060
2656
|
wrap_expr(_rbexpr.explode)
|
1061
2657
|
end
|
1062
2658
|
|
2659
|
+
# Take every nth value in the Series and return as a new Series.
|
2660
|
+
#
|
2661
|
+
# @return [Expr]
|
2662
|
+
#
|
2663
|
+
# @example
|
2664
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2665
|
+
# df.select(Polars.col("foo").take_every(3))
|
2666
|
+
# # =>
|
2667
|
+
# # shape: (3, 1)
|
2668
|
+
# # ┌─────┐
|
2669
|
+
# # │ foo │
|
2670
|
+
# # │ --- │
|
2671
|
+
# # │ i64 │
|
2672
|
+
# # ╞═════╡
|
2673
|
+
# # │ 1 │
|
2674
|
+
# # ├╌╌╌╌╌┤
|
2675
|
+
# # │ 4 │
|
2676
|
+
# # ├╌╌╌╌╌┤
|
2677
|
+
# # │ 7 │
|
2678
|
+
# # └─────┘
|
1063
2679
|
def take_every(n)
|
1064
2680
|
wrap_expr(_rbexpr.take_every(n))
|
1065
2681
|
end
|
1066
2682
|
|
2683
|
+
# Get the first `n` rows.
|
2684
|
+
#
|
2685
|
+
# @param n [Integer]
|
2686
|
+
# Number of rows to return.
|
2687
|
+
#
|
2688
|
+
# @return [Expr]
|
2689
|
+
#
|
2690
|
+
# @example
|
2691
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
2692
|
+
# df.select(Polars.col("foo").head(3))
|
2693
|
+
# # =>
|
2694
|
+
# # shape: (3, 1)
|
2695
|
+
# # ┌─────┐
|
2696
|
+
# # │ foo │
|
2697
|
+
# # │ --- │
|
2698
|
+
# # │ i64 │
|
2699
|
+
# # ╞═════╡
|
2700
|
+
# # │ 1 │
|
2701
|
+
# # ├╌╌╌╌╌┤
|
2702
|
+
# # │ 2 │
|
2703
|
+
# # ├╌╌╌╌╌┤
|
2704
|
+
# # │ 3 │
|
2705
|
+
# # └─────┘
|
1067
2706
|
def head(n = 10)
|
1068
2707
|
wrap_expr(_rbexpr.head(n))
|
1069
2708
|
end
|
1070
2709
|
|
2710
|
+
# Get the last `n` rows.
|
2711
|
+
#
|
2712
|
+
# @param n [Integer]
|
2713
|
+
# Number of rows to return.
|
2714
|
+
#
|
2715
|
+
# @return [Expr]
|
2716
|
+
#
|
2717
|
+
# @example
|
2718
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
2719
|
+
# df.select(Polars.col("foo").tail(3))
|
2720
|
+
# # =>
|
2721
|
+
# # shape: (3, 1)
|
2722
|
+
# # ┌─────┐
|
2723
|
+
# # │ foo │
|
2724
|
+
# # │ --- │
|
2725
|
+
# # │ i64 │
|
2726
|
+
# # ╞═════╡
|
2727
|
+
# # │ 5 │
|
2728
|
+
# # ├╌╌╌╌╌┤
|
2729
|
+
# # │ 6 │
|
2730
|
+
# # ├╌╌╌╌╌┤
|
2731
|
+
# # │ 7 │
|
2732
|
+
# # └─────┘
|
1071
2733
|
def tail(n = 10)
|
1072
2734
|
wrap_expr(_rbexpr.tail(n))
|
1073
2735
|
end
|
1074
2736
|
|
2737
|
+
# Get the first `n` rows.
|
2738
|
+
#
|
2739
|
+
# Alias for {#head}.
|
2740
|
+
#
|
2741
|
+
# @param n [Integer]
|
2742
|
+
# Number of rows to return.
|
2743
|
+
#
|
2744
|
+
# @return [Expr]
|
1075
2745
|
def limit(n = 10)
|
1076
2746
|
head(n)
|
1077
2747
|
end
|
1078
2748
|
|
2749
|
+
# Raise expression to the power of exponent.
|
2750
|
+
#
|
2751
|
+
# @return [Expr]
|
2752
|
+
#
|
2753
|
+
# @example
|
2754
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
2755
|
+
# df.select(Polars.col("foo").pow(3))
|
2756
|
+
# # =>
|
2757
|
+
# # shape: (4, 1)
|
2758
|
+
# # ┌──────┐
|
2759
|
+
# # │ foo │
|
2760
|
+
# # │ --- │
|
2761
|
+
# # │ f64 │
|
2762
|
+
# # ╞══════╡
|
2763
|
+
# # │ 1.0 │
|
2764
|
+
# # ├╌╌╌╌╌╌┤
|
2765
|
+
# # │ 8.0 │
|
2766
|
+
# # ├╌╌╌╌╌╌┤
|
2767
|
+
# # │ 27.0 │
|
2768
|
+
# # ├╌╌╌╌╌╌┤
|
2769
|
+
# # │ 64.0 │
|
2770
|
+
# # └──────┘
|
1079
2771
|
def pow(exponent)
|
1080
2772
|
exponent = Utils.expr_to_lit_or_expr(exponent)
|
1081
2773
|
wrap_expr(_rbexpr.pow(exponent._rbexpr))
|
1082
2774
|
end
|
1083
2775
|
|
1084
|
-
#
|
1085
|
-
#
|
2776
|
+
# Check if elements of this expression are present in the other Series.
|
2777
|
+
#
|
2778
|
+
# @param other [Object]
|
2779
|
+
# Series or sequence of primitive type.
|
2780
|
+
#
|
2781
|
+
# @return [Expr]
|
2782
|
+
#
|
2783
|
+
# @example
|
2784
|
+
# df = Polars::DataFrame.new(
|
2785
|
+
# {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
|
2786
|
+
# )
|
2787
|
+
# df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
|
2788
|
+
# # =>
|
2789
|
+
# # shape: (3, 1)
|
2790
|
+
# # ┌──────────┐
|
2791
|
+
# # │ contains │
|
2792
|
+
# # │ --- │
|
2793
|
+
# # │ bool │
|
2794
|
+
# # ╞══════════╡
|
2795
|
+
# # │ true │
|
2796
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
2797
|
+
# # │ true │
|
2798
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
2799
|
+
# # │ false │
|
2800
|
+
# # └──────────┘
|
2801
|
+
def is_in(other)
|
2802
|
+
if other.is_a?(Array)
|
2803
|
+
if other.length == 0
|
2804
|
+
other = Polars.lit(nil)
|
2805
|
+
else
|
2806
|
+
other = Polars.lit(Series.new(other))
|
2807
|
+
end
|
2808
|
+
else
|
2809
|
+
other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
|
2810
|
+
end
|
2811
|
+
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2812
|
+
end
|
1086
2813
|
|
2814
|
+
# Repeat the elements in this Series as specified in the given expression.
|
2815
|
+
#
|
2816
|
+
# The repeated elements are expanded into a `List`.
|
2817
|
+
#
|
2818
|
+
# @param by [Object]
|
2819
|
+
# Numeric column that determines how often the values will be repeated.
|
2820
|
+
# The column will be coerced to `:u32`. Give this dtype to make the coercion a
|
2821
|
+
# no-op.
|
1087
2822
|
#
|
2823
|
+
# @return [Expr]
|
2824
|
+
#
|
2825
|
+
# @example
|
2826
|
+
# df = Polars::DataFrame.new(
|
2827
|
+
# {
|
2828
|
+
# "a" => ["x", "y", "z"],
|
2829
|
+
# "n" => [1, 2, 3]
|
2830
|
+
# }
|
2831
|
+
# )
|
2832
|
+
# df.select(Polars.col("a").repeat_by("n"))
|
2833
|
+
# # =>
|
2834
|
+
# # shape: (3, 1)
|
2835
|
+
# # ┌─────────────────┐
|
2836
|
+
# # │ a │
|
2837
|
+
# # │ --- │
|
2838
|
+
# # │ list[str] │
|
2839
|
+
# # ╞═════════════════╡
|
2840
|
+
# # │ ["x"] │
|
2841
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2842
|
+
# # │ ["y", "y"] │
|
2843
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2844
|
+
# # │ ["z", "z", "z"] │
|
2845
|
+
# # └─────────────────┘
|
1088
2846
|
def repeat_by(by)
|
1089
|
-
by = Utils.expr_to_lit_or_expr(by, false)
|
2847
|
+
by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
|
1090
2848
|
wrap_expr(_rbexpr.repeat_by(by._rbexpr))
|
1091
2849
|
end
|
1092
2850
|
|
1093
|
-
#
|
1094
|
-
#
|
2851
|
+
# Check if this expression is between start and end.
|
2852
|
+
#
|
2853
|
+
# @param start [Object]
|
2854
|
+
# Lower bound as primitive type or datetime.
|
2855
|
+
# @param _end [Object]
|
2856
|
+
# Upper bound as primitive type or datetime.
|
2857
|
+
# @param include_bounds [Boolean]
|
2858
|
+
# false: Exclude both start and end (default).
|
2859
|
+
# true: Include both start and end.
|
2860
|
+
# [false, false]: Exclude start and exclude end.
|
2861
|
+
# [true, true]: Include start and include end.
|
2862
|
+
# [false, true]: Exclude start and include end.
|
2863
|
+
# [true, false]: Include start and exclude end.
|
2864
|
+
#
|
2865
|
+
# @return [Expr]
|
2866
|
+
#
|
2867
|
+
# @example
|
2868
|
+
# df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
|
2869
|
+
# df.with_column(Polars.col("num").is_between(2, 4))
|
2870
|
+
# # =>
|
2871
|
+
# # shape: (5, 2)
|
2872
|
+
# # ┌─────┬────────────┐
|
2873
|
+
# # │ num ┆ is_between │
|
2874
|
+
# # │ --- ┆ --- │
|
2875
|
+
# # │ i64 ┆ bool │
|
2876
|
+
# # ╞═════╪════════════╡
|
2877
|
+
# # │ 1 ┆ false │
|
2878
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2879
|
+
# # │ 2 ┆ false │
|
2880
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2881
|
+
# # │ 3 ┆ true │
|
2882
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2883
|
+
# # │ 4 ┆ false │
|
2884
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2885
|
+
# # │ 5 ┆ false │
|
2886
|
+
# # └─────┴────────────┘
|
2887
|
+
def is_between(start, _end, include_bounds: false)
|
2888
|
+
if include_bounds == false || include_bounds == [false, false]
|
2889
|
+
((self > start) & (self < _end)).alias("is_between")
|
2890
|
+
elsif include_bounds == true || include_bounds == [true, true]
|
2891
|
+
((self >= start) & (self <= _end)).alias("is_between")
|
2892
|
+
elsif include_bounds == [false, true]
|
2893
|
+
((self > start) & (self <= _end)).alias("is_between")
|
2894
|
+
elsif include_bounds == [true, false]
|
2895
|
+
((self >= start) & (self < _end)).alias("is_between")
|
2896
|
+
else
|
2897
|
+
raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
|
2898
|
+
end
|
2899
|
+
end
|
1095
2900
|
|
1096
2901
|
# def _hash
|
1097
2902
|
# end
|
1098
2903
|
|
2904
|
+
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2905
|
+
#
|
2906
|
+
# This operation is only allowed for 64bit integers. For lower bits integers,
|
2907
|
+
# you can safely use the cast operation.
|
2908
|
+
#
|
2909
|
+
# @param signed [Boolean]
|
2910
|
+
# If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
|
2911
|
+
#
|
2912
|
+
# @return [Expr]
|
1099
2913
|
#
|
2914
|
+
# @example
|
2915
|
+
# s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
|
2916
|
+
# df = Polars::DataFrame.new([s])
|
2917
|
+
# df.select(
|
2918
|
+
# [
|
2919
|
+
# Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
|
2920
|
+
# Polars.col("a").alias("original")
|
2921
|
+
# ]
|
2922
|
+
# )
|
2923
|
+
# # =>
|
2924
|
+
# # shape: (3, 2)
|
2925
|
+
# # ┌───────────────┬──────────┐
|
2926
|
+
# # │ reinterpreted ┆ original │
|
2927
|
+
# # │ --- ┆ --- │
|
2928
|
+
# # │ i64 ┆ u64 │
|
2929
|
+
# # ╞═══════════════╪══════════╡
|
2930
|
+
# # │ 1 ┆ 1 │
|
2931
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2932
|
+
# # │ 1 ┆ 1 │
|
2933
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2934
|
+
# # │ 2 ┆ 2 │
|
2935
|
+
# # └───────────────┴──────────┘
|
1100
2936
|
def reinterpret(signed: false)
|
1101
2937
|
wrap_expr(_rbexpr.reinterpret(signed))
|
1102
2938
|
end
|
@@ -1104,147 +2940,1541 @@ module Polars
|
|
1104
2940
|
# def _inspect
|
1105
2941
|
# end
|
1106
2942
|
|
2943
|
+
# Fill nulls with linear interpolation over missing values.
|
2944
|
+
#
|
2945
|
+
# Can also be used to regrid data to a new grid - see examples below.
|
2946
|
+
#
|
2947
|
+
# @return [Expr]
|
1107
2948
|
#
|
2949
|
+
# @example Fill nulls with linear interpolation
|
2950
|
+
# df = Polars::DataFrame.new(
|
2951
|
+
# {
|
2952
|
+
# "a" => [1, nil, 3],
|
2953
|
+
# "b" => [1.0, Float::NAN, 3.0]
|
2954
|
+
# }
|
2955
|
+
# )
|
2956
|
+
# df.select(Polars.all.interpolate)
|
2957
|
+
# # =>
|
2958
|
+
# # shape: (3, 2)
|
2959
|
+
# # ┌─────┬─────┐
|
2960
|
+
# # │ a ┆ b │
|
2961
|
+
# # │ --- ┆ --- │
|
2962
|
+
# # │ i64 ┆ f64 │
|
2963
|
+
# # ╞═════╪═════╡
|
2964
|
+
# # │ 1 ┆ 1.0 │
|
2965
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2966
|
+
# # │ 2 ┆ NaN │
|
2967
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2968
|
+
# # │ 3 ┆ 3.0 │
|
2969
|
+
# # └─────┴─────┘
|
1108
2970
|
def interpolate
|
1109
2971
|
wrap_expr(_rbexpr.interpolate)
|
1110
2972
|
end
|
1111
2973
|
|
1112
|
-
#
|
1113
|
-
#
|
1114
|
-
|
1115
|
-
#
|
1116
|
-
#
|
1117
|
-
|
1118
|
-
#
|
1119
|
-
#
|
1120
|
-
|
1121
|
-
#
|
1122
|
-
#
|
1123
|
-
|
1124
|
-
#
|
1125
|
-
#
|
1126
|
-
|
1127
|
-
#
|
1128
|
-
#
|
1129
|
-
|
1130
|
-
#
|
1131
|
-
#
|
1132
|
-
|
1133
|
-
#
|
1134
|
-
#
|
2974
|
+
# Apply a rolling min (moving min) over the values in this array.
|
2975
|
+
#
|
2976
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2977
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2978
|
+
# `weights` vector. The resulting values will be aggregated to their minimum.
|
2979
|
+
#
|
2980
|
+
# @param window_size [Integer]
|
2981
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
2982
|
+
# size indicated by a timedelta or the following string language:
|
2983
|
+
#
|
2984
|
+
# - 1ns (1 nanosecond)
|
2985
|
+
# - 1us (1 microsecond)
|
2986
|
+
# - 1ms (1 millisecond)
|
2987
|
+
# - 1s (1 second)
|
2988
|
+
# - 1m (1 minute)
|
2989
|
+
# - 1h (1 hour)
|
2990
|
+
# - 1d (1 day)
|
2991
|
+
# - 1w (1 week)
|
2992
|
+
# - 1mo (1 calendar month)
|
2993
|
+
# - 1y (1 calendar year)
|
2994
|
+
# - 1i (1 index count)
|
2995
|
+
#
|
2996
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
2997
|
+
# and `closed` arguments must also be set.
|
2998
|
+
# @param weights [Array]
|
2999
|
+
# An optional slice with the same length as the window that will be multiplied
|
3000
|
+
# elementwise with the values in the window.
|
3001
|
+
# @param min_periods [Integer]
|
3002
|
+
# The number of values in the window that should be non-null before computing
|
3003
|
+
# a result. If nil, it will be set equal to window size.
|
3004
|
+
# @param center [Boolean]
|
3005
|
+
# Set the labels at the center of the window
|
3006
|
+
# @param by [String]
|
3007
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3008
|
+
# set the column that will be used to determine the windows. This column must
|
3009
|
+
# be of dtype `{Date, Datetime}`
|
3010
|
+
# @param closed ["left", "right", "both", "none"]
|
3011
|
+
# Define whether the temporal window interval is closed or not.
|
3012
|
+
#
|
3013
|
+
# @note
|
3014
|
+
# This functionality is experimental and may change without it being considered a
|
3015
|
+
# breaking change.
|
3016
|
+
#
|
3017
|
+
# @note
|
3018
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3019
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3020
|
+
# computation.
|
3021
|
+
#
|
3022
|
+
# @return [Expr]
|
3023
|
+
#
|
3024
|
+
# @example
|
3025
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3026
|
+
# df.select(
|
3027
|
+
# [
|
3028
|
+
# Polars.col("A").rolling_min(2)
|
3029
|
+
# ]
|
3030
|
+
# )
|
3031
|
+
# # =>
|
3032
|
+
# # shape: (6, 1)
|
3033
|
+
# # ┌──────┐
|
3034
|
+
# # │ A │
|
3035
|
+
# # │ --- │
|
3036
|
+
# # │ f64 │
|
3037
|
+
# # ╞══════╡
|
3038
|
+
# # │ null │
|
3039
|
+
# # ├╌╌╌╌╌╌┤
|
3040
|
+
# # │ 1.0 │
|
3041
|
+
# # ├╌╌╌╌╌╌┤
|
3042
|
+
# # │ 2.0 │
|
3043
|
+
# # ├╌╌╌╌╌╌┤
|
3044
|
+
# # │ 3.0 │
|
3045
|
+
# # ├╌╌╌╌╌╌┤
|
3046
|
+
# # │ 4.0 │
|
3047
|
+
# # ├╌╌╌╌╌╌┤
|
3048
|
+
# # │ 5.0 │
|
3049
|
+
# # └──────┘
|
3050
|
+
def rolling_min(
|
3051
|
+
window_size,
|
3052
|
+
weights: nil,
|
3053
|
+
min_periods: nil,
|
3054
|
+
center: false,
|
3055
|
+
by: nil,
|
3056
|
+
closed: "left"
|
3057
|
+
)
|
3058
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3059
|
+
window_size, min_periods
|
3060
|
+
)
|
3061
|
+
wrap_expr(
|
3062
|
+
_rbexpr.rolling_min(
|
3063
|
+
window_size, weights, min_periods, center, by, closed
|
3064
|
+
)
|
3065
|
+
)
|
3066
|
+
end
|
3067
|
+
|
3068
|
+
# Apply a rolling max (moving max) over the values in this array.
|
3069
|
+
#
|
3070
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3071
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3072
|
+
# `weights` vector. The resulting values will be aggregated to their maximum.
|
3073
|
+
#
|
3074
|
+
# @param window_size [Integer]
|
3075
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3076
|
+
# size indicated by a timedelta or the following string language:
|
3077
|
+
#
|
3078
|
+
# - 1ns (1 nanosecond)
|
3079
|
+
# - 1us (1 microsecond)
|
3080
|
+
# - 1ms (1 millisecond)
|
3081
|
+
# - 1s (1 second)
|
3082
|
+
# - 1m (1 minute)
|
3083
|
+
# - 1h (1 hour)
|
3084
|
+
# - 1d (1 day)
|
3085
|
+
# - 1w (1 week)
|
3086
|
+
# - 1mo (1 calendar month)
|
3087
|
+
# - 1y (1 calendar year)
|
3088
|
+
# - 1i (1 index count)
|
3089
|
+
#
|
3090
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3091
|
+
# and `closed` arguments must also be set.
|
3092
|
+
# @param weights [Array]
|
3093
|
+
# An optional slice with the same length as the window that will be multiplied
|
3094
|
+
# elementwise with the values in the window.
|
3095
|
+
# @param min_periods [Integer]
|
3096
|
+
# The number of values in the window that should be non-null before computing
|
3097
|
+
# a result. If nil, it will be set equal to window size.
|
3098
|
+
# @param center [Boolean]
|
3099
|
+
# Set the labels at the center of the window
|
3100
|
+
# @param by [String]
|
3101
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3102
|
+
# set the column that will be used to determine the windows. This column must
|
3103
|
+
# be of dtype `{Date, Datetime}`
|
3104
|
+
# @param closed ["left", "right", "both", "none"]
|
3105
|
+
# Define whether the temporal window interval is closed or not.
|
3106
|
+
#
|
3107
|
+
# @note
|
3108
|
+
# This functionality is experimental and may change without it being considered a
|
3109
|
+
# breaking change.
|
3110
|
+
#
|
3111
|
+
# @note
|
3112
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3113
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3114
|
+
# computation.
|
3115
|
+
#
|
3116
|
+
# @return [Expr]
|
3117
|
+
#
|
3118
|
+
# @example
|
3119
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3120
|
+
# df.select(
|
3121
|
+
# [
|
3122
|
+
# Polars.col("A").rolling_max(2)
|
3123
|
+
# ]
|
3124
|
+
# )
|
3125
|
+
# # =>
|
3126
|
+
# # shape: (6, 1)
|
3127
|
+
# # ┌──────┐
|
3128
|
+
# # │ A │
|
3129
|
+
# # │ --- │
|
3130
|
+
# # │ f64 │
|
3131
|
+
# # ╞══════╡
|
3132
|
+
# # │ null │
|
3133
|
+
# # ├╌╌╌╌╌╌┤
|
3134
|
+
# # │ 2.0 │
|
3135
|
+
# # ├╌╌╌╌╌╌┤
|
3136
|
+
# # │ 3.0 │
|
3137
|
+
# # ├╌╌╌╌╌╌┤
|
3138
|
+
# # │ 4.0 │
|
3139
|
+
# # ├╌╌╌╌╌╌┤
|
3140
|
+
# # │ 5.0 │
|
3141
|
+
# # ├╌╌╌╌╌╌┤
|
3142
|
+
# # │ 6.0 │
|
3143
|
+
# # └──────┘
|
3144
|
+
def rolling_max(
|
3145
|
+
window_size,
|
3146
|
+
weights: nil,
|
3147
|
+
min_periods: nil,
|
3148
|
+
center: false,
|
3149
|
+
by: nil,
|
3150
|
+
closed: "left"
|
3151
|
+
)
|
3152
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3153
|
+
window_size, min_periods
|
3154
|
+
)
|
3155
|
+
wrap_expr(
|
3156
|
+
_rbexpr.rolling_max(
|
3157
|
+
window_size, weights, min_periods, center, by, closed
|
3158
|
+
)
|
3159
|
+
)
|
3160
|
+
end
|
3161
|
+
|
3162
|
+
# Apply a rolling mean (moving mean) over the values in this array.
|
3163
|
+
#
|
3164
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3165
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3166
|
+
# `weights` vector. The resulting values will be aggregated to their mean.
|
3167
|
+
#
|
3168
|
+
# @param window_size [Integer]
|
3169
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3170
|
+
# size indicated by a timedelta or the following string language:
|
3171
|
+
#
|
3172
|
+
# - 1ns (1 nanosecond)
|
3173
|
+
# - 1us (1 microsecond)
|
3174
|
+
# - 1ms (1 millisecond)
|
3175
|
+
# - 1s (1 second)
|
3176
|
+
# - 1m (1 minute)
|
3177
|
+
# - 1h (1 hour)
|
3178
|
+
# - 1d (1 day)
|
3179
|
+
# - 1w (1 week)
|
3180
|
+
# - 1mo (1 calendar month)
|
3181
|
+
# - 1y (1 calendar year)
|
3182
|
+
# - 1i (1 index count)
|
3183
|
+
#
|
3184
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3185
|
+
# and `closed` arguments must also be set.
|
3186
|
+
# @param weights [Array]
|
3187
|
+
# An optional slice with the same length as the window that will be multiplied
|
3188
|
+
# elementwise with the values in the window.
|
3189
|
+
# @param min_periods [Integer]
|
3190
|
+
# The number of values in the window that should be non-null before computing
|
3191
|
+
# a result. If nil, it will be set equal to window size.
|
3192
|
+
# @param center [Boolean]
|
3193
|
+
# Set the labels at the center of the window
|
3194
|
+
# @param by [String]
|
3195
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3196
|
+
# set the column that will be used to determine the windows. This column must
|
3197
|
+
# be of dtype `{Date, Datetime}`
|
3198
|
+
# @param closed ["left", "right", "both", "none"]
|
3199
|
+
# Define whether the temporal window interval is closed or not.
|
3200
|
+
#
|
3201
|
+
# @note
|
3202
|
+
# This functionality is experimental and may change without it being considered a
|
3203
|
+
# breaking change.
|
3204
|
+
#
|
3205
|
+
# @note
|
3206
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3207
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3208
|
+
# computation.
|
3209
|
+
#
|
3210
|
+
# @return [Expr]
|
3211
|
+
#
|
3212
|
+
# @example
|
3213
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
|
3214
|
+
# df.select(
|
3215
|
+
# [
|
3216
|
+
# Polars.col("A").rolling_mean(2)
|
3217
|
+
# ]
|
3218
|
+
# )
|
3219
|
+
# # =>
|
3220
|
+
# # shape: (6, 1)
|
3221
|
+
# # ┌──────┐
|
3222
|
+
# # │ A │
|
3223
|
+
# # │ --- │
|
3224
|
+
# # │ f64 │
|
3225
|
+
# # ╞══════╡
|
3226
|
+
# # │ null │
|
3227
|
+
# # ├╌╌╌╌╌╌┤
|
3228
|
+
# # │ 4.5 │
|
3229
|
+
# # ├╌╌╌╌╌╌┤
|
3230
|
+
# # │ 7.0 │
|
3231
|
+
# # ├╌╌╌╌╌╌┤
|
3232
|
+
# # │ 4.0 │
|
3233
|
+
# # ├╌╌╌╌╌╌┤
|
3234
|
+
# # │ 9.0 │
|
3235
|
+
# # ├╌╌╌╌╌╌┤
|
3236
|
+
# # │ 13.0 │
|
3237
|
+
# # └──────┘
|
3238
|
+
def rolling_mean(
|
3239
|
+
window_size,
|
3240
|
+
weights: nil,
|
3241
|
+
min_periods: nil,
|
3242
|
+
center: false,
|
3243
|
+
by: nil,
|
3244
|
+
closed: "left"
|
3245
|
+
)
|
3246
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3247
|
+
window_size, min_periods
|
3248
|
+
)
|
3249
|
+
wrap_expr(
|
3250
|
+
_rbexpr.rolling_mean(
|
3251
|
+
window_size, weights, min_periods, center, by, closed
|
3252
|
+
)
|
3253
|
+
)
|
3254
|
+
end
|
3255
|
+
|
3256
|
+
# Apply a rolling sum (moving sum) over the values in this array.
|
3257
|
+
#
|
3258
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3259
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3260
|
+
# `weights` vector. The resulting values will be aggregated to their sum.
|
3261
|
+
#
|
3262
|
+
# @param window_size [Integer]
|
3263
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3264
|
+
# size indicated by a timedelta or the following string language:
|
3265
|
+
#
|
3266
|
+
# - 1ns (1 nanosecond)
|
3267
|
+
# - 1us (1 microsecond)
|
3268
|
+
# - 1ms (1 millisecond)
|
3269
|
+
# - 1s (1 second)
|
3270
|
+
# - 1m (1 minute)
|
3271
|
+
# - 1h (1 hour)
|
3272
|
+
# - 1d (1 day)
|
3273
|
+
# - 1w (1 week)
|
3274
|
+
# - 1mo (1 calendar month)
|
3275
|
+
# - 1y (1 calendar year)
|
3276
|
+
# - 1i (1 index count)
|
3277
|
+
#
|
3278
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3279
|
+
# and `closed` arguments must also be set.
|
3280
|
+
# @param weights [Array]
|
3281
|
+
# An optional slice with the same length as the window that will be multiplied
|
3282
|
+
# elementwise with the values in the window.
|
3283
|
+
# @param min_periods [Integer]
|
3284
|
+
# The number of values in the window that should be non-null before computing
|
3285
|
+
# a result. If nil, it will be set equal to window size.
|
3286
|
+
# @param center [Boolean]
|
3287
|
+
# Set the labels at the center of the window
|
3288
|
+
# @param by [String]
|
3289
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3290
|
+
# set the column that will be used to determine the windows. This column must
|
3291
|
+
# be of dtype `{Date, Datetime}`
|
3292
|
+
# @param closed ["left", "right", "both", "none"]
|
3293
|
+
# Define whether the temporal window interval is closed or not.
|
3294
|
+
#
|
3295
|
+
# @note
|
3296
|
+
# This functionality is experimental and may change without it being considered a
|
3297
|
+
# breaking change.
|
3298
|
+
#
|
3299
|
+
# @note
|
3300
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3301
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3302
|
+
# computation.
|
3303
|
+
#
|
3304
|
+
# @return [Expr]
|
3305
|
+
#
|
3306
|
+
# @example
|
3307
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3308
|
+
# df.select(
|
3309
|
+
# [
|
3310
|
+
# Polars.col("A").rolling_sum(2)
|
3311
|
+
# ]
|
3312
|
+
# )
|
3313
|
+
# # =>
|
3314
|
+
# # shape: (6, 1)
|
3315
|
+
# # ┌──────┐
|
3316
|
+
# # │ A │
|
3317
|
+
# # │ --- │
|
3318
|
+
# # │ f64 │
|
3319
|
+
# # ╞══════╡
|
3320
|
+
# # │ null │
|
3321
|
+
# # ├╌╌╌╌╌╌┤
|
3322
|
+
# # │ 3.0 │
|
3323
|
+
# # ├╌╌╌╌╌╌┤
|
3324
|
+
# # │ 5.0 │
|
3325
|
+
# # ├╌╌╌╌╌╌┤
|
3326
|
+
# # │ 7.0 │
|
3327
|
+
# # ├╌╌╌╌╌╌┤
|
3328
|
+
# # │ 9.0 │
|
3329
|
+
# # ├╌╌╌╌╌╌┤
|
3330
|
+
# # │ 11.0 │
|
3331
|
+
# # └──────┘
|
3332
|
+
def rolling_sum(
|
3333
|
+
window_size,
|
3334
|
+
weights: nil,
|
3335
|
+
min_periods: nil,
|
3336
|
+
center: false,
|
3337
|
+
by: nil,
|
3338
|
+
closed: "left"
|
3339
|
+
)
|
3340
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3341
|
+
window_size, min_periods
|
3342
|
+
)
|
3343
|
+
wrap_expr(
|
3344
|
+
_rbexpr.rolling_sum(
|
3345
|
+
window_size, weights, min_periods, center, by, closed
|
3346
|
+
)
|
3347
|
+
)
|
3348
|
+
end
|
3349
|
+
|
3350
|
+
# Compute a rolling standard deviation.
|
3351
|
+
#
|
3352
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3353
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3354
|
+
# `weights` vector. The resulting values will be aggregated to their standard deviation.
|
3355
|
+
#
|
3356
|
+
# @param window_size [Integer]
|
3357
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3358
|
+
# size indicated by a timedelta or the following string language:
|
3359
|
+
#
|
3360
|
+
# - 1ns (1 nanosecond)
|
3361
|
+
# - 1us (1 microsecond)
|
3362
|
+
# - 1ms (1 millisecond)
|
3363
|
+
# - 1s (1 second)
|
3364
|
+
# - 1m (1 minute)
|
3365
|
+
# - 1h (1 hour)
|
3366
|
+
# - 1d (1 day)
|
3367
|
+
# - 1w (1 week)
|
3368
|
+
# - 1mo (1 calendar month)
|
3369
|
+
# - 1y (1 calendar year)
|
3370
|
+
# - 1i (1 index count)
|
3371
|
+
#
|
3372
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3373
|
+
# and `closed` arguments must also be set.
|
3374
|
+
# @param weights [Array]
|
3375
|
+
# An optional slice with the same length as the window that will be multiplied
|
3376
|
+
# elementwise with the values in the window.
|
3377
|
+
# @param min_periods [Integer]
|
3378
|
+
# The number of values in the window that should be non-null before computing
|
3379
|
+
# a result. If nil, it will be set equal to window size.
|
3380
|
+
# @param center [Boolean]
|
3381
|
+
# Set the labels at the center of the window
|
3382
|
+
# @param by [String]
|
3383
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3384
|
+
# set the column that will be used to determine the windows. This column must
|
3385
|
+
# be of dtype `{Date, Datetime}`
|
3386
|
+
# @param closed ["left", "right", "both", "none"]
|
3387
|
+
# Define whether the temporal window interval is closed or not.
|
3388
|
+
#
|
3389
|
+
# @note
|
3390
|
+
# This functionality is experimental and may change without it being considered a
|
3391
|
+
# breaking change.
|
3392
|
+
#
|
3393
|
+
# @note
|
3394
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3395
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3396
|
+
# computation.
|
3397
|
+
#
|
3398
|
+
# @return [Expr]
|
3399
|
+
#
|
3400
|
+
# @example
|
3401
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3402
|
+
# df.select(
|
3403
|
+
# [
|
3404
|
+
# Polars.col("A").rolling_std(3)
|
3405
|
+
# ]
|
3406
|
+
# )
|
3407
|
+
# # =>
|
3408
|
+
# # shape: (6, 1)
|
3409
|
+
# # ┌──────────┐
|
3410
|
+
# # │ A │
|
3411
|
+
# # │ --- │
|
3412
|
+
# # │ f64 │
|
3413
|
+
# # ╞══════════╡
|
3414
|
+
# # │ null │
|
3415
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3416
|
+
# # │ null │
|
3417
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3418
|
+
# # │ 1.0 │
|
3419
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3420
|
+
# # │ 1.0 │
|
3421
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3422
|
+
# # │ 1.527525 │
|
3423
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3424
|
+
# # │ 2.0 │
|
3425
|
+
# # └──────────┘
|
3426
|
+
def rolling_std(
|
3427
|
+
window_size,
|
3428
|
+
weights: nil,
|
3429
|
+
min_periods: nil,
|
3430
|
+
center: false,
|
3431
|
+
by: nil,
|
3432
|
+
closed: "left"
|
3433
|
+
)
|
3434
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3435
|
+
window_size, min_periods
|
3436
|
+
)
|
3437
|
+
wrap_expr(
|
3438
|
+
_rbexpr.rolling_std(
|
3439
|
+
window_size, weights, min_periods, center, by, closed
|
3440
|
+
)
|
3441
|
+
)
|
3442
|
+
end
|
3443
|
+
|
3444
|
+
# Compute a rolling variance.
|
3445
|
+
#
|
3446
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3447
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3448
|
+
# `weights` vector. The resulting values will be aggregated to their variance.
|
3449
|
+
#
|
3450
|
+
# @param window_size [Integer]
|
3451
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3452
|
+
# size indicated by a timedelta or the following string language:
|
3453
|
+
#
|
3454
|
+
# - 1ns (1 nanosecond)
|
3455
|
+
# - 1us (1 microsecond)
|
3456
|
+
# - 1ms (1 millisecond)
|
3457
|
+
# - 1s (1 second)
|
3458
|
+
# - 1m (1 minute)
|
3459
|
+
# - 1h (1 hour)
|
3460
|
+
# - 1d (1 day)
|
3461
|
+
# - 1w (1 week)
|
3462
|
+
# - 1mo (1 calendar month)
|
3463
|
+
# - 1y (1 calendar year)
|
3464
|
+
# - 1i (1 index count)
|
3465
|
+
#
|
3466
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3467
|
+
# and `closed` arguments must also be set.
|
3468
|
+
# @param weights [Array]
|
3469
|
+
# An optional slice with the same length as the window that will be multiplied
|
3470
|
+
# elementwise with the values in the window.
|
3471
|
+
# @param min_periods [Integer]
|
3472
|
+
# The number of values in the window that should be non-null before computing
|
3473
|
+
# a result. If nil, it will be set equal to window size.
|
3474
|
+
# @param center [Boolean]
|
3475
|
+
# Set the labels at the center of the window
|
3476
|
+
# @param by [String]
|
3477
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3478
|
+
# set the column that will be used to determine the windows. This column must
|
3479
|
+
# be of dtype `{Date, Datetime}`
|
3480
|
+
# @param closed ["left", "right", "both", "none"]
|
3481
|
+
# Define whether the temporal window interval is closed or not.
|
3482
|
+
#
|
3483
|
+
# @note
|
3484
|
+
# This functionality is experimental and may change without it being considered a
|
3485
|
+
# breaking change.
|
3486
|
+
#
|
3487
|
+
# @note
|
3488
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3489
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3490
|
+
# computation.
|
3491
|
+
#
|
3492
|
+
# @return [Expr]
|
3493
|
+
#
|
3494
|
+
# @example
|
3495
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3496
|
+
# df.select(
|
3497
|
+
# [
|
3498
|
+
# Polars.col("A").rolling_var(3)
|
3499
|
+
# ]
|
3500
|
+
# )
|
3501
|
+
# # =>
|
3502
|
+
# # shape: (6, 1)
|
3503
|
+
# # ┌──────────┐
|
3504
|
+
# # │ A │
|
3505
|
+
# # │ --- │
|
3506
|
+
# # │ f64 │
|
3507
|
+
# # ╞══════════╡
|
3508
|
+
# # │ null │
|
3509
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3510
|
+
# # │ null │
|
3511
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3512
|
+
# # │ 1.0 │
|
3513
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3514
|
+
# # │ 1.0 │
|
3515
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3516
|
+
# # │ 2.333333 │
|
3517
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3518
|
+
# # │ 4.0 │
|
3519
|
+
# # └──────────┘
|
3520
|
+
def rolling_var(
|
3521
|
+
window_size,
|
3522
|
+
weights: nil,
|
3523
|
+
min_periods: nil,
|
3524
|
+
center: false,
|
3525
|
+
by: nil,
|
3526
|
+
closed: "left"
|
3527
|
+
)
|
3528
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3529
|
+
window_size, min_periods
|
3530
|
+
)
|
3531
|
+
wrap_expr(
|
3532
|
+
_rbexpr.rolling_var(
|
3533
|
+
window_size, weights, min_periods, center, by, closed
|
3534
|
+
)
|
3535
|
+
)
|
3536
|
+
end
|
3537
|
+
|
3538
|
+
# Compute a rolling median.
|
3539
|
+
#
|
3540
|
+
# @param window_size [Integer]
|
3541
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3542
|
+
# size indicated by a timedelta or the following string language:
|
3543
|
+
#
|
3544
|
+
# - 1ns (1 nanosecond)
|
3545
|
+
# - 1us (1 microsecond)
|
3546
|
+
# - 1ms (1 millisecond)
|
3547
|
+
# - 1s (1 second)
|
3548
|
+
# - 1m (1 minute)
|
3549
|
+
# - 1h (1 hour)
|
3550
|
+
# - 1d (1 day)
|
3551
|
+
# - 1w (1 week)
|
3552
|
+
# - 1mo (1 calendar month)
|
3553
|
+
# - 1y (1 calendar year)
|
3554
|
+
# - 1i (1 index count)
|
3555
|
+
#
|
3556
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3557
|
+
# and `closed` arguments must also be set.
|
3558
|
+
# @param weights [Array]
|
3559
|
+
# An optional slice with the same length as the window that will be multiplied
|
3560
|
+
# elementwise with the values in the window.
|
3561
|
+
# @param min_periods [Integer]
|
3562
|
+
# The number of values in the window that should be non-null before computing
|
3563
|
+
# a result. If nil, it will be set equal to window size.
|
3564
|
+
# @param center [Boolean]
|
3565
|
+
# Set the labels at the center of the window
|
3566
|
+
# @param by [String]
|
3567
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3568
|
+
# set the column that will be used to determine the windows. This column must
|
3569
|
+
# be of dtype `{Date, Datetime}`
|
3570
|
+
# @param closed ["left", "right", "both", "none"]
|
3571
|
+
# Define whether the temporal window interval is closed or not.
|
3572
|
+
#
|
3573
|
+
# @note
|
3574
|
+
# This functionality is experimental and may change without it being considered a
|
3575
|
+
# breaking change.
|
3576
|
+
#
|
3577
|
+
# @note
|
3578
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3579
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3580
|
+
# computation.
|
3581
|
+
#
|
3582
|
+
# @return [Expr]
|
3583
|
+
#
|
3584
|
+
# @example
|
3585
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3586
|
+
# df.select(
|
3587
|
+
# [
|
3588
|
+
# Polars.col("A").rolling_median(3)
|
3589
|
+
# ]
|
3590
|
+
# )
|
3591
|
+
# # =>
|
3592
|
+
# # shape: (6, 1)
|
3593
|
+
# # ┌──────┐
|
3594
|
+
# # │ A │
|
3595
|
+
# # │ --- │
|
3596
|
+
# # │ f64 │
|
3597
|
+
# # ╞══════╡
|
3598
|
+
# # │ null │
|
3599
|
+
# # ├╌╌╌╌╌╌┤
|
3600
|
+
# # │ null │
|
3601
|
+
# # ├╌╌╌╌╌╌┤
|
3602
|
+
# # │ 2.0 │
|
3603
|
+
# # ├╌╌╌╌╌╌┤
|
3604
|
+
# # │ 3.0 │
|
3605
|
+
# # ├╌╌╌╌╌╌┤
|
3606
|
+
# # │ 4.0 │
|
3607
|
+
# # ├╌╌╌╌╌╌┤
|
3608
|
+
# # │ 6.0 │
|
3609
|
+
# # └──────┘
|
3610
|
+
def rolling_median(
|
3611
|
+
window_size,
|
3612
|
+
weights: nil,
|
3613
|
+
min_periods: nil,
|
3614
|
+
center: false,
|
3615
|
+
by: nil,
|
3616
|
+
closed: "left"
|
3617
|
+
)
|
3618
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3619
|
+
window_size, min_periods
|
3620
|
+
)
|
3621
|
+
wrap_expr(
|
3622
|
+
_rbexpr.rolling_median(
|
3623
|
+
window_size, weights, min_periods, center, by, closed
|
3624
|
+
)
|
3625
|
+
)
|
3626
|
+
end
|
3627
|
+
|
3628
|
+
# Compute a rolling quantile.
|
3629
|
+
#
|
3630
|
+
# @param quantile [Float]
|
3631
|
+
# Quantile between 0.0 and 1.0.
|
3632
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
3633
|
+
# Interpolation method.
|
3634
|
+
# @param window_size [Integer]
|
3635
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3636
|
+
# size indicated by a timedelta or the following string language:
|
3637
|
+
#
|
3638
|
+
# - 1ns (1 nanosecond)
|
3639
|
+
# - 1us (1 microsecond)
|
3640
|
+
# - 1ms (1 millisecond)
|
3641
|
+
# - 1s (1 second)
|
3642
|
+
# - 1m (1 minute)
|
3643
|
+
# - 1h (1 hour)
|
3644
|
+
# - 1d (1 day)
|
3645
|
+
# - 1w (1 week)
|
3646
|
+
# - 1mo (1 calendar month)
|
3647
|
+
# - 1y (1 calendar year)
|
3648
|
+
# - 1i (1 index count)
|
3649
|
+
#
|
3650
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3651
|
+
# and `closed` arguments must also be set.
|
3652
|
+
# @param weights [Array]
|
3653
|
+
# An optional slice with the same length as the window that will be multiplied
|
3654
|
+
# elementwise with the values in the window.
|
3655
|
+
# @param min_periods [Integer]
|
3656
|
+
# The number of values in the window that should be non-null before computing
|
3657
|
+
# a result. If nil, it will be set equal to window size.
|
3658
|
+
# @param center [Boolean]
|
3659
|
+
# Set the labels at the center of the window
|
3660
|
+
# @param by [String]
|
3661
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3662
|
+
# set the column that will be used to determine the windows. This column must
|
3663
|
+
# be of dtype `{Date, Datetime}`
|
3664
|
+
# @param closed ["left", "right", "both", "none"]
|
3665
|
+
# Define whether the temporal window interval is closed or not.
|
3666
|
+
#
|
3667
|
+
# @note
|
3668
|
+
# This functionality is experimental and may change without it being considered a
|
3669
|
+
# breaking change.
|
3670
|
+
#
|
3671
|
+
# @note
|
3672
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3673
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3674
|
+
# computation.
|
3675
|
+
#
|
3676
|
+
# @return [Expr]
|
3677
|
+
#
|
3678
|
+
# @example
|
3679
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3680
|
+
# df.select(
|
3681
|
+
# [
|
3682
|
+
# Polars.col("A").rolling_quantile(0.33, window_size: 3)
|
3683
|
+
# ]
|
3684
|
+
# )
|
3685
|
+
# # =>
|
3686
|
+
# # shape: (6, 1)
|
3687
|
+
# # ┌──────┐
|
3688
|
+
# # │ A │
|
3689
|
+
# # │ --- │
|
3690
|
+
# # │ f64 │
|
3691
|
+
# # ╞══════╡
|
3692
|
+
# # │ null │
|
3693
|
+
# # ├╌╌╌╌╌╌┤
|
3694
|
+
# # │ null │
|
3695
|
+
# # ├╌╌╌╌╌╌┤
|
3696
|
+
# # │ 1.0 │
|
3697
|
+
# # ├╌╌╌╌╌╌┤
|
3698
|
+
# # │ 2.0 │
|
3699
|
+
# # ├╌╌╌╌╌╌┤
|
3700
|
+
# # │ 3.0 │
|
3701
|
+
# # ├╌╌╌╌╌╌┤
|
3702
|
+
# # │ 4.0 │
|
3703
|
+
# # └──────┘
|
3704
|
+
def rolling_quantile(
|
3705
|
+
quantile,
|
3706
|
+
interpolation: "nearest",
|
3707
|
+
window_size: 2,
|
3708
|
+
weights: nil,
|
3709
|
+
min_periods: nil,
|
3710
|
+
center: false,
|
3711
|
+
by: nil,
|
3712
|
+
closed: "left"
|
3713
|
+
)
|
3714
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3715
|
+
window_size, min_periods
|
3716
|
+
)
|
3717
|
+
wrap_expr(
|
3718
|
+
_rbexpr.rolling_quantile(
|
3719
|
+
quantile, interpolation, window_size, weights, min_periods, center, by, closed
|
3720
|
+
)
|
3721
|
+
)
|
3722
|
+
end
|
1135
3723
|
|
1136
3724
|
# def rolling_apply
|
1137
3725
|
# end
|
1138
3726
|
|
3727
|
+
# Compute a rolling skew.
|
1139
3728
|
#
|
3729
|
+
# @param window_size [Integer]
|
3730
|
+
# Integer size of the rolling window.
|
3731
|
+
# @param bias [Boolean]
|
3732
|
+
# If false, the calculations are corrected for statistical bias.
|
3733
|
+
#
|
3734
|
+
# @return [Expr]
|
1140
3735
|
def rolling_skew(window_size, bias: true)
|
1141
3736
|
wrap_expr(_rbexpr.rolling_skew(window_size, bias))
|
1142
3737
|
end
|
1143
3738
|
|
3739
|
+
# Compute absolute values.
|
3740
|
+
#
|
3741
|
+
# @return [Expr]
|
3742
|
+
#
|
3743
|
+
# @example
|
3744
|
+
# df = Polars::DataFrame.new(
|
3745
|
+
# {
|
3746
|
+
# "A" => [-1.0, 0.0, 1.0, 2.0]
|
3747
|
+
# }
|
3748
|
+
# )
|
3749
|
+
# df.select(Polars.col("A").abs)
|
3750
|
+
# # =>
|
3751
|
+
# # shape: (4, 1)
|
3752
|
+
# # ┌─────┐
|
3753
|
+
# # │ A │
|
3754
|
+
# # │ --- │
|
3755
|
+
# # │ f64 │
|
3756
|
+
# # ╞═════╡
|
3757
|
+
# # │ 1.0 │
|
3758
|
+
# # ├╌╌╌╌╌┤
|
3759
|
+
# # │ 0.0 │
|
3760
|
+
# # ├╌╌╌╌╌┤
|
3761
|
+
# # │ 1.0 │
|
3762
|
+
# # ├╌╌╌╌╌┤
|
3763
|
+
# # │ 2.0 │
|
3764
|
+
# # └─────┘
|
1144
3765
|
def abs
|
1145
3766
|
wrap_expr(_rbexpr.abs)
|
1146
3767
|
end
|
1147
3768
|
|
3769
|
+
# Get the index values that would sort this column.
|
3770
|
+
#
|
3771
|
+
# Alias for {#arg_sort}.
|
3772
|
+
#
|
3773
|
+
# @param reverse [Boolean]
|
3774
|
+
# Sort in reverse (descending) order.
|
3775
|
+
# @param nulls_last [Boolean]
|
3776
|
+
# Place null values last instead of first.
|
3777
|
+
#
|
3778
|
+
# @return [Expr]
|
3779
|
+
#
|
3780
|
+
# @example
|
3781
|
+
# df = Polars::DataFrame.new(
|
3782
|
+
# {
|
3783
|
+
# "a" => [20, 10, 30]
|
3784
|
+
# }
|
3785
|
+
# )
|
3786
|
+
# df.select(Polars.col("a").argsort)
|
3787
|
+
# # =>
|
3788
|
+
# # shape: (3, 1)
|
3789
|
+
# # ┌─────┐
|
3790
|
+
# # │ a │
|
3791
|
+
# # │ --- │
|
3792
|
+
# # │ u32 │
|
3793
|
+
# # ╞═════╡
|
3794
|
+
# # │ 1 │
|
3795
|
+
# # ├╌╌╌╌╌┤
|
3796
|
+
# # │ 0 │
|
3797
|
+
# # ├╌╌╌╌╌┤
|
3798
|
+
# # │ 2 │
|
3799
|
+
# # └─────┘
|
1148
3800
|
def argsort(reverse: false, nulls_last: false)
|
1149
3801
|
arg_sort(reverse: reverse, nulls_last: nulls_last)
|
1150
3802
|
end
|
1151
3803
|
|
3804
|
+
# Assign ranks to data, dealing with ties appropriately.
|
3805
|
+
#
|
3806
|
+
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
|
3807
|
+
# The method used to assign ranks to tied elements.
|
3808
|
+
# The following methods are available:
|
3809
|
+
#
|
3810
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
3811
|
+
# all the tied values is assigned to each value.
|
3812
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
3813
|
+
# the tied values is assigned to each value. (This is also referred to
|
3814
|
+
# as "competition" ranking.)
|
3815
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
3816
|
+
# the tied values is assigned to each value.
|
3817
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
3818
|
+
# assigned the rank immediately after those assigned to the tied
|
3819
|
+
# elements.
|
3820
|
+
# - 'ordinal' : All values are given a distinct rank, corresponding to
|
3821
|
+
# the order that the values occur in the Series.
|
3822
|
+
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
3823
|
+
# on the order that the values occur in the Series.
|
3824
|
+
# @param reverse [Boolean]
|
3825
|
+
# Reverse the operation.
|
3826
|
+
#
|
3827
|
+
# @return [Expr]
|
3828
|
+
#
|
3829
|
+
# @example The 'average' method:
|
3830
|
+
# df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
|
3831
|
+
# df.select(Polars.col("a").rank)
|
3832
|
+
# # =>
|
3833
|
+
# # shape: (5, 1)
|
3834
|
+
# # ┌─────┐
|
3835
|
+
# # │ a │
|
3836
|
+
# # │ --- │
|
3837
|
+
# # │ f32 │
|
3838
|
+
# # ╞═════╡
|
3839
|
+
# # │ 3.0 │
|
3840
|
+
# # ├╌╌╌╌╌┤
|
3841
|
+
# # │ 4.5 │
|
3842
|
+
# # ├╌╌╌╌╌┤
|
3843
|
+
# # │ 1.5 │
|
3844
|
+
# # ├╌╌╌╌╌┤
|
3845
|
+
# # │ 1.5 │
|
3846
|
+
# # ├╌╌╌╌╌┤
|
3847
|
+
# # │ 4.5 │
|
3848
|
+
# # └─────┘
|
3849
|
+
#
|
3850
|
+
# @example The 'ordinal' method:
|
3851
|
+
# df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
|
3852
|
+
# df.select(Polars.col("a").rank(method: "ordinal"))
|
3853
|
+
# # =>
|
3854
|
+
# # shape: (5, 1)
|
3855
|
+
# # ┌─────┐
|
3856
|
+
# # │ a │
|
3857
|
+
# # │ --- │
|
3858
|
+
# # │ u32 │
|
3859
|
+
# # ╞═════╡
|
3860
|
+
# # │ 3 │
|
3861
|
+
# # ├╌╌╌╌╌┤
|
3862
|
+
# # │ 4 │
|
3863
|
+
# # ├╌╌╌╌╌┤
|
3864
|
+
# # │ 1 │
|
3865
|
+
# # ├╌╌╌╌╌┤
|
3866
|
+
# # │ 2 │
|
3867
|
+
# # ├╌╌╌╌╌┤
|
3868
|
+
# # │ 5 │
|
3869
|
+
# # └─────┘
|
1152
3870
|
def rank(method: "average", reverse: false)
|
1153
3871
|
wrap_expr(_rbexpr.rank(method, reverse))
|
1154
3872
|
end
|
1155
3873
|
|
3874
|
+
# Calculate the n-th discrete difference.
|
3875
|
+
#
|
3876
|
+
# @param n [Integer]
|
3877
|
+
# Number of slots to shift.
|
3878
|
+
# @param null_behavior ["ignore", "drop"]
|
3879
|
+
# How to handle null values.
|
3880
|
+
#
|
3881
|
+
# @return [Expr]
|
3882
|
+
#
|
3883
|
+
# @example
|
3884
|
+
# df = Polars::DataFrame.new(
|
3885
|
+
# {
|
3886
|
+
# "a" => [20, 10, 30]
|
3887
|
+
# }
|
3888
|
+
# )
|
3889
|
+
# df.select(Polars.col("a").diff)
|
3890
|
+
# # =>
|
3891
|
+
# # shape: (3, 1)
|
3892
|
+
# # ┌──────┐
|
3893
|
+
# # │ a │
|
3894
|
+
# # │ --- │
|
3895
|
+
# # │ i64 │
|
3896
|
+
# # ╞══════╡
|
3897
|
+
# # │ null │
|
3898
|
+
# # ├╌╌╌╌╌╌┤
|
3899
|
+
# # │ -10 │
|
3900
|
+
# # ├╌╌╌╌╌╌┤
|
3901
|
+
# # │ 20 │
|
3902
|
+
# # └──────┘
|
1156
3903
|
def diff(n: 1, null_behavior: "ignore")
|
1157
3904
|
wrap_expr(_rbexpr.diff(n, null_behavior))
|
1158
3905
|
end
|
1159
3906
|
|
3907
|
+
# Computes percentage change between values.
|
3908
|
+
#
|
3909
|
+
# Percentage change (as fraction) between current element and most-recent
|
3910
|
+
# non-null element at least `n` period(s) before the current element.
|
3911
|
+
#
|
3912
|
+
# Computes the change from the previous row by default.
|
3913
|
+
#
|
3914
|
+
# @param n [Integer]
|
3915
|
+
# Periods to shift for forming percent change.
|
3916
|
+
#
|
3917
|
+
# @return [Expr]
|
3918
|
+
#
|
3919
|
+
# @example
|
3920
|
+
# df = Polars::DataFrame.new(
|
3921
|
+
# {
|
3922
|
+
# "a" => [10, 11, 12, nil, 12]
|
3923
|
+
# }
|
3924
|
+
# )
|
3925
|
+
# df.with_column(Polars.col("a").pct_change.alias("pct_change"))
|
3926
|
+
# # =>
|
3927
|
+
# # shape: (5, 2)
|
3928
|
+
# # ┌──────┬────────────┐
|
3929
|
+
# # │ a ┆ pct_change │
|
3930
|
+
# # │ --- ┆ --- │
|
3931
|
+
# # │ i64 ┆ f64 │
|
3932
|
+
# # ╞══════╪════════════╡
|
3933
|
+
# # │ 10 ┆ null │
|
3934
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3935
|
+
# # │ 11 ┆ 0.1 │
|
3936
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3937
|
+
# # │ 12 ┆ 0.090909 │
|
3938
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3939
|
+
# # │ null ┆ 0.0 │
|
3940
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3941
|
+
# # │ 12 ┆ 0.0 │
|
3942
|
+
# # └──────┴────────────┘
|
1160
3943
|
def pct_change(n: 1)
|
1161
3944
|
wrap_expr(_rbexpr.pct_change(n))
|
1162
3945
|
end
|
1163
3946
|
|
3947
|
+
# Compute the sample skewness of a data set.
|
3948
|
+
#
|
3949
|
+
# For normally distributed data, the skewness should be about zero. For
|
3950
|
+
# unimodal continuous distributions, a skewness value greater than zero means
|
3951
|
+
# that there is more weight in the right tail of the distribution. The
|
3952
|
+
# function `skewtest` can be used to determine if the skewness value
|
3953
|
+
# is close enough to zero, statistically speaking.
|
3954
|
+
#
|
3955
|
+
# @param bias [Boolean]
|
3956
|
+
# If false, the calculations are corrected for statistical bias.
|
3957
|
+
#
|
3958
|
+
# @return [Expr]
|
3959
|
+
#
|
3960
|
+
# @example
|
3961
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
3962
|
+
# df.select(Polars.col("a").skew)
|
3963
|
+
# # =>
|
3964
|
+
# # shape: (1, 1)
|
3965
|
+
# # ┌──────────┐
|
3966
|
+
# # │ a │
|
3967
|
+
# # │ --- │
|
3968
|
+
# # │ f64 │
|
3969
|
+
# # ╞══════════╡
|
3970
|
+
# # │ 0.343622 │
|
3971
|
+
# # └──────────┘
|
1164
3972
|
def skew(bias: true)
|
1165
3973
|
wrap_expr(_rbexpr.skew(bias))
|
1166
3974
|
end
|
1167
3975
|
|
3976
|
+
# Compute the kurtosis (Fisher or Pearson) of a dataset.
|
3977
|
+
#
|
3978
|
+
# Kurtosis is the fourth central moment divided by the square of the
|
3979
|
+
# variance. If Fisher's definition is used, then 3.0 is subtracted from
|
3980
|
+
# the result to give 0.0 for a normal distribution.
|
3981
|
+
# If bias is false then the kurtosis is calculated using k statistics to
|
3982
|
+
# eliminate bias coming from biased moment estimators.
|
3983
|
+
#
|
3984
|
+
# @param fisher [Boolean]
|
3985
|
+
# If true, Fisher's definition is used (normal ==> 0.0). If false,
|
3986
|
+
# Pearson's definition is used (normal ==> 3.0).
|
3987
|
+
# @param bias [Boolean]
|
3988
|
+
# If false, the calculations are corrected for statistical bias.
|
3989
|
+
#
|
3990
|
+
# @return [Expr]
|
3991
|
+
#
|
3992
|
+
# @example
|
3993
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
3994
|
+
# df.select(Polars.col("a").kurtosis)
|
3995
|
+
# # =>
|
3996
|
+
# # shape: (1, 1)
|
3997
|
+
# # ┌───────────┐
|
3998
|
+
# # │ a │
|
3999
|
+
# # │ --- │
|
4000
|
+
# # │ f64 │
|
4001
|
+
# # ╞═══════════╡
|
4002
|
+
# # │ -1.153061 │
|
4003
|
+
# # └───────────┘
|
1168
4004
|
def kurtosis(fisher: true, bias: true)
|
1169
4005
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
1170
4006
|
end
|
1171
4007
|
|
4008
|
+
# Clip (limit) the values in an array to a `min` and `max` boundary.
|
4009
|
+
#
|
4010
|
+
# Only works for numerical types.
|
4011
|
+
#
|
4012
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4013
|
+
# expression. See `when` for more information.
|
4014
|
+
#
|
4015
|
+
# @param min_val [Numeric]
|
4016
|
+
# Minimum value.
|
4017
|
+
# @param max_val [Numeric]
|
4018
|
+
# Maximum value.
|
4019
|
+
#
|
4020
|
+
# @return [Expr]
|
4021
|
+
#
|
4022
|
+
# @example
|
4023
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4024
|
+
# df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
|
4025
|
+
# # =>
|
4026
|
+
# # shape: (4, 2)
|
4027
|
+
# # ┌──────┬─────────────┐
|
4028
|
+
# # │ foo ┆ foo_clipped │
|
4029
|
+
# # │ --- ┆ --- │
|
4030
|
+
# # │ i64 ┆ i64 │
|
4031
|
+
# # ╞══════╪═════════════╡
|
4032
|
+
# # │ -50 ┆ 1 │
|
4033
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4034
|
+
# # │ 5 ┆ 5 │
|
4035
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4036
|
+
# # │ null ┆ null │
|
4037
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4038
|
+
# # │ 50 ┆ 10 │
|
4039
|
+
# # └──────┴─────────────┘
|
1172
4040
|
def clip(min_val, max_val)
|
1173
4041
|
wrap_expr(_rbexpr.clip(min_val, max_val))
|
1174
4042
|
end
|
1175
4043
|
|
4044
|
+
# Clip (limit) the values in an array to a `min` boundary.
|
4045
|
+
#
|
4046
|
+
# Only works for numerical types.
|
4047
|
+
#
|
4048
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4049
|
+
# expression. See `when` for more information.
|
4050
|
+
#
|
4051
|
+
# @param min_val [Numeric]
|
4052
|
+
# Minimum value.
|
4053
|
+
#
|
4054
|
+
# @return [Expr]
|
4055
|
+
#
|
4056
|
+
# @example
|
4057
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4058
|
+
# df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
|
4059
|
+
# # =>
|
4060
|
+
# # shape: (4, 2)
|
4061
|
+
# # ┌──────┬─────────────┐
|
4062
|
+
# # │ foo ┆ foo_clipped │
|
4063
|
+
# # │ --- ┆ --- │
|
4064
|
+
# # │ i64 ┆ i64 │
|
4065
|
+
# # ╞══════╪═════════════╡
|
4066
|
+
# # │ -50 ┆ 0 │
|
4067
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4068
|
+
# # │ 5 ┆ 5 │
|
4069
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4070
|
+
# # │ null ┆ null │
|
4071
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4072
|
+
# # │ 50 ┆ 50 │
|
4073
|
+
# # └──────┴─────────────┘
|
1176
4074
|
def clip_min(min_val)
|
1177
4075
|
wrap_expr(_rbexpr.clip_min(min_val))
|
1178
4076
|
end
|
1179
4077
|
|
4078
|
+
# Clip (limit) the values in an array to a `max` boundary.
|
4079
|
+
#
|
4080
|
+
# Only works for numerical types.
|
4081
|
+
#
|
4082
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4083
|
+
# expression. See `when` for more information.
|
4084
|
+
#
|
4085
|
+
# @param max_val [Numeric]
|
4086
|
+
# Maximum value.
|
4087
|
+
#
|
4088
|
+
# @return [Expr]
|
4089
|
+
#
|
4090
|
+
# @example
|
4091
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4092
|
+
# df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
|
4093
|
+
# # =>
|
4094
|
+
# # shape: (4, 2)
|
4095
|
+
# # ┌──────┬─────────────┐
|
4096
|
+
# # │ foo ┆ foo_clipped │
|
4097
|
+
# # │ --- ┆ --- │
|
4098
|
+
# # │ i64 ┆ i64 │
|
4099
|
+
# # ╞══════╪═════════════╡
|
4100
|
+
# # │ -50 ┆ -50 │
|
4101
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4102
|
+
# # │ 5 ┆ 0 │
|
4103
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4104
|
+
# # │ null ┆ null │
|
4105
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4106
|
+
# # │ 50 ┆ 0 │
|
4107
|
+
# # └──────┴─────────────┘
|
1180
4108
|
def clip_max(max_val)
|
1181
4109
|
wrap_expr(_rbexpr.clip_max(max_val))
|
1182
4110
|
end
|
1183
4111
|
|
4112
|
+
# Calculate the lower bound.
|
4113
|
+
#
|
4114
|
+
# Returns a unit Series with the lowest value possible for the dtype of this
|
4115
|
+
# expression.
|
4116
|
+
#
|
4117
|
+
# @return [Expr]
|
4118
|
+
#
|
4119
|
+
# @example
|
4120
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4121
|
+
# df.select(Polars.col("a").lower_bound)
|
4122
|
+
# # =>
|
4123
|
+
# # shape: (1, 1)
|
4124
|
+
# # ┌──────────────────────┐
|
4125
|
+
# # │ a │
|
4126
|
+
# # │ --- │
|
4127
|
+
# # │ i64 │
|
4128
|
+
# # ╞══════════════════════╡
|
4129
|
+
# # │ -9223372036854775808 │
|
4130
|
+
# # └──────────────────────┘
|
1184
4131
|
def lower_bound
|
1185
4132
|
wrap_expr(_rbexpr.lower_bound)
|
1186
4133
|
end
|
1187
4134
|
|
4135
|
+
# Calculate the upper bound.
|
4136
|
+
#
|
4137
|
+
# Returns a unit Series with the highest value possible for the dtype of this
|
4138
|
+
# expression.
|
4139
|
+
#
|
4140
|
+
# @return [Expr]
|
4141
|
+
#
|
4142
|
+
# @example
|
4143
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4144
|
+
# df.select(Polars.col("a").upper_bound)
|
4145
|
+
# # =>
|
4146
|
+
# # shape: (1, 1)
|
4147
|
+
# # ┌─────────────────────┐
|
4148
|
+
# # │ a │
|
4149
|
+
# # │ --- │
|
4150
|
+
# # │ i64 │
|
4151
|
+
# # ╞═════════════════════╡
|
4152
|
+
# # │ 9223372036854775807 │
|
4153
|
+
# # └─────────────────────┘
|
1188
4154
|
def upper_bound
|
1189
4155
|
wrap_expr(_rbexpr.upper_bound)
|
1190
4156
|
end
|
1191
4157
|
|
4158
|
+
# Compute the element-wise indication of the sign.
|
4159
|
+
#
|
4160
|
+
# @return [Expr]
|
4161
|
+
#
|
4162
|
+
# @example
|
4163
|
+
# df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
|
4164
|
+
# df.select(Polars.col("a").sign)
|
4165
|
+
# # =>
|
4166
|
+
# # shape: (5, 1)
|
4167
|
+
# # ┌──────┐
|
4168
|
+
# # │ a │
|
4169
|
+
# # │ --- │
|
4170
|
+
# # │ i64 │
|
4171
|
+
# # ╞══════╡
|
4172
|
+
# # │ -1 │
|
4173
|
+
# # ├╌╌╌╌╌╌┤
|
4174
|
+
# # │ 0 │
|
4175
|
+
# # ├╌╌╌╌╌╌┤
|
4176
|
+
# # │ 0 │
|
4177
|
+
# # ├╌╌╌╌╌╌┤
|
4178
|
+
# # │ 1 │
|
4179
|
+
# # ├╌╌╌╌╌╌┤
|
4180
|
+
# # │ null │
|
4181
|
+
# # └──────┘
|
1192
4182
|
def sign
|
1193
4183
|
wrap_expr(_rbexpr.sign)
|
1194
4184
|
end
|
1195
4185
|
|
4186
|
+
# Compute the element-wise value for the sine.
|
4187
|
+
#
|
4188
|
+
# @return [Expr]
|
4189
|
+
#
|
4190
|
+
# @example
|
4191
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4192
|
+
# df.select(Polars.col("a").sin)
|
4193
|
+
# # =>
|
4194
|
+
# # shape: (1, 1)
|
4195
|
+
# # ┌─────┐
|
4196
|
+
# # │ a │
|
4197
|
+
# # │ --- │
|
4198
|
+
# # │ f64 │
|
4199
|
+
# # ╞═════╡
|
4200
|
+
# # │ 0.0 │
|
4201
|
+
# # └─────┘
|
1196
4202
|
def sin
|
1197
4203
|
wrap_expr(_rbexpr.sin)
|
1198
4204
|
end
|
1199
4205
|
|
4206
|
+
# Compute the element-wise value for the cosine.
|
4207
|
+
#
|
4208
|
+
# @return [Expr]
|
4209
|
+
#
|
4210
|
+
# @example
|
4211
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4212
|
+
# df.select(Polars.col("a").cos)
|
4213
|
+
# # =>
|
4214
|
+
# # shape: (1, 1)
|
4215
|
+
# # ┌─────┐
|
4216
|
+
# # │ a │
|
4217
|
+
# # │ --- │
|
4218
|
+
# # │ f64 │
|
4219
|
+
# # ╞═════╡
|
4220
|
+
# # │ 1.0 │
|
4221
|
+
# # └─────┘
|
1200
4222
|
def cos
|
1201
4223
|
wrap_expr(_rbexpr.cos)
|
1202
4224
|
end
|
1203
4225
|
|
4226
|
+
# Compute the element-wise value for the tangent.
|
4227
|
+
#
|
4228
|
+
# @return [Expr]
|
4229
|
+
#
|
4230
|
+
# @example
|
4231
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4232
|
+
# df.select(Polars.col("a").tan)
|
4233
|
+
# # =>
|
4234
|
+
# # shape: (1, 1)
|
4235
|
+
# # ┌──────────┐
|
4236
|
+
# # │ a │
|
4237
|
+
# # │ --- │
|
4238
|
+
# # │ f64 │
|
4239
|
+
# # ╞══════════╡
|
4240
|
+
# # │ 1.557408 │
|
4241
|
+
# # └──────────┘
|
1204
4242
|
def tan
|
1205
4243
|
wrap_expr(_rbexpr.tan)
|
1206
4244
|
end
|
1207
4245
|
|
4246
|
+
# Compute the element-wise value for the inverse sine.
|
4247
|
+
#
|
4248
|
+
# @return [Expr]
|
4249
|
+
#
|
4250
|
+
# @example
|
4251
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4252
|
+
# df.select(Polars.col("a").arcsin)
|
4253
|
+
# # =>
|
4254
|
+
# # shape: (1, 1)
|
4255
|
+
# # ┌──────────┐
|
4256
|
+
# # │ a │
|
4257
|
+
# # │ --- │
|
4258
|
+
# # │ f64 │
|
4259
|
+
# # ╞══════════╡
|
4260
|
+
# # │ 1.570796 │
|
4261
|
+
# # └──────────┘
|
1208
4262
|
def arcsin
|
1209
4263
|
wrap_expr(_rbexpr.arcsin)
|
1210
4264
|
end
|
1211
4265
|
|
4266
|
+
# Compute the element-wise value for the inverse cosine.
|
4267
|
+
#
|
4268
|
+
# @return [Expr]
|
4269
|
+
#
|
4270
|
+
# @example
|
4271
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4272
|
+
# df.select(Polars.col("a").arccos)
|
4273
|
+
# # =>
|
4274
|
+
# # shape: (1, 1)
|
4275
|
+
# # ┌──────────┐
|
4276
|
+
# # │ a │
|
4277
|
+
# # │ --- │
|
4278
|
+
# # │ f64 │
|
4279
|
+
# # ╞══════════╡
|
4280
|
+
# # │ 1.570796 │
|
4281
|
+
# # └──────────┘
|
1212
4282
|
def arccos
|
1213
4283
|
wrap_expr(_rbexpr.arccos)
|
1214
4284
|
end
|
1215
4285
|
|
4286
|
+
# Compute the element-wise value for the inverse tangent.
|
4287
|
+
#
|
4288
|
+
# @return [Expr]
|
4289
|
+
#
|
4290
|
+
# @example
|
4291
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4292
|
+
# df.select(Polars.col("a").arctan)
|
4293
|
+
# # =>
|
4294
|
+
# # shape: (1, 1)
|
4295
|
+
# # ┌──────────┐
|
4296
|
+
# # │ a │
|
4297
|
+
# # │ --- │
|
4298
|
+
# # │ f64 │
|
4299
|
+
# # ╞══════════╡
|
4300
|
+
# # │ 0.785398 │
|
4301
|
+
# # └──────────┘
|
1216
4302
|
def arctan
|
1217
4303
|
wrap_expr(_rbexpr.arctan)
|
1218
4304
|
end
|
1219
4305
|
|
4306
|
+
# Compute the element-wise value for the hyperbolic sine.
|
4307
|
+
#
|
4308
|
+
# @return [Expr]
|
4309
|
+
#
|
4310
|
+
# @example
|
4311
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4312
|
+
# df.select(Polars.col("a").sinh)
|
4313
|
+
# # =>
|
4314
|
+
# # shape: (1, 1)
|
4315
|
+
# # ┌──────────┐
|
4316
|
+
# # │ a │
|
4317
|
+
# # │ --- │
|
4318
|
+
# # │ f64 │
|
4319
|
+
# # ╞══════════╡
|
4320
|
+
# # │ 1.175201 │
|
4321
|
+
# # └──────────┘
|
1220
4322
|
def sinh
|
1221
4323
|
wrap_expr(_rbexpr.sinh)
|
1222
4324
|
end
|
1223
4325
|
|
4326
|
+
# Compute the element-wise value for the hyperbolic cosine.
|
4327
|
+
#
|
4328
|
+
# @return [Expr]
|
4329
|
+
#
|
4330
|
+
# @example
|
4331
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4332
|
+
# df.select(Polars.col("a").cosh)
|
4333
|
+
# # =>
|
4334
|
+
# # shape: (1, 1)
|
4335
|
+
# # ┌──────────┐
|
4336
|
+
# # │ a │
|
4337
|
+
# # │ --- │
|
4338
|
+
# # │ f64 │
|
4339
|
+
# # ╞══════════╡
|
4340
|
+
# # │ 1.543081 │
|
4341
|
+
# # └──────────┘
|
1224
4342
|
def cosh
|
1225
4343
|
wrap_expr(_rbexpr.cosh)
|
1226
4344
|
end
|
1227
4345
|
|
4346
|
+
# Compute the element-wise value for the hyperbolic tangent.
|
4347
|
+
#
|
4348
|
+
# @return [Expr]
|
4349
|
+
#
|
4350
|
+
# @example
|
4351
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4352
|
+
# df.select(Polars.col("a").tanh)
|
4353
|
+
# # =>
|
4354
|
+
# # shape: (1, 1)
|
4355
|
+
# # ┌──────────┐
|
4356
|
+
# # │ a │
|
4357
|
+
# # │ --- │
|
4358
|
+
# # │ f64 │
|
4359
|
+
# # ╞══════════╡
|
4360
|
+
# # │ 0.761594 │
|
4361
|
+
# # └──────────┘
|
1228
4362
|
def tanh
|
1229
4363
|
wrap_expr(_rbexpr.tanh)
|
1230
4364
|
end
|
1231
4365
|
|
4366
|
+
# Compute the element-wise value for the inverse hyperbolic sine.
|
4367
|
+
#
|
4368
|
+
# @return [Expr]
|
4369
|
+
#
|
4370
|
+
# @example
|
4371
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4372
|
+
# df.select(Polars.col("a").arcsinh)
|
4373
|
+
# # =>
|
4374
|
+
# # shape: (1, 1)
|
4375
|
+
# # ┌──────────┐
|
4376
|
+
# # │ a │
|
4377
|
+
# # │ --- │
|
4378
|
+
# # │ f64 │
|
4379
|
+
# # ╞══════════╡
|
4380
|
+
# # │ 0.881374 │
|
4381
|
+
# # └──────────┘
|
1232
4382
|
def arcsinh
|
1233
4383
|
wrap_expr(_rbexpr.arcsinh)
|
1234
4384
|
end
|
1235
4385
|
|
4386
|
+
# Compute the element-wise value for the inverse hyperbolic cosine.
|
4387
|
+
#
|
4388
|
+
# @return [Expr]
|
4389
|
+
#
|
4390
|
+
# @example
|
4391
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4392
|
+
# df.select(Polars.col("a").arccosh)
|
4393
|
+
# # =>
|
4394
|
+
# # shape: (1, 1)
|
4395
|
+
# # ┌─────┐
|
4396
|
+
# # │ a │
|
4397
|
+
# # │ --- │
|
4398
|
+
# # │ f64 │
|
4399
|
+
# # ╞═════╡
|
4400
|
+
# # │ 0.0 │
|
4401
|
+
# # └─────┘
|
1236
4402
|
def arccosh
|
1237
4403
|
wrap_expr(_rbexpr.arccosh)
|
1238
4404
|
end
|
1239
4405
|
|
4406
|
+
# Compute the element-wise value for the inverse hyperbolic tangent.
|
4407
|
+
#
|
4408
|
+
# @return [Expr]
|
4409
|
+
#
|
4410
|
+
# @example
|
4411
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4412
|
+
# df.select(Polars.col("a").arctanh)
|
4413
|
+
# # =>
|
4414
|
+
# # shape: (1, 1)
|
4415
|
+
# # ┌─────┐
|
4416
|
+
# # │ a │
|
4417
|
+
# # │ --- │
|
4418
|
+
# # │ f64 │
|
4419
|
+
# # ╞═════╡
|
4420
|
+
# # │ inf │
|
4421
|
+
# # └─────┘
|
1240
4422
|
def arctanh
|
1241
4423
|
wrap_expr(_rbexpr.arctanh)
|
1242
4424
|
end
|
1243
4425
|
|
4426
|
+
# Reshape this Expr to a flat Series or a Series of Lists.
|
4427
|
+
#
|
4428
|
+
# @param dims [Array]
|
4429
|
+
# Array of the dimension sizes. If a -1 is used in any of the dimensions, that
|
4430
|
+
# dimension is inferred.
|
4431
|
+
#
|
4432
|
+
# @return [Expr]
|
4433
|
+
#
|
4434
|
+
# @example
|
4435
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
4436
|
+
# df.select(Polars.col("foo").reshape([3, 3]))
|
4437
|
+
# # =>
|
4438
|
+
# # shape: (3, 1)
|
4439
|
+
# # ┌───────────┐
|
4440
|
+
# # │ foo │
|
4441
|
+
# # │ --- │
|
4442
|
+
# # │ list[i64] │
|
4443
|
+
# # ╞═══════════╡
|
4444
|
+
# # │ [1, 2, 3] │
|
4445
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4446
|
+
# # │ [4, 5, 6] │
|
4447
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4448
|
+
# # │ [7, 8, 9] │
|
4449
|
+
# # └───────────┘
|
1244
4450
|
def reshape(dims)
|
1245
4451
|
wrap_expr(_rbexpr.reshape(dims))
|
1246
4452
|
end
|
1247
4453
|
|
4454
|
+
# Shuffle the contents of this expr.
|
4455
|
+
#
|
4456
|
+
# @param seed [Integer]
|
4457
|
+
# Seed for the random number generator. If set to nil (default), a random
|
4458
|
+
# seed is generated using `rand`.
|
4459
|
+
#
|
4460
|
+
# @return [Expr]
|
4461
|
+
#
|
4462
|
+
# @example
|
4463
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4464
|
+
# df.select(Polars.col("a").shuffle(seed: 1))
|
4465
|
+
# # =>
|
4466
|
+
# # shape: (3, 1)
|
4467
|
+
# # ┌─────┐
|
4468
|
+
# # │ a │
|
4469
|
+
# # │ --- │
|
4470
|
+
# # │ i64 │
|
4471
|
+
# # ╞═════╡
|
4472
|
+
# # │ 2 │
|
4473
|
+
# # ├╌╌╌╌╌┤
|
4474
|
+
# # │ 1 │
|
4475
|
+
# # ├╌╌╌╌╌┤
|
4476
|
+
# # │ 3 │
|
4477
|
+
# # └─────┘
|
1248
4478
|
def shuffle(seed: nil)
|
1249
4479
|
if seed.nil?
|
1250
4480
|
seed = rand(10000)
|
@@ -1252,74 +4482,514 @@ module Polars
|
|
1252
4482
|
wrap_expr(_rbexpr.shuffle(seed))
|
1253
4483
|
end
|
1254
4484
|
|
1255
|
-
#
|
1256
|
-
#
|
1257
|
-
|
1258
|
-
#
|
1259
|
-
#
|
4485
|
+
# Sample from this expression.
|
4486
|
+
#
|
4487
|
+
# @param frac [Float]
|
4488
|
+
# Fraction of items to return. Cannot be used with `n`.
|
4489
|
+
# @param with_replacement [Boolean]
|
4490
|
+
# Allow values to be sampled more than once.
|
4491
|
+
# @param shuffle [Boolean]
|
4492
|
+
# Shuffle the order of sampled data points.
|
4493
|
+
# @param seed [Integer]
|
4494
|
+
# Seed for the random number generator. If set to nil (default), a random
|
4495
|
+
# seed is used.
|
4496
|
+
# @param n [Integer]
|
4497
|
+
# Number of items to return. Cannot be used with `frac`.
|
4498
|
+
#
|
4499
|
+
# @return [Expr]
|
4500
|
+
#
|
4501
|
+
# @example
|
4502
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4503
|
+
# df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
|
4504
|
+
# # =>
|
4505
|
+
# # shape: (3, 1)
|
4506
|
+
# # ┌─────┐
|
4507
|
+
# # │ a │
|
4508
|
+
# # │ --- │
|
4509
|
+
# # │ i64 │
|
4510
|
+
# # ╞═════╡
|
4511
|
+
# # │ 3 │
|
4512
|
+
# # ├╌╌╌╌╌┤
|
4513
|
+
# # │ 1 │
|
4514
|
+
# # ├╌╌╌╌╌┤
|
4515
|
+
# # │ 1 │
|
4516
|
+
# # └─────┘
|
4517
|
+
def sample(
|
4518
|
+
frac: nil,
|
4519
|
+
with_replacement: true,
|
4520
|
+
shuffle: false,
|
4521
|
+
seed: nil,
|
4522
|
+
n: nil
|
4523
|
+
)
|
4524
|
+
if !n.nil? && !frac.nil?
|
4525
|
+
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4526
|
+
end
|
1260
4527
|
|
1261
|
-
|
1262
|
-
|
4528
|
+
if !n.nil? && frac.nil?
|
4529
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4530
|
+
end
|
1263
4531
|
|
1264
|
-
|
1265
|
-
|
4532
|
+
if frac.nil?
|
4533
|
+
frac = 1.0
|
4534
|
+
end
|
4535
|
+
wrap_expr(
|
4536
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4537
|
+
)
|
4538
|
+
end
|
1266
4539
|
|
4540
|
+
# Exponentially-weighted moving average.
|
4541
|
+
#
|
4542
|
+
# @return [Expr]
|
4543
|
+
#
|
4544
|
+
# @example
|
4545
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4546
|
+
# df.select(Polars.col("a").ewm_mean(com: 1))
|
4547
|
+
# # =>
|
4548
|
+
# # shape: (3, 1)
|
4549
|
+
# # ┌──────────┐
|
4550
|
+
# # │ a │
|
4551
|
+
# # │ --- │
|
4552
|
+
# # │ f64 │
|
4553
|
+
# # ╞══════════╡
|
4554
|
+
# # │ 1.0 │
|
4555
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4556
|
+
# # │ 1.666667 │
|
4557
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4558
|
+
# # │ 2.428571 │
|
4559
|
+
# # └──────────┘
|
4560
|
+
def ewm_mean(
|
4561
|
+
com: nil,
|
4562
|
+
span: nil,
|
4563
|
+
half_life: nil,
|
4564
|
+
alpha: nil,
|
4565
|
+
adjust: true,
|
4566
|
+
min_periods: 1
|
4567
|
+
)
|
4568
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4569
|
+
wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
|
4570
|
+
end
|
4571
|
+
|
4572
|
+
# Exponentially-weighted moving standard deviation.
|
4573
|
+
#
|
4574
|
+
# @return [Expr]
|
4575
|
+
#
|
4576
|
+
# @example
|
4577
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4578
|
+
# df.select(Polars.col("a").ewm_std(com: 1))
|
4579
|
+
# # =>
|
4580
|
+
# # shape: (3, 1)
|
4581
|
+
# # ┌──────────┐
|
4582
|
+
# # │ a │
|
4583
|
+
# # │ --- │
|
4584
|
+
# # │ f64 │
|
4585
|
+
# # ╞══════════╡
|
4586
|
+
# # │ 0.0 │
|
4587
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4588
|
+
# # │ 0.707107 │
|
4589
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4590
|
+
# # │ 0.963624 │
|
4591
|
+
# # └──────────┘
|
4592
|
+
def ewm_std(
|
4593
|
+
com: nil,
|
4594
|
+
span: nil,
|
4595
|
+
half_life: nil,
|
4596
|
+
alpha: nil,
|
4597
|
+
adjust: true,
|
4598
|
+
bias: false,
|
4599
|
+
min_periods: 1
|
4600
|
+
)
|
4601
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4602
|
+
wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
|
4603
|
+
end
|
4604
|
+
|
4605
|
+
# Exponentially-weighted moving variance.
|
4606
|
+
#
|
4607
|
+
# @return [Expr]
|
4608
|
+
#
|
4609
|
+
# @example
|
4610
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4611
|
+
# df.select(Polars.col("a").ewm_var(com: 1))
|
4612
|
+
# # =>
|
4613
|
+
# # shape: (3, 1)
|
4614
|
+
# # ┌──────────┐
|
4615
|
+
# # │ a │
|
4616
|
+
# # │ --- │
|
4617
|
+
# # │ f64 │
|
4618
|
+
# # ╞══════════╡
|
4619
|
+
# # │ 0.0 │
|
4620
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4621
|
+
# # │ 0.5 │
|
4622
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4623
|
+
# # │ 0.928571 │
|
4624
|
+
# # └──────────┘
|
4625
|
+
def ewm_var(
|
4626
|
+
com: nil,
|
4627
|
+
span: nil,
|
4628
|
+
half_life: nil,
|
4629
|
+
alpha: nil,
|
4630
|
+
adjust: true,
|
4631
|
+
bias: false,
|
4632
|
+
min_periods: 1
|
4633
|
+
)
|
4634
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4635
|
+
wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
|
4636
|
+
end
|
4637
|
+
|
4638
|
+
# Extend the Series with given number of values.
|
4639
|
+
#
|
4640
|
+
# @param value [Object]
|
4641
|
+
# The value to extend the Series with. This value may be nil to fill with
|
4642
|
+
# nulls.
|
4643
|
+
# @param n [Integer]
|
4644
|
+
# The number of values to extend.
|
4645
|
+
#
|
4646
|
+
# @return [Expr]
|
1267
4647
|
#
|
4648
|
+
# @example
|
4649
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3]})
|
4650
|
+
# df.select(Polars.col("values").extend_constant(99, 2))
|
4651
|
+
# # =>
|
4652
|
+
# # shape: (5, 1)
|
4653
|
+
# # ┌────────┐
|
4654
|
+
# # │ values │
|
4655
|
+
# # │ --- │
|
4656
|
+
# # │ i64 │
|
4657
|
+
# # ╞════════╡
|
4658
|
+
# # │ 1 │
|
4659
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4660
|
+
# # │ 2 │
|
4661
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4662
|
+
# # │ 3 │
|
4663
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4664
|
+
# # │ 99 │
|
4665
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4666
|
+
# # │ 99 │
|
4667
|
+
# # └────────┘
|
1268
4668
|
def extend_constant(value, n)
|
1269
4669
|
wrap_expr(_rbexpr.extend_constant(value, n))
|
1270
4670
|
end
|
1271
4671
|
|
4672
|
+
# Count all unique values and create a struct mapping value to count.
|
4673
|
+
#
|
4674
|
+
# @param multithreaded [Boolean]
|
4675
|
+
# Better to turn this off in the aggregation context, as it can lead to
|
4676
|
+
# contention.
|
4677
|
+
# @param sort [Boolean]
|
4678
|
+
# Ensure the output is sorted from most values to least.
|
4679
|
+
#
|
4680
|
+
# @return [Expr]
|
4681
|
+
#
|
4682
|
+
# @example
|
4683
|
+
# df = Polars::DataFrame.new(
|
4684
|
+
# {
|
4685
|
+
# "id" => ["a", "b", "b", "c", "c", "c"]
|
4686
|
+
# }
|
4687
|
+
# )
|
4688
|
+
# df.select(
|
4689
|
+
# [
|
4690
|
+
# Polars.col("id").value_counts(sort: true),
|
4691
|
+
# ]
|
4692
|
+
# )
|
4693
|
+
# # =>
|
4694
|
+
# # shape: (3, 1)
|
4695
|
+
# # ┌───────────┐
|
4696
|
+
# # │ id │
|
4697
|
+
# # │ --- │
|
4698
|
+
# # │ struct[2] │
|
4699
|
+
# # ╞═══════════╡
|
4700
|
+
# # │ {"c",3} │
|
4701
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4702
|
+
# # │ {"b",2} │
|
4703
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4704
|
+
# # │ {"a",1} │
|
4705
|
+
# # └───────────┘
|
1272
4706
|
def value_counts(multithreaded: false, sort: false)
|
1273
4707
|
wrap_expr(_rbexpr.value_counts(multithreaded, sort))
|
1274
4708
|
end
|
1275
4709
|
|
4710
|
+
# Return a count of the unique values in the order of appearance.
|
4711
|
+
#
|
4712
|
+
# This method differs from `value_counts` in that it does not return the
|
4713
|
+
# values, only the counts, and might be faster.
|
4714
|
+
#
|
4715
|
+
# @return [Expr]
|
4716
|
+
#
|
4717
|
+
# @example
|
4718
|
+
# df = Polars::DataFrame.new(
|
4719
|
+
# {
|
4720
|
+
# "id" => ["a", "b", "b", "c", "c", "c"]
|
4721
|
+
# }
|
4722
|
+
# )
|
4723
|
+
# df.select(
|
4724
|
+
# [
|
4725
|
+
# Polars.col("id").unique_counts
|
4726
|
+
# ]
|
4727
|
+
# )
|
4728
|
+
# # =>
|
4729
|
+
# # shape: (3, 1)
|
4730
|
+
# # ┌─────┐
|
4731
|
+
# # │ id │
|
4732
|
+
# # │ --- │
|
4733
|
+
# # │ u32 │
|
4734
|
+
# # ╞═════╡
|
4735
|
+
# # │ 1 │
|
4736
|
+
# # ├╌╌╌╌╌┤
|
4737
|
+
# # │ 2 │
|
4738
|
+
# # ├╌╌╌╌╌┤
|
4739
|
+
# # │ 3 │
|
4740
|
+
# # └─────┘
|
1276
4741
|
def unique_counts
|
1277
4742
|
wrap_expr(_rbexpr.unique_counts)
|
1278
4743
|
end
|
1279
4744
|
|
4745
|
+
# Compute the logarithm to a given base.
|
4746
|
+
#
|
4747
|
+
# @param base [Float]
|
4748
|
+
# Given base, defaults to `e`.
|
4749
|
+
#
|
4750
|
+
# @return [Expr]
|
4751
|
+
#
|
4752
|
+
# @example
|
4753
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4754
|
+
# df.select(Polars.col("a").log(2))
|
4755
|
+
# # =>
|
4756
|
+
# # shape: (3, 1)
|
4757
|
+
# # ┌──────────┐
|
4758
|
+
# # │ a │
|
4759
|
+
# # │ --- │
|
4760
|
+
# # │ f64 │
|
4761
|
+
# # ╞══════════╡
|
4762
|
+
# # │ 0.0 │
|
4763
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4764
|
+
# # │ 1.0 │
|
4765
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4766
|
+
# # │ 1.584963 │
|
4767
|
+
# # └──────────┘
|
1280
4768
|
def log(base = Math::E)
  # With no argument the base is Math::E, i.e. the natural logarithm.
  logged = _rbexpr.log(base)
  wrap_expr(logged)
end
|
1283
4771
|
|
1284
|
-
|
4772
|
+
# Computes the entropy.
|
4773
|
+
#
|
4774
|
+
# Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
|
4775
|
+
#
|
4776
|
+
# @param base [Float]
|
4777
|
+
# Given base, defaults to `2`.
|
4778
|
+
# @param normalize [Boolean]
|
4779
|
+
# Normalize pk if it doesn't sum to 1.
|
4780
|
+
#
|
4781
|
+
# @return [Expr]
|
4782
|
+
#
|
4783
|
+
# @example
|
4784
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4785
|
+
# df.select(Polars.col("a").entropy(base: 2))
|
4786
|
+
# # =>
|
4787
|
+
# # shape: (1, 1)
|
4788
|
+
# # ┌──────────┐
|
4789
|
+
# # │ a │
|
4790
|
+
# # │ --- │
|
4791
|
+
# # │ f64 │
|
4792
|
+
# # ╞══════════╡
|
4793
|
+
# # │ 1.459148 │
|
4794
|
+
# # └──────────┘
|
4795
|
+
#
|
4796
|
+
# @example
|
4797
|
+
# df.select(Polars.col("a").entropy(base: 2, normalize: false))
|
4798
|
+
# # =>
|
4799
|
+
# # shape: (1, 1)
|
4800
|
+
# # ┌───────────┐
|
4801
|
+
# # │ a │
|
4802
|
+
# # │ --- │
|
4803
|
+
# # │ f64 │
|
4804
|
+
# # ╞═══════════╡
|
4805
|
+
# # │ -6.754888 │
|
4806
|
+
# # └───────────┘
|
4807
|
+
def entropy(base: 2, normalize: true)
  # `base` and `normalize` are passed positionally to `_rbexpr.entropy`.
  computed = _rbexpr.entropy(base, normalize)
  wrap_expr(computed)
end
|
1287
4810
|
|
1288
|
-
#
|
1289
|
-
#
|
1290
|
-
|
1291
|
-
#
|
4811
|
+
# Run an expression over a sliding window that increases `1` slot every iteration.
|
4812
|
+
#
|
4813
|
+
# @param expr [Expr]
|
4814
|
+
# Expression to evaluate
|
4815
|
+
# @param min_periods [Integer]
|
4816
|
+
# Number of valid values there should be in the window before the expression
|
4817
|
+
# is evaluated. valid values = `length - null_count`
|
4818
|
+
# @param parallel [Boolean]
|
4819
|
+
# Run in parallel. Don't do this in a groupby or another operation that
|
4820
|
+
# already has much parallelization.
|
4821
|
+
#
|
4822
|
+
# @return [Expr]
|
4823
|
+
#
|
4824
|
+
# @note
|
4825
|
+
# This functionality is experimental and may change without it being considered a
|
4826
|
+
# breaking change.
|
4827
|
+
#
|
4828
|
+
# @note
|
4829
|
+
# This can be really slow as it can have `O(n^2)` complexity. Don't use this
|
4830
|
+
# for operations that visit all elements.
|
4831
|
+
#
|
4832
|
+
# @example
|
4833
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
|
4834
|
+
# df.select(
|
4835
|
+
# [
|
4836
|
+
# Polars.col("values").cumulative_eval(
|
4837
|
+
# Polars.element.first - Polars.element.last ** 2
|
4838
|
+
# )
|
4839
|
+
# ]
|
4840
|
+
# )
|
4841
|
+
# # =>
|
4842
|
+
# # shape: (5, 1)
|
4843
|
+
# # ┌────────┐
|
4844
|
+
# # │ values │
|
4845
|
+
# # │ --- │
|
4846
|
+
# # │ f64 │
|
4847
|
+
# # ╞════════╡
|
4848
|
+
# # │ 0.0 │
|
4849
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4850
|
+
# # │ -3.0 │
|
4851
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4852
|
+
# # │ -8.0 │
|
4853
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4854
|
+
# # │ -15.0 │
|
4855
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4856
|
+
# # │ -24.0 │
|
4857
|
+
# # └────────┘
|
4858
|
+
def cumulative_eval(expr, min_periods: 1, parallel: false)
  # Resolve our own `_rbexpr` first, then unwrap `expr` via its `_rbexpr`
  # and hand everything over positionally.
  native = _rbexpr
  evaluated = native.cumulative_eval(expr._rbexpr, min_periods, parallel)
  wrap_expr(evaluated)
end
|
4863
|
+
|
4864
|
+
# Flags the expression as 'sorted'.
|
4865
|
+
#
|
4866
|
+
# Enables downstream code to use fast paths for sorted arrays.
|
4867
|
+
#
|
4868
|
+
# @param reverse [Boolean]
|
4869
|
+
# If the `Series` order is reversed, e.g. descending.
|
4870
|
+
#
|
4871
|
+
# @return [Expr]
|
4872
|
+
#
|
4873
|
+
# @note
|
4874
|
+
# This can lead to incorrect results if this `Series` is not sorted!!
|
4875
|
+
# Use with care!
|
4876
|
+
#
|
4877
|
+
# @example
|
4878
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3]})
|
4879
|
+
# df.select(Polars.col("values").set_sorted.max)
|
4880
|
+
# # =>
|
4881
|
+
# # shape: (1, 1)
|
4882
|
+
# # ┌────────┐
|
4883
|
+
# # │ values │
|
4884
|
+
# # │ --- │
|
4885
|
+
# # │ i64 │
|
4886
|
+
# # ╞════════╡
|
4887
|
+
# # │ 3 │
|
4888
|
+
# # └────────┘
|
4889
|
+
# def set_sorted(reverse: false)
|
4890
|
+
# map { |s| s.set_sorted(reverse) }
|
1292
4891
|
# end
|
1293
4892
|
|
4893
|
+
# Aggregate to list.
|
4894
|
+
#
|
4895
|
+
# @return [Expr]
|
1294
4896
|
#
|
4897
|
+
# @example
|
4898
|
+
# df = Polars::DataFrame.new(
|
4899
|
+
# {
|
4900
|
+
# "a" => [1, 2, 3],
|
4901
|
+
# "b" => [4, 5, 6]
|
4902
|
+
# }
|
4903
|
+
# )
|
4904
|
+
# df.select(Polars.all.list)
|
4905
|
+
# # =>
|
4906
|
+
# # shape: (1, 2)
|
4907
|
+
# # ┌───────────┬───────────┐
|
4908
|
+
# # │ a ┆ b │
|
4909
|
+
# # │ --- ┆ --- │
|
4910
|
+
# # │ list[i64] ┆ list[i64] │
|
4911
|
+
# # ╞═══════════╪═══════════╡
|
4912
|
+
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4913
|
+
# # └───────────┴───────────┘
|
1295
4914
|
def list
  # Call `list` on `_rbexpr`, then pass the result through `wrap_expr`.
  listed = _rbexpr.list
  wrap_expr(listed)
end
|
1298
4917
|
|
4918
|
+
# Shrink numeric columns to the minimal required datatype.
|
4919
|
+
#
|
4920
|
+
# Shrink to the dtype needed to fit the extrema of this `Series`.
|
4921
|
+
# This can be used to reduce memory pressure.
|
4922
|
+
#
|
4923
|
+
# @return [Expr]
|
4924
|
+
#
|
4925
|
+
# @example
|
4926
|
+
# Polars::DataFrame.new(
|
4927
|
+
# {
|
4928
|
+
# "a" => [1, 2, 3],
|
4929
|
+
# "b" => [1, 2, 2 << 32],
|
4930
|
+
# "c" => [-1, 2, 1 << 30],
|
4931
|
+
# "d" => [-112, 2, 112],
|
4932
|
+
# "e" => [-112, 2, 129],
|
4933
|
+
# "f" => ["a", "b", "c"],
|
4934
|
+
# "g" => [0.1, 1.32, 0.12],
|
4935
|
+
# "h" => [true, nil, false]
|
4936
|
+
# }
|
4937
|
+
# ).select(Polars.all.shrink_dtype)
|
4938
|
+
# # =>
|
4939
|
+
# # shape: (3, 8)
|
4940
|
+
# # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
|
4941
|
+
# # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
|
4942
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
4943
|
+
# # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
|
4944
|
+
# # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
|
4945
|
+
# # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
|
4946
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
4947
|
+
# # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
|
4948
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
4949
|
+
# # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
|
4950
|
+
# # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
|
1299
4951
|
def shrink_dtype
  # Call `shrink_dtype` on `_rbexpr`, then pass the result through `wrap_expr`.
  shrunk = _rbexpr.shrink_dtype
  wrap_expr(shrunk)
end
|
1302
4954
|
|
4955
|
+
# Create an object namespace of all list related methods.
|
4956
|
+
#
|
4957
|
+
# @return [ListExpr]
|
1303
4958
|
def arr
  # The list namespace is a `ListExpr` built around this expression.
  ListExpr.new(self)
end
|
1306
4961
|
|
4962
|
+
# Create an object namespace of all categorical related methods.
|
4963
|
+
#
|
4964
|
+
# @return [CatExpr]
|
1307
4965
|
def cat
  # The categorical namespace is a `CatExpr` built around this expression.
  CatExpr.new(self)
end
|
1310
4968
|
|
4969
|
+
# Create an object namespace of all datetime related methods.
|
4970
|
+
#
|
4971
|
+
# @return [DateTimeExpr]
|
1311
4972
|
def dt
  # The datetime namespace is a `DateTimeExpr` built around this expression.
  DateTimeExpr.new(self)
end
|
1314
4975
|
|
4976
|
+
# Create an object namespace of all meta related expression methods.
|
4977
|
+
#
|
4978
|
+
# @return [MetaExpr]
|
1315
4979
|
def meta
  # The meta namespace is a `MetaExpr` built around this expression.
  MetaExpr.new(self)
end
|
1318
4982
|
|
4983
|
+
# Create an object namespace of all string related methods.
|
4984
|
+
#
|
4985
|
+
# @return [StringExpr]
|
1319
4986
|
def str
  # The string namespace is a `StringExpr` built around this expression.
  StringExpr.new(self)
end
|
1322
4989
|
|
4990
|
+
# Create an object namespace of all struct related methods.
|
4991
|
+
#
|
4992
|
+
# @return [StructExpr]
|
1323
4993
|
def struct
  # The struct namespace is a `StructExpr` built around this expression.
  StructExpr.new(self)
end
|
@@ -1337,5 +5007,51 @@ module Polars
|
|
1337
5007
|
def _to_expr(other)
  # An existing Expr is returned as-is; any other value goes through Utils.lit.
  return other if other.is_a?(Expr)

  Utils.lit(other)
end
|
5010
|
+
|
5011
|
+
# Resolve the four alternative smoothing parameters to a single `alpha`.
#
# At most one of `com`, `span`, `half_life` and `alpha` may be non-nil:
#
# - `com`       -> `1 / (1 + com)`, requires `com >= 0`
# - `span`      -> `2 / (span + 1)`, requires `span >= 1`
# - `half_life` -> `1 - exp(-ln(2) / half_life)`, requires `half_life > 0`
# - `alpha`     -> returned as given, requires `0 < alpha <= 1`
#
# @return [Numeric] the resolved alpha
# @raise [ArgumentError] if more than one or none of the parameters is set,
#   or if the one that is set is out of range
def _prepare_alpha(com, span, half_life, alpha)
  supplied = [com, span, half_life, alpha].reject(&:nil?)
  if supplied.size > 1
    raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
  end

  unless com.nil?
    raise ArgumentError, "Require 'com' >= 0 (found #{com})" if com < 0.0

    return 1.0 / (1.0 + com)
  end

  unless span.nil?
    raise ArgumentError, "Require 'span' >= 1 (found #{span})" if span < 1.0

    return 2.0 / (span + 1.0)
  end

  unless half_life.nil?
    raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})" if half_life <= 0.0

    return 1.0 - Math.exp(-Math.log(2.0) / half_life)
  end

  # Only `alpha` can still be set at this point.
  raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set" if alpha.nil?
  raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})" if alpha <= 0 || alpha > 1

  alpha
end
|
5043
|
+
|
5044
|
+
# Normalise the `window_size` / `min_periods` pair used by rolling operations.
#
# An Integer `window_size` is formatted as the string `"<n>i"` and, when
# `min_periods` is nil, also becomes `min_periods`. Any other `window_size`
# is returned unchanged, with a nil `min_periods` replaced by `1`.
#
# @return [Array] `[window_size, min_periods]`
def _prepare_rolling_window_args(window_size, min_periods)
  if window_size.is_a?(Integer)
    periods = min_periods.nil? ? window_size : min_periods
    return ["#{window_size}i", periods]
  end

  [window_size, min_periods.nil? ? 1 : min_periods]
end
|
1340
5056
|
end
|
1341
5057
|
end
|