polars-df 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
data/lib/polars/expr.rb
CHANGED
@@ -138,8 +138,45 @@ module Polars
|
|
138
138
|
Utils.lit(0) - self
|
139
139
|
end
|
140
140
|
|
141
|
-
#
|
142
|
-
#
|
141
|
+
# Cast to physical representation of the logical dtype.
|
142
|
+
#
|
143
|
+
# - `:date` -> `:i32`
|
144
|
+
# - `:datetime` -> `:i64`
|
145
|
+
# - `:time` -> `:i64`
|
146
|
+
# - `:duration` -> `:i64`
|
147
|
+
# - `:cat` -> `:u32`
|
148
|
+
# - Other data types will be left unchanged.
|
149
|
+
#
|
150
|
+
# @return [Expr]
|
151
|
+
#
|
152
|
+
# @example
|
153
|
+
# Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
|
154
|
+
# [
|
155
|
+
# Polars.col("vals").cast(:cat),
|
156
|
+
# Polars.col("vals")
|
157
|
+
# .cast(:cat)
|
158
|
+
# .to_physical
|
159
|
+
# .alias("vals_physical")
|
160
|
+
# ]
|
161
|
+
# )
|
162
|
+
# # =>
|
163
|
+
# # shape: (4, 2)
|
164
|
+
# # ┌──────┬───────────────┐
|
165
|
+
# # │ vals ┆ vals_physical │
|
166
|
+
# # │ --- ┆ --- │
|
167
|
+
# # │ cat ┆ u32 │
|
168
|
+
# # ╞══════╪═══════════════╡
|
169
|
+
# # │ a ┆ 0 │
|
170
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
171
|
+
# # │ x ┆ 1 │
|
172
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
173
|
+
# # │ null ┆ null │
|
174
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
175
|
+
# # │ a ┆ 0 │
|
176
|
+
# # └──────┴───────────────┘
|
177
|
+
def to_physical
|
178
|
+
wrap_expr(_rbexpr.to_physical)
|
179
|
+
end
|
143
180
|
|
144
181
|
# Check if any boolean value in a Boolean column is `true`.
|
145
182
|
#
|
@@ -258,13 +295,82 @@ module Polars
|
|
258
295
|
wrap_expr(_rbexpr.exp)
|
259
296
|
end
|
260
297
|
|
298
|
+
# Rename the output of an expression.
|
299
|
+
#
|
300
|
+
# @param name [String]
|
301
|
+
# New name.
|
302
|
+
#
|
303
|
+
# @return [Expr]
|
304
|
+
#
|
305
|
+
# @example
|
306
|
+
# df = Polars::DataFrame.new(
|
307
|
+
# {
|
308
|
+
# "a" => [1, 2, 3],
|
309
|
+
# "b" => ["a", "b", nil]
|
310
|
+
# }
|
311
|
+
# )
|
312
|
+
# df.select(
|
313
|
+
# [
|
314
|
+
# Polars.col("a").alias("bar"),
|
315
|
+
# Polars.col("b").alias("foo")
|
316
|
+
# ]
|
317
|
+
# )
|
318
|
+
# # =>
|
319
|
+
# # shape: (3, 2)
|
320
|
+
# # ┌─────┬──────┐
|
321
|
+
# # │ bar ┆ foo │
|
322
|
+
# # │ --- ┆ --- │
|
323
|
+
# # │ i64 ┆ str │
|
324
|
+
# # ╞═════╪══════╡
|
325
|
+
# # │ 1 ┆ a │
|
326
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
327
|
+
# # │ 2 ┆ b │
|
328
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
329
|
+
# # │ 3 ┆ null │
|
330
|
+
# # └─────┴──────┘
|
261
331
|
def alias(name)
|
262
332
|
wrap_expr(_rbexpr._alias(name))
|
263
333
|
end
|
264
334
|
|
265
335
|
# TODO support symbols for exclude
|
266
336
|
|
337
|
+
# Exclude certain columns from a wildcard/regex selection.
|
338
|
+
#
|
339
|
+
# You may also use regexes in the exclude list. They must start with `^` and end
|
340
|
+
# with `$`.
|
341
|
+
#
|
342
|
+
# @param columns [Object]
|
343
|
+
# Column(s) to exclude from selection.
|
344
|
+
# This can be:
|
267
345
|
#
|
346
|
+
# - a column name, or multiple column names
|
347
|
+
# - a regular expression starting with `^` and ending with `$`
|
348
|
+
# - a dtype or multiple dtypes
|
349
|
+
#
|
350
|
+
# @return [Expr]
|
351
|
+
#
|
352
|
+
# @example
|
353
|
+
# df = Polars::DataFrame.new(
|
354
|
+
# {
|
355
|
+
# "aa" => [1, 2, 3],
|
356
|
+
# "ba" => ["a", "b", nil],
|
357
|
+
# "cc" => [nil, 2.5, 1.5]
|
358
|
+
# }
|
359
|
+
# )
|
360
|
+
# df.select(Polars.all.exclude("ba"))
|
361
|
+
# # =>
|
362
|
+
# # shape: (3, 2)
|
363
|
+
# # ┌─────┬──────┐
|
364
|
+
# # │ aa ┆ cc │
|
365
|
+
# # │ --- ┆ --- │
|
366
|
+
# # │ i64 ┆ f64 │
|
367
|
+
# # ╞═════╪══════╡
|
368
|
+
# # │ 1 ┆ null │
|
369
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
370
|
+
# # │ 2 ┆ 2.5 │
|
371
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
372
|
+
# # │ 3 ┆ 1.5 │
|
373
|
+
# # └─────┴──────┘
|
268
374
|
def exclude(columns)
|
269
375
|
if columns.is_a?(String)
|
270
376
|
columns = [columns]
|
@@ -285,20 +391,75 @@ module Polars
|
|
285
391
|
end
|
286
392
|
end
|
287
393
|
|
394
|
+
# Keep the original root name of the expression.
|
395
|
+
#
|
396
|
+
# @return [Expr]
|
397
|
+
#
|
398
|
+
# @example
|
399
|
+
# df = Polars::DataFrame.new(
|
400
|
+
# {
|
401
|
+
# "a" => [1, 2],
|
402
|
+
# "b" => [3, 4]
|
403
|
+
# }
|
404
|
+
# )
|
405
|
+
# df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
|
406
|
+
# # =>
|
407
|
+
# # shape: (2, 2)
|
408
|
+
# # ┌─────┬─────┐
|
409
|
+
# # │ a ┆ b │
|
410
|
+
# # │ --- ┆ --- │
|
411
|
+
# # │ i64 ┆ i64 │
|
412
|
+
# # ╞═════╪═════╡
|
413
|
+
# # │ 9 ┆ 3 │
|
414
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
415
|
+
# # │ 18 ┆ 4 │
|
416
|
+
# # └─────┴─────┘
|
288
417
|
def keep_name
|
289
418
|
wrap_expr(_rbexpr.keep_name)
|
290
419
|
end
|
291
420
|
|
421
|
+
# Add a prefix to the root column name of the expression.
|
422
|
+
#
|
423
|
+
# @return [Expr]
|
292
424
|
def prefix(prefix)
|
293
425
|
wrap_expr(_rbexpr.prefix(prefix))
|
294
426
|
end
|
295
427
|
|
428
|
+
# Add a suffix to the root column name of the expression.
|
429
|
+
#
|
430
|
+
# @return [Expr]
|
296
431
|
def suffix(suffix)
|
297
432
|
wrap_expr(_rbexpr.suffix(suffix))
|
298
433
|
end
|
299
434
|
|
300
|
-
#
|
301
|
-
#
|
435
|
+
# Rename the output of an expression by mapping a function over the root name.
|
436
|
+
#
|
437
|
+
# @return [Expr]
|
438
|
+
#
|
439
|
+
# @example
|
440
|
+
# df = Polars::DataFrame.new(
|
441
|
+
# {
|
442
|
+
# "A" => [1, 2],
|
443
|
+
# "B" => [3, 4]
|
444
|
+
# }
|
445
|
+
# )
|
446
|
+
# df.select(
|
447
|
+
# Polars.all.reverse.map_alias { |col_name| col_name + "_reverse" }
|
448
|
+
# )
|
449
|
+
# # =>
|
450
|
+
# # shape: (2, 2)
|
451
|
+
# # ┌───────────┬───────────┐
|
452
|
+
# # │ A_reverse ┆ B_reverse │
|
453
|
+
# # │ --- ┆ --- │
|
454
|
+
# # │ i64 ┆ i64 │
|
455
|
+
# # ╞═══════════╪═══════════╡
|
456
|
+
# # │ 2 ┆ 4 │
|
457
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
458
|
+
# # │ 1 ┆ 3 │
|
459
|
+
# # └───────────┴───────────┘
|
460
|
+
def map_alias(&f)
|
461
|
+
Utils.wrap_expr(_rbexpr.map_alias(f))
|
462
|
+
end
|
302
463
|
|
303
464
|
# Negate a boolean expression.
|
304
465
|
#
|
@@ -464,14 +625,112 @@ module Polars
|
|
464
625
|
wrap_expr(_rbexpr.is_infinite)
|
465
626
|
end
|
466
627
|
|
628
|
+
# Returns a boolean Series indicating which values are NaN.
|
629
|
+
#
|
630
|
+
# @note
|
631
|
+
# Floating point `NaN` (Not A Number) should not be confused
|
632
|
+
# with missing data represented as `nil`.
|
633
|
+
#
|
634
|
+
# @return [Expr]
|
635
|
+
#
|
636
|
+
# @example
|
637
|
+
# df = Polars::DataFrame.new(
|
638
|
+
# {
|
639
|
+
# "a" => [1, 2, nil, 1, 5],
|
640
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
641
|
+
# }
|
642
|
+
# )
|
643
|
+
# df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
|
644
|
+
# # =>
|
645
|
+
# # shape: (5, 3)
|
646
|
+
# # ┌──────┬─────┬─────────┐
|
647
|
+
# # │ a ┆ b ┆ b_isnan │
|
648
|
+
# # │ --- ┆ --- ┆ --- │
|
649
|
+
# # │ i64 ┆ f64 ┆ bool │
|
650
|
+
# # ╞══════╪═════╪═════════╡
|
651
|
+
# # │ 1 ┆ 1.0 ┆ false │
|
652
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
653
|
+
# # │ 2 ┆ 2.0 ┆ false │
|
654
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
655
|
+
# # │ null ┆ NaN ┆ true │
|
656
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
657
|
+
# # │ 1 ┆ 1.0 ┆ false │
|
658
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
659
|
+
# # │ 5 ┆ 5.0 ┆ false │
|
660
|
+
# # └──────┴─────┴─────────┘
|
467
661
|
def is_nan
|
468
662
|
wrap_expr(_rbexpr.is_nan)
|
469
663
|
end
|
470
664
|
|
665
|
+
# Returns a boolean Series indicating which values are not NaN.
|
666
|
+
#
|
667
|
+
# @note
|
668
|
+
# Floating point `NaN` (Not A Number) should not be confused
|
669
|
+
# with missing data represented as `nil`.
|
670
|
+
#
|
671
|
+
# @return [Expr]
|
672
|
+
#
|
673
|
+
# @example
|
674
|
+
# df = Polars::DataFrame.new(
|
675
|
+
# {
|
676
|
+
# "a" => [1, 2, nil, 1, 5],
|
677
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
678
|
+
# }
|
679
|
+
# )
|
680
|
+
# df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
|
681
|
+
# # =>
|
682
|
+
# # shape: (5, 3)
|
683
|
+
# # ┌──────┬─────┬──────────────┐
|
684
|
+
# # │ a ┆ b ┆ b_is_not_nan │
|
685
|
+
# # │ --- ┆ --- ┆ --- │
|
686
|
+
# # │ i64 ┆ f64 ┆ bool │
|
687
|
+
# # ╞══════╪═════╪══════════════╡
|
688
|
+
# # │ 1 ┆ 1.0 ┆ true │
|
689
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
690
|
+
# # │ 2 ┆ 2.0 ┆ true │
|
691
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
692
|
+
# # │ null ┆ NaN ┆ false │
|
693
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
694
|
+
# # │ 1 ┆ 1.0 ┆ true │
|
695
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
696
|
+
# # │ 5 ┆ 5.0 ┆ true │
|
697
|
+
# # └──────┴─────┴──────────────┘
|
471
698
|
def is_not_nan
|
472
699
|
wrap_expr(_rbexpr.is_not_nan)
|
473
700
|
end
|
474
701
|
|
702
|
+
# Get the group indexes of the group by operation.
|
703
|
+
#
|
704
|
+
# Should be used in aggregation context only.
|
705
|
+
#
|
706
|
+
# @return [Expr]
|
707
|
+
#
|
708
|
+
# @example
|
709
|
+
# df = Polars::DataFrame.new(
|
710
|
+
# {
|
711
|
+
# "group" => [
|
712
|
+
# "one",
|
713
|
+
# "one",
|
714
|
+
# "one",
|
715
|
+
# "two",
|
716
|
+
# "two",
|
717
|
+
# "two"
|
718
|
+
# ],
|
719
|
+
# "value" => [94, 95, 96, 97, 97, 99]
|
720
|
+
# }
|
721
|
+
# )
|
722
|
+
# df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
723
|
+
# # =>
|
724
|
+
# # shape: (2, 2)
|
725
|
+
# # ┌───────┬───────────┐
|
726
|
+
# # │ group ┆ value │
|
727
|
+
# # │ --- ┆ --- │
|
728
|
+
# # │ str ┆ list[u32] │
|
729
|
+
# # ╞═══════╪═══════════╡
|
730
|
+
# # │ one ┆ [0, 1, 2] │
|
731
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
732
|
+
# # │ two ┆ [3, 4, 5] │
|
733
|
+
# # └───────┴───────────┘
|
475
734
|
def agg_groups
|
476
735
|
wrap_expr(_rbexpr.agg_groups)
|
477
736
|
end
|
@@ -557,6 +816,36 @@ module Polars
|
|
557
816
|
wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
|
558
817
|
end
|
559
818
|
|
819
|
+
# Append expressions.
|
820
|
+
#
|
821
|
+
# This is done by adding the chunks of `other` to this `Series`.
|
822
|
+
#
|
823
|
+
# @param other [Expr]
|
824
|
+
# Expression to append.
|
825
|
+
# @param upcast [Boolean]
|
826
|
+
# Cast both `Series` to the same supertype.
|
827
|
+
#
|
828
|
+
# @return [Expr]
|
829
|
+
#
|
830
|
+
# @example
|
831
|
+
# df = Polars::DataFrame.new(
|
832
|
+
# {
|
833
|
+
# "a" => [8, 9, 10],
|
834
|
+
# "b" => [nil, 4, 4]
|
835
|
+
# }
|
836
|
+
# )
|
837
|
+
# df.select(Polars.all.head(1).append(Polars.all.tail(1)))
|
838
|
+
# # =>
|
839
|
+
# # shape: (2, 2)
|
840
|
+
# # ┌─────┬──────┐
|
841
|
+
# # │ a ┆ b │
|
842
|
+
# # │ --- ┆ --- │
|
843
|
+
# # │ i64 ┆ i64 │
|
844
|
+
# # ╞═════╪══════╡
|
845
|
+
# # │ 8 ┆ null │
|
846
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
847
|
+
# # │ 10 ┆ 4 │
|
848
|
+
# # └─────┴──────┘
|
560
849
|
def append(other, upcast: true)
|
561
850
|
other = Utils.expr_to_lit_or_expr(other)
|
562
851
|
wrap_expr(_rbexpr.append(other._rbexpr, upcast))
|
@@ -567,7 +856,7 @@ module Polars
|
|
567
856
|
# @return [Expr]
|
568
857
|
#
|
569
858
|
# @example Create a Series with 3 nulls, append column a then rechunk
|
570
|
-
# df = Polars::DataFrame.new({"a"
|
859
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
571
860
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
572
861
|
# # =>
|
573
862
|
# # shape: (6, 1)
|
@@ -650,22 +939,182 @@ module Polars
|
|
650
939
|
wrap_expr(_rbexpr.drop_nans)
|
651
940
|
end
|
652
941
|
|
942
|
+
# Get an array with the cumulative sum computed at every element.
|
943
|
+
#
|
944
|
+
# @param reverse [Boolean]
|
945
|
+
# Reverse the operation.
|
946
|
+
#
|
947
|
+
# @return [Expr]
|
948
|
+
#
|
949
|
+
# @note
|
950
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
951
|
+
# `:i64` before summing to prevent overflow issues.
|
952
|
+
#
|
953
|
+
# @example
|
954
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
955
|
+
# df.select(
|
956
|
+
# [
|
957
|
+
# Polars.col("a").cumsum,
|
958
|
+
# Polars.col("a").cumsum(reverse: true).alias("a_reverse")
|
959
|
+
# ]
|
960
|
+
# )
|
961
|
+
# # =>
|
962
|
+
# # shape: (4, 2)
|
963
|
+
# # ┌─────┬───────────┐
|
964
|
+
# # │ a ┆ a_reverse │
|
965
|
+
# # │ --- ┆ --- │
|
966
|
+
# # │ i64 ┆ i64 │
|
967
|
+
# # ╞═════╪═══════════╡
|
968
|
+
# # │ 1 ┆ 10 │
|
969
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
970
|
+
# # │ 3 ┆ 9 │
|
971
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
972
|
+
# # │ 6 ┆ 7 │
|
973
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
974
|
+
# # │ 10 ┆ 4 │
|
975
|
+
# # └─────┴───────────┘
|
653
976
|
def cumsum(reverse: false)
|
654
977
|
wrap_expr(_rbexpr.cumsum(reverse))
|
655
978
|
end
|
656
979
|
|
980
|
+
# Get an array with the cumulative product computed at every element.
|
981
|
+
#
|
982
|
+
# @param reverse [Boolean]
|
983
|
+
# Reverse the operation.
|
984
|
+
#
|
985
|
+
# @return [Expr]
|
986
|
+
#
|
987
|
+
# @note
|
988
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
989
|
+
# `:i64` before multiplying to prevent overflow issues.
|
990
|
+
#
|
991
|
+
# @example
|
992
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
993
|
+
# df.select(
|
994
|
+
# [
|
995
|
+
# Polars.col("a").cumprod,
|
996
|
+
# Polars.col("a").cumprod(reverse: true).alias("a_reverse")
|
997
|
+
# ]
|
998
|
+
# )
|
999
|
+
# # =>
|
1000
|
+
# # shape: (4, 2)
|
1001
|
+
# # ┌─────┬───────────┐
|
1002
|
+
# # │ a ┆ a_reverse │
|
1003
|
+
# # │ --- ┆ --- │
|
1004
|
+
# # │ i64 ┆ i64 │
|
1005
|
+
# # ╞═════╪═══════════╡
|
1006
|
+
# # │ 1 ┆ 24 │
|
1007
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1008
|
+
# # │ 2 ┆ 24 │
|
1009
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1010
|
+
# # │ 6 ┆ 12 │
|
1011
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1012
|
+
# # │ 24 ┆ 4 │
|
1013
|
+
# # └─────┴───────────┘
|
657
1014
|
def cumprod(reverse: false)
|
658
1015
|
wrap_expr(_rbexpr.cumprod(reverse))
|
659
1016
|
end
|
660
1017
|
|
1018
|
+
# Get an array with the cumulative min computed at every element.
|
1019
|
+
#
|
1020
|
+
# @param reverse [Boolean]
|
1021
|
+
# Reverse the operation.
|
1022
|
+
#
|
1023
|
+
# @return [Expr]
|
1024
|
+
#
|
1025
|
+
# @example
|
1026
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1027
|
+
# df.select(
|
1028
|
+
# [
|
1029
|
+
# Polars.col("a").cummin,
|
1030
|
+
# Polars.col("a").cummin(reverse: true).alias("a_reverse")
|
1031
|
+
# ]
|
1032
|
+
# )
|
1033
|
+
# # =>
|
1034
|
+
# # shape: (4, 2)
|
1035
|
+
# # ┌─────┬───────────┐
|
1036
|
+
# # │ a ┆ a_reverse │
|
1037
|
+
# # │ --- ┆ --- │
|
1038
|
+
# # │ i64 ┆ i64 │
|
1039
|
+
# # ╞═════╪═══════════╡
|
1040
|
+
# # │ 1 ┆ 1 │
|
1041
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1042
|
+
# # │ 1 ┆ 2 │
|
1043
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1044
|
+
# # │ 1 ┆ 3 │
|
1045
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1046
|
+
# # │ 1 ┆ 4 │
|
1047
|
+
# # └─────┴───────────┘
|
661
1048
|
def cummin(reverse: false)
|
662
1049
|
wrap_expr(_rbexpr.cummin(reverse))
|
663
1050
|
end
|
664
1051
|
|
1052
|
+
# Get an array with the cumulative max computed at every element.
|
1053
|
+
#
|
1054
|
+
# @param reverse [Boolean]
|
1055
|
+
# Reverse the operation.
|
1056
|
+
#
|
1057
|
+
# @return [Expr]
|
1058
|
+
#
|
1059
|
+
# @example
|
1060
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1061
|
+
# df.select(
|
1062
|
+
# [
|
1063
|
+
# Polars.col("a").cummax,
|
1064
|
+
# Polars.col("a").cummax(reverse: true).alias("a_reverse")
|
1065
|
+
# ]
|
1066
|
+
# )
|
1067
|
+
# # =>
|
1068
|
+
# # shape: (4, 2)
|
1069
|
+
# # ┌─────┬───────────┐
|
1070
|
+
# # │ a ┆ a_reverse │
|
1071
|
+
# # │ --- ┆ --- │
|
1072
|
+
# # │ i64 ┆ i64 │
|
1073
|
+
# # ╞═════╪═══════════╡
|
1074
|
+
# # │ 1 ┆ 4 │
|
1075
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1076
|
+
# # │ 2 ┆ 4 │
|
1077
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1078
|
+
# # │ 3 ┆ 4 │
|
1079
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1080
|
+
# # │ 4 ┆ 4 │
|
1081
|
+
# # └─────┴───────────┘
|
665
1082
|
def cummax(reverse: false)
|
666
1083
|
wrap_expr(_rbexpr.cummax(reverse))
|
667
1084
|
end
|
668
1085
|
|
1086
|
+
# Get an array with the cumulative count computed at every element.
|
1087
|
+
#
|
1088
|
+
# Counting from 0 to len - 1.
|
1089
|
+
#
|
1090
|
+
# @param reverse [Boolean]
|
1091
|
+
# Reverse the operation.
|
1092
|
+
#
|
1093
|
+
# @return [Expr]
|
1094
|
+
#
|
1095
|
+
# @example
|
1096
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1097
|
+
# df.select(
|
1098
|
+
# [
|
1099
|
+
# Polars.col("a").cumcount,
|
1100
|
+
# Polars.col("a").cumcount(reverse: true).alias("a_reverse")
|
1101
|
+
# ]
|
1102
|
+
# )
|
1103
|
+
# # =>
|
1104
|
+
# # shape: (4, 2)
|
1105
|
+
# # ┌─────┬───────────┐
|
1106
|
+
# # │ a ┆ a_reverse │
|
1107
|
+
# # │ --- ┆ --- │
|
1108
|
+
# # │ u32 ┆ u32 │
|
1109
|
+
# # ╞═════╪═══════════╡
|
1110
|
+
# # │ 0 ┆ 3 │
|
1111
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1112
|
+
# # │ 1 ┆ 2 │
|
1113
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1114
|
+
# # │ 2 ┆ 1 │
|
1115
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1116
|
+
# # │ 3 ┆ 0 │
|
1117
|
+
# # └─────┴───────────┘
|
669
1118
|
def cumcount(reverse: false)
|
670
1119
|
wrap_expr(_rbexpr.cumcount(reverse))
|
671
1120
|
end
|
@@ -755,6 +1204,30 @@ module Polars
|
|
755
1204
|
wrap_expr(_rbexpr.round(decimals))
|
756
1205
|
end
|
757
1206
|
|
1207
|
+
# Compute the dot/inner product between two Expressions.
|
1208
|
+
#
|
1209
|
+
# @param other [Expr]
|
1210
|
+
# Expression to compute dot product with.
|
1211
|
+
#
|
1212
|
+
# @return [Expr]
|
1213
|
+
#
|
1214
|
+
# @example
|
1215
|
+
# df = Polars::DataFrame.new(
|
1216
|
+
# {
|
1217
|
+
# "a" => [1, 3, 5],
|
1218
|
+
# "b" => [2, 4, 6]
|
1219
|
+
# }
|
1220
|
+
# )
|
1221
|
+
# df.select(Polars.col("a").dot(Polars.col("b")))
|
1222
|
+
# # =>
|
1223
|
+
# # shape: (1, 1)
|
1224
|
+
# # ┌─────┐
|
1225
|
+
# # │ a │
|
1226
|
+
# # │ --- │
|
1227
|
+
# # │ i64 │
|
1228
|
+
# # ╞═════╡
|
1229
|
+
# # │ 44 │
|
1230
|
+
# # └─────┘
|
758
1231
|
def dot(other)
|
759
1232
|
other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
|
760
1233
|
wrap_expr(_rbexpr.dot(other._rbexpr))
|
@@ -789,24 +1262,183 @@ module Polars
|
|
789
1262
|
wrap_expr(_rbexpr.mode)
|
790
1263
|
end
|
791
1264
|
|
1265
|
+
# Cast between data types.
|
1266
|
+
#
|
1267
|
+
# @param dtype [Symbol]
|
1268
|
+
# DataType to cast to.
|
1269
|
+
# @param strict [Boolean]
|
1270
|
+
# Throw an error if a cast could not be done.
|
1271
|
+
# For instance, due to an overflow.
|
1272
|
+
#
|
1273
|
+
# @return [Expr]
|
1274
|
+
#
|
1275
|
+
# @example
|
1276
|
+
# df = Polars::DataFrame.new(
|
1277
|
+
# {
|
1278
|
+
# "a" => [1, 2, 3],
|
1279
|
+
# "b" => ["4", "5", "6"]
|
1280
|
+
# }
|
1281
|
+
# )
|
1282
|
+
# df.with_columns(
|
1283
|
+
# [
|
1284
|
+
# Polars.col("a").cast(:f64),
|
1285
|
+
# Polars.col("b").cast(:i32)
|
1286
|
+
# ]
|
1287
|
+
# )
|
1288
|
+
# # =>
|
1289
|
+
# # shape: (3, 2)
|
1290
|
+
# # ┌─────┬─────┐
|
1291
|
+
# # │ a ┆ b │
|
1292
|
+
# # │ --- ┆ --- │
|
1293
|
+
# # │ f64 ┆ i32 │
|
1294
|
+
# # ╞═════╪═════╡
|
1295
|
+
# # │ 1.0 ┆ 4 │
|
1296
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1297
|
+
# # │ 2.0 ┆ 5 │
|
1298
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1299
|
+
# # │ 3.0 ┆ 6 │
|
1300
|
+
# # └─────┴─────┘
|
792
1301
|
def cast(dtype, strict: true)
|
793
1302
|
dtype = Utils.rb_type_to_dtype(dtype)
|
794
1303
|
wrap_expr(_rbexpr.cast(dtype, strict))
|
795
1304
|
end
|
796
1305
|
|
1306
|
+
# Sort this column. In projection/selection context the whole column is sorted.
|
1307
|
+
#
|
1308
|
+
# If used in a groupby context, the groups are sorted.
|
1309
|
+
#
|
1310
|
+
# @param reverse [Boolean]
|
1311
|
+
# false -> order from small to large.
|
1312
|
+
# true -> order from large to small.
|
1313
|
+
# @param nulls_last [Boolean]
|
1314
|
+
# If true, nulls are considered to be larger than any valid value.
|
1315
|
+
#
|
1316
|
+
# @return [Expr]
|
1317
|
+
#
|
1318
|
+
# @example
|
1319
|
+
# df = Polars::DataFrame.new(
|
1320
|
+
# {
|
1321
|
+
# "group" => [
|
1322
|
+
# "one",
|
1323
|
+
# "one",
|
1324
|
+
# "one",
|
1325
|
+
# "two",
|
1326
|
+
# "two",
|
1327
|
+
# "two"
|
1328
|
+
# ],
|
1329
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1330
|
+
# }
|
1331
|
+
# )
|
1332
|
+
# df.select(Polars.col("value").sort)
|
1333
|
+
# # =>
|
1334
|
+
# # shape: (6, 1)
|
1335
|
+
# # ┌───────┐
|
1336
|
+
# # │ value │
|
1337
|
+
# # │ --- │
|
1338
|
+
# # │ i64 │
|
1339
|
+
# # ╞═══════╡
|
1340
|
+
# # │ 1 │
|
1341
|
+
# # ├╌╌╌╌╌╌╌┤
|
1342
|
+
# # │ 2 │
|
1343
|
+
# # ├╌╌╌╌╌╌╌┤
|
1344
|
+
# # │ 3 │
|
1345
|
+
# # ├╌╌╌╌╌╌╌┤
|
1346
|
+
# # │ 4 │
|
1347
|
+
# # ├╌╌╌╌╌╌╌┤
|
1348
|
+
# # │ 98 │
|
1349
|
+
# # ├╌╌╌╌╌╌╌┤
|
1350
|
+
# # │ 99 │
|
1351
|
+
# # └───────┘
|
1352
|
+
#
|
1353
|
+
# @example
|
1354
|
+
# df.select(Polars.col("value").sort)
|
1355
|
+
# # =>
|
1356
|
+
# # shape: (6, 1)
|
1357
|
+
# # ┌───────┐
|
1358
|
+
# # │ value │
|
1359
|
+
# # │ --- │
|
1360
|
+
# # │ i64 │
|
1361
|
+
# # ╞═══════╡
|
1362
|
+
# # │ 1 │
|
1363
|
+
# # ├╌╌╌╌╌╌╌┤
|
1364
|
+
# # │ 2 │
|
1365
|
+
# # ├╌╌╌╌╌╌╌┤
|
1366
|
+
# # │ 3 │
|
1367
|
+
# # ├╌╌╌╌╌╌╌┤
|
1368
|
+
# # │ 4 │
|
1369
|
+
# # ├╌╌╌╌╌╌╌┤
|
1370
|
+
# # │ 98 │
|
1371
|
+
# # ├╌╌╌╌╌╌╌┤
|
1372
|
+
# # │ 99 │
|
1373
|
+
# # └───────┘
|
1374
|
+
#
|
1375
|
+
# @example
|
1376
|
+
# df.groupby("group").agg(Polars.col("value").sort)
|
1377
|
+
# # =>
|
1378
|
+
# # shape: (2, 2)
|
1379
|
+
# # ┌───────┬────────────┐
|
1380
|
+
# # │ group ┆ value │
|
1381
|
+
# # │ --- ┆ --- │
|
1382
|
+
# # │ str ┆ list[i64] │
|
1383
|
+
# # ╞═══════╪════════════╡
|
1384
|
+
# # │ two ┆ [3, 4, 99] │
|
1385
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
1386
|
+
# # │ one ┆ [1, 2, 98] │
|
1387
|
+
# # └───────┴────────────┘
|
797
1388
|
def sort(reverse: false, nulls_last: false)
|
798
1389
|
wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
|
799
1390
|
end
|
800
1391
|
|
1392
|
+
# Return the `k` largest elements.
|
1393
|
+
#
|
1394
|
+
# If `reverse: true`, the smallest elements will be given.
|
1395
|
+
#
|
1396
|
+
# @param k [Integer]
|
1397
|
+
# Number of elements to return.
|
1398
|
+
# @param reverse [Boolean]
|
1399
|
+
# Return the smallest elements.
|
1400
|
+
#
|
1401
|
+
# @return [Expr]
|
1402
|
+
#
|
1403
|
+
# @example
|
1404
|
+
# df = Polars::DataFrame.new(
|
1405
|
+
# {
|
1406
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1407
|
+
# }
|
1408
|
+
# )
|
1409
|
+
# df.select(
|
1410
|
+
# [
|
1411
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1412
|
+
# Polars.col("value").top_k(reverse: true).alias("bottom_k")
|
1413
|
+
# ]
|
1414
|
+
# )
|
1415
|
+
# # =>
|
1416
|
+
# # shape: (5, 2)
|
1417
|
+
# # ┌───────┬──────────┐
|
1418
|
+
# # │ top_k ┆ bottom_k │
|
1419
|
+
# # │ --- ┆ --- │
|
1420
|
+
# # │ i64 ┆ i64 │
|
1421
|
+
# # ╞═══════╪══════════╡
|
1422
|
+
# # │ 99 ┆ 1 │
|
1423
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1424
|
+
# # │ 98 ┆ 2 │
|
1425
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1426
|
+
# # │ 4 ┆ 3 │
|
1427
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1428
|
+
# # │ 3 ┆ 4 │
|
1429
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1430
|
+
# # │ 2 ┆ 98 │
|
1431
|
+
# # └───────┴──────────┘
|
801
1432
|
def top_k(k: 5, reverse: false)
|
802
1433
|
wrap_expr(_rbexpr.top_k(k, reverse))
|
803
1434
|
end
|
804
1435
|
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
#
|
1436
|
+
# Get the index values that would sort this column.
|
1437
|
+
#
|
1438
|
+
# @param reverse [Boolean]
|
1439
|
+
# Sort in reverse (descending) order.
|
1440
|
+
# @param nulls_last [Boolean]
|
1441
|
+
# Place null values last instead of first.
|
810
1442
|
#
|
811
1443
|
# @return [Expr]
|
812
1444
|
#
|
@@ -816,21 +1448,49 @@ module Polars
|
|
816
1448
|
# "a" => [20, 10, 30]
|
817
1449
|
# }
|
818
1450
|
# )
|
819
|
-
# df.select(Polars.col("a").
|
1451
|
+
# df.select(Polars.col("a").arg_sort)
|
820
1452
|
# # =>
|
821
|
-
# # shape: (
|
1453
|
+
# # shape: (3, 1)
|
822
1454
|
# # ┌─────┐
|
823
1455
|
# # │ a │
|
824
1456
|
# # │ --- │
|
825
1457
|
# # │ u32 │
|
826
1458
|
# # ╞═════╡
|
1459
|
+
# # │ 1 │
|
1460
|
+
# # ├╌╌╌╌╌┤
|
1461
|
+
# # │ 0 │
|
1462
|
+
# # ├╌╌╌╌╌┤
|
827
1463
|
# # │ 2 │
|
828
1464
|
# # └─────┘
|
829
|
-
def
|
830
|
-
wrap_expr(_rbexpr.
|
1465
|
+
def arg_sort(reverse: false, nulls_last: false)
|
1466
|
+
wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
|
831
1467
|
end
|
832
1468
|
|
833
|
-
# Get the index of the
|
1469
|
+
# Get the index of the maximal value.
|
1470
|
+
#
|
1471
|
+
# @return [Expr]
|
1472
|
+
#
|
1473
|
+
# @example
|
1474
|
+
# df = Polars::DataFrame.new(
|
1475
|
+
# {
|
1476
|
+
# "a" => [20, 10, 30]
|
1477
|
+
# }
|
1478
|
+
# )
|
1479
|
+
# df.select(Polars.col("a").arg_max)
|
1480
|
+
# # =>
|
1481
|
+
# # shape: (1, 1)
|
1482
|
+
# # ┌─────┐
|
1483
|
+
# # │ a │
|
1484
|
+
# # │ --- │
|
1485
|
+
# # │ u32 │
|
1486
|
+
# # ╞═════╡
|
1487
|
+
# # │ 2 │
|
1488
|
+
# # └─────┘
|
1489
|
+
def arg_max
|
1490
|
+
wrap_expr(_rbexpr.arg_max)
|
1491
|
+
end
|
1492
|
+
|
1493
|
+
# Get the index of the minimal value.
|
834
1494
|
#
|
835
1495
|
# @return [Expr]
|
836
1496
|
#
|
@@ -854,11 +1514,87 @@ module Polars
|
|
854
1514
|
wrap_expr(_rbexpr.arg_min)
|
855
1515
|
end
|
856
1516
|
|
1517
|
+
# Find indices where elements should be inserted to maintain order.
|
1518
|
+
#
|
1519
|
+
# @param element [Object]
|
1520
|
+
# Expression or scalar value.
|
1521
|
+
#
|
1522
|
+
# @return [Expr]
|
1523
|
+
#
|
1524
|
+
# @example
|
1525
|
+
# df = Polars::DataFrame.new(
|
1526
|
+
# {
|
1527
|
+
# "values" => [1, 2, 3, 5]
|
1528
|
+
# }
|
1529
|
+
# )
|
1530
|
+
# df.select(
|
1531
|
+
# [
|
1532
|
+
# Polars.col("values").search_sorted(0).alias("zero"),
|
1533
|
+
# Polars.col("values").search_sorted(3).alias("three"),
|
1534
|
+
# Polars.col("values").search_sorted(6).alias("six")
|
1535
|
+
# ]
|
1536
|
+
# )
|
1537
|
+
# # =>
|
1538
|
+
# # shape: (1, 3)
|
1539
|
+
# # ┌──────┬───────┬─────┐
|
1540
|
+
# # │ zero ┆ three ┆ six │
|
1541
|
+
# # │ --- ┆ --- ┆ --- │
|
1542
|
+
# # │ u32 ┆ u32 ┆ u32 │
|
1543
|
+
# # ╞══════╪═══════╪═════╡
|
1544
|
+
# # │ 0 ┆ 2 ┆ 4 │
|
1545
|
+
# # └──────┴───────┴─────┘
|
857
1546
|
def search_sorted(element)
|
858
1547
|
element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
|
859
1548
|
wrap_expr(_rbexpr.search_sorted(element._rbexpr))
|
860
1549
|
end
|
861
1550
|
|
1551
|
+
# Sort this column by the ordering of another column, or multiple other columns.
|
1552
|
+
#
|
1553
|
+
# In projection/selection context the whole column is sorted.
|
1554
|
+
# If used in a groupby context, the groups are sorted.
|
1555
|
+
#
|
1556
|
+
# @param by [Object]
|
1557
|
+
# The column(s) used for sorting.
|
1558
|
+
# @param reverse [Boolean]
|
1559
|
+
# false -> order from small to large.
|
1560
|
+
# true -> order from large to small.
|
1561
|
+
#
|
1562
|
+
# @return [Expr]
|
1563
|
+
#
|
1564
|
+
# @example
|
1565
|
+
# df = Polars::DataFrame.new(
|
1566
|
+
# {
|
1567
|
+
# "group" => [
|
1568
|
+
# "one",
|
1569
|
+
# "one",
|
1570
|
+
# "one",
|
1571
|
+
# "two",
|
1572
|
+
# "two",
|
1573
|
+
# "two"
|
1574
|
+
# ],
|
1575
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1576
|
+
# }
|
1577
|
+
# )
|
1578
|
+
# df.select(Polars.col("group").sort_by("value"))
|
1579
|
+
# # =>
|
1580
|
+
# # shape: (6, 1)
|
1581
|
+
# # ┌───────┐
|
1582
|
+
# # │ group │
|
1583
|
+
# # │ --- │
|
1584
|
+
# # │ str │
|
1585
|
+
# # ╞═══════╡
|
1586
|
+
# # │ one │
|
1587
|
+
# # ├╌╌╌╌╌╌╌┤
|
1588
|
+
# # │ one │
|
1589
|
+
# # ├╌╌╌╌╌╌╌┤
|
1590
|
+
# # │ two │
|
1591
|
+
# # ├╌╌╌╌╌╌╌┤
|
1592
|
+
# # │ two │
|
1593
|
+
# # ├╌╌╌╌╌╌╌┤
|
1594
|
+
# # │ one │
|
1595
|
+
# # ├╌╌╌╌╌╌╌┤
|
1596
|
+
# # │ two │
|
1597
|
+
# # └───────┘
|
862
1598
|
def sort_by(by, reverse: false)
|
863
1599
|
if !by.is_a?(Array)
|
864
1600
|
by = [by]
|
@@ -871,6 +1607,39 @@ module Polars
|
|
871
1607
|
wrap_expr(_rbexpr.sort_by(by, reverse))
|
872
1608
|
end
|
873
1609
|
|
1610
|
+
# Take values by index.
|
1611
|
+
#
|
1612
|
+
# @param indices [Expr]
|
1613
|
+
# An expression that leads to a `:u32` dtyped Series.
|
1614
|
+
#
|
1615
|
+
# @return [Expr]
|
1616
|
+
#
|
1617
|
+
# @example
|
1618
|
+
# df = Polars::DataFrame.new(
|
1619
|
+
# {
|
1620
|
+
# "group" => [
|
1621
|
+
# "one",
|
1622
|
+
# "one",
|
1623
|
+
# "one",
|
1624
|
+
# "two",
|
1625
|
+
# "two",
|
1626
|
+
# "two"
|
1627
|
+
# ],
|
1628
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1629
|
+
# }
|
1630
|
+
# )
|
1631
|
+
# df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
|
1632
|
+
# # =>
|
1633
|
+
# # shape: (2, 2)
|
1634
|
+
# # ┌───────┬───────┐
|
1635
|
+
# # │ group ┆ value │
|
1636
|
+
# # │ --- ┆ --- │
|
1637
|
+
# # │ str ┆ i64 │
|
1638
|
+
# # ╞═══════╪═══════╡
|
1639
|
+
# # │ one ┆ 98 │
|
1640
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1641
|
+
# # │ two ┆ 99 │
|
1642
|
+
# # └───────┴───────┘
|
874
1643
|
def take(indices)
|
875
1644
|
if indices.is_a?(Array)
|
876
1645
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
@@ -909,11 +1678,105 @@ module Polars
|
|
909
1678
|
wrap_expr(_rbexpr.shift(periods))
|
910
1679
|
end
|
911
1680
|
|
1681
|
+
# Shift the values by a given period and fill the resulting null values.
|
1682
|
+
#
|
1683
|
+
# @param periods [Integer]
|
1684
|
+
# Number of places to shift (may be negative).
|
1685
|
+
# @param fill_value [Object]
|
1686
|
+
# Fill nil values with the result of this expression.
|
1687
|
+
#
|
1688
|
+
# @return [Expr]
|
1689
|
+
#
|
1690
|
+
# @example
|
1691
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
1692
|
+
# df.select(Polars.col("foo").shift_and_fill(1, "a"))
|
1693
|
+
# # =>
|
1694
|
+
# # shape: (4, 1)
|
1695
|
+
# # ┌─────┐
|
1696
|
+
# # │ foo │
|
1697
|
+
# # │ --- │
|
1698
|
+
# # │ str │
|
1699
|
+
# # ╞═════╡
|
1700
|
+
# # │ a │
|
1701
|
+
# # ├╌╌╌╌╌┤
|
1702
|
+
# # │ 1 │
|
1703
|
+
# # ├╌╌╌╌╌┤
|
1704
|
+
# # │ 2 │
|
1705
|
+
# # ├╌╌╌╌╌┤
|
1706
|
+
# # │ 3 │
|
1707
|
+
# # └─────┘
|
912
1708
|
def shift_and_fill(periods, fill_value)
|
913
1709
|
fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
|
914
1710
|
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
915
1711
|
end
|
916
1712
|
|
1713
|
+
# Fill null values using the specified value or strategy.
|
1714
|
+
#
|
1715
|
+
# To interpolate over null values see {#interpolate}.
|
1716
|
+
#
|
1717
|
+
# @param value [Object]
|
1718
|
+
# Value used to fill null values.
|
1719
|
+
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
|
1720
|
+
# Strategy used to fill null values.
|
1721
|
+
# @param limit [Integer]
|
1722
|
+
# Number of consecutive null values to fill when using the 'forward' or
|
1723
|
+
# 'backward' strategy.
|
1724
|
+
#
|
1725
|
+
# @return [Expr]
|
1726
|
+
#
|
1727
|
+
# @example
|
1728
|
+
# df = Polars::DataFrame.new(
|
1729
|
+
# {
|
1730
|
+
# "a" => [1, 2, nil],
|
1731
|
+
# "b" => [4, nil, 6]
|
1732
|
+
# }
|
1733
|
+
# )
|
1734
|
+
# df.select(Polars.all.fill_null(strategy: "zero"))
|
1735
|
+
# # =>
|
1736
|
+
# # shape: (3, 2)
|
1737
|
+
# # ┌─────┬─────┐
|
1738
|
+
# # │ a ┆ b │
|
1739
|
+
# # │ --- ┆ --- │
|
1740
|
+
# # │ i64 ┆ i64 │
|
1741
|
+
# # ╞═════╪═════╡
|
1742
|
+
# # │ 1 ┆ 4 │
|
1743
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1744
|
+
# # │ 2 ┆ 0 │
|
1745
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1746
|
+
# # │ 0 ┆ 6 │
|
1747
|
+
# # └─────┴─────┘
|
1748
|
+
#
|
1749
|
+
# @example
|
1750
|
+
# df.select(Polars.all.fill_null(99))
|
1751
|
+
# # =>
|
1752
|
+
# # shape: (3, 2)
|
1753
|
+
# # ┌─────┬─────┐
|
1754
|
+
# # │ a ┆ b │
|
1755
|
+
# # │ --- ┆ --- │
|
1756
|
+
# # │ i64 ┆ i64 │
|
1757
|
+
# # ╞═════╪═════╡
|
1758
|
+
# # │ 1 ┆ 4 │
|
1759
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1760
|
+
# # │ 2 ┆ 99 │
|
1761
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1762
|
+
# # │ 99 ┆ 6 │
|
1763
|
+
# # └─────┴─────┘
|
1764
|
+
#
|
1765
|
+
# @example
|
1766
|
+
# df.select(Polars.all.fill_null(strategy: "forward"))
|
1767
|
+
# # =>
|
1768
|
+
# # shape: (3, 2)
|
1769
|
+
# # ┌─────┬─────┐
|
1770
|
+
# # │ a ┆ b │
|
1771
|
+
# # │ --- ┆ --- │
|
1772
|
+
# # │ i64 ┆ i64 │
|
1773
|
+
# # ╞═════╪═════╡
|
1774
|
+
# # │ 1 ┆ 4 │
|
1775
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1776
|
+
# # │ 2 ┆ 4 │
|
1777
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1778
|
+
# # │ 2 ┆ 6 │
|
1779
|
+
# # └─────┴─────┘
|
917
1780
|
def fill_null(value = nil, strategy: nil, limit: nil)
|
918
1781
|
if !value.nil? && !strategy.nil?
|
919
1782
|
raise ArgumentError, "cannot specify both 'value' and 'strategy'."
|
@@ -931,75 +1794,426 @@ module Polars
|
|
931
1794
|
end
|
932
1795
|
end
|
933
1796
|
|
1797
|
+
# Fill floating point NaN value with a fill value.
|
1798
|
+
#
|
1799
|
+
# @return [Expr]
|
1800
|
+
#
|
1801
|
+
# @example
|
1802
|
+
# df = Polars::DataFrame.new(
|
1803
|
+
# {
|
1804
|
+
# "a" => [1.0, nil, Float::NAN],
|
1805
|
+
# "b" => [4.0, Float::NAN, 6]
|
1806
|
+
# }
|
1807
|
+
# )
|
1808
|
+
# df.select(Polars.all.fill_nan("zero"))
|
1809
|
+
# # =>
|
1810
|
+
# # shape: (3, 2)
|
1811
|
+
# # ┌──────┬──────┐
|
1812
|
+
# # │ a ┆ b │
|
1813
|
+
# # │ --- ┆ --- │
|
1814
|
+
# # │ str ┆ str │
|
1815
|
+
# # ╞══════╪══════╡
|
1816
|
+
# # │ 1.0 ┆ 4.0 │
|
1817
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1818
|
+
# # │ null ┆ zero │
|
1819
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1820
|
+
# # │ zero ┆ 6.0 │
|
1821
|
+
# # └──────┴──────┘
|
934
1822
|
def fill_nan(fill_value)
|
935
1823
|
fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
|
936
1824
|
wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
|
937
1825
|
end
|
938
1826
|
|
1827
|
+
# Fill missing values with the latest seen values.
|
1828
|
+
#
|
1829
|
+
# @param limit [Integer]
|
1830
|
+
# The number of consecutive null values to forward fill.
|
1831
|
+
#
|
1832
|
+
# @return [Expr]
|
1833
|
+
#
|
1834
|
+
# @example
|
1835
|
+
# df = Polars::DataFrame.new(
|
1836
|
+
# {
|
1837
|
+
# "a" => [1, 2, nil],
|
1838
|
+
# "b" => [4, nil, 6]
|
1839
|
+
# }
|
1840
|
+
# )
|
1841
|
+
# df.select(Polars.all.forward_fill)
|
1842
|
+
# # =>
|
1843
|
+
# # shape: (3, 2)
|
1844
|
+
# # ┌─────┬─────┐
|
1845
|
+
# # │ a ┆ b │
|
1846
|
+
# # │ --- ┆ --- │
|
1847
|
+
# # │ i64 ┆ i64 │
|
1848
|
+
# # ╞═════╪═════╡
|
1849
|
+
# # │ 1 ┆ 4 │
|
1850
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1851
|
+
# # │ 2 ┆ 4 │
|
1852
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1853
|
+
# # │ 2 ┆ 6 │
|
1854
|
+
# # └─────┴─────┘
|
939
1855
|
def forward_fill(limit: nil)
|
940
1856
|
wrap_expr(_rbexpr.forward_fill(limit))
|
941
1857
|
end
|
942
1858
|
|
1859
|
+
# Fill missing values with the next to be seen values.
|
1860
|
+
#
|
1861
|
+
# @param limit [Integer]
|
1862
|
+
# The number of consecutive null values to backward fill.
|
1863
|
+
#
|
1864
|
+
# @return [Expr]
|
1865
|
+
#
|
1866
|
+
# @example
|
1867
|
+
# df = Polars::DataFrame.new(
|
1868
|
+
# {
|
1869
|
+
# "a" => [1, 2, nil],
|
1870
|
+
# "b" => [4, nil, 6]
|
1871
|
+
# }
|
1872
|
+
# )
|
1873
|
+
# df.select(Polars.all.backward_fill)
|
1874
|
+
# # =>
|
1875
|
+
# # shape: (3, 2)
|
1876
|
+
# # ┌──────┬─────┐
|
1877
|
+
# # │ a ┆ b │
|
1878
|
+
# # │ --- ┆ --- │
|
1879
|
+
# # │ i64 ┆ i64 │
|
1880
|
+
# # ╞══════╪═════╡
|
1881
|
+
# # │ 1 ┆ 4 │
|
1882
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1883
|
+
# # │ 2 ┆ 6 │
|
1884
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1885
|
+
# # │ null ┆ 6 │
|
1886
|
+
# # └──────┴─────┘
|
943
1887
|
def backward_fill(limit: nil)
|
944
1888
|
wrap_expr(_rbexpr.backward_fill(limit))
|
945
1889
|
end
|
946
1890
|
|
1891
|
+
# Reverse the selection.
|
1892
|
+
#
|
1893
|
+
# @return [Expr]
|
947
1894
|
def reverse
|
948
1895
|
wrap_expr(_rbexpr.reverse)
|
949
1896
|
end
|
950
1897
|
|
1898
|
+
# Get standard deviation.
|
1899
|
+
#
|
1900
|
+
# @param ddof [Integer]
|
1901
|
+
# Degrees of freedom.
|
1902
|
+
#
|
1903
|
+
# @return [Expr]
|
1904
|
+
#
|
1905
|
+
# @example
|
1906
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
1907
|
+
# df.select(Polars.col("a").std)
|
1908
|
+
# # =>
|
1909
|
+
# # shape: (1, 1)
|
1910
|
+
# # ┌─────┐
|
1911
|
+
# # │ a │
|
1912
|
+
# # │ --- │
|
1913
|
+
# # │ f64 │
|
1914
|
+
# # ╞═════╡
|
1915
|
+
# # │ 1.0 │
|
1916
|
+
# # └─────┘
|
951
1917
|
def std(ddof: 1)
|
952
1918
|
wrap_expr(_rbexpr.std(ddof))
|
953
1919
|
end
|
954
1920
|
|
1921
|
+
# Get variance.
|
1922
|
+
#
|
1923
|
+
# @param ddof [Integer]
|
1924
|
+
# Degrees of freedom.
|
1925
|
+
#
|
1926
|
+
# @return [Expr]
|
1927
|
+
#
|
1928
|
+
# @example
|
1929
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
1930
|
+
# df.select(Polars.col("a").var)
|
1931
|
+
# # =>
|
1932
|
+
# # shape: (1, 1)
|
1933
|
+
# # ┌─────┐
|
1934
|
+
# # │ a │
|
1935
|
+
# # │ --- │
|
1936
|
+
# # │ f64 │
|
1937
|
+
# # ╞═════╡
|
1938
|
+
# # │ 1.0 │
|
1939
|
+
# # └─────┘
|
955
1940
|
def var(ddof: 1)
|
956
1941
|
wrap_expr(_rbexpr.var(ddof))
|
957
1942
|
end
|
958
1943
|
|
1944
|
+
# Get maximum value.
|
1945
|
+
#
|
1946
|
+
# @return [Expr]
|
1947
|
+
#
|
1948
|
+
# @example
|
1949
|
+
# df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
|
1950
|
+
# df.select(Polars.col("a").max)
|
1951
|
+
# # =>
|
1952
|
+
# # shape: (1, 1)
|
1953
|
+
# # ┌─────┐
|
1954
|
+
# # │ a │
|
1955
|
+
# # │ --- │
|
1956
|
+
# # │ f64 │
|
1957
|
+
# # ╞═════╡
|
1958
|
+
# # │ 1.0 │
|
1959
|
+
# # └─────┘
|
959
1960
|
def max
|
960
1961
|
wrap_expr(_rbexpr.max)
|
961
1962
|
end
|
962
1963
|
|
1964
|
+
# Get minimum value.
|
1965
|
+
#
|
1966
|
+
# @return [Expr]
|
1967
|
+
#
|
1968
|
+
# @example
|
1969
|
+
# df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
|
1970
|
+
# df.select(Polars.col("a").min)
|
1971
|
+
# # =>
|
1972
|
+
# # shape: (1, 1)
|
1973
|
+
# # ┌──────┐
|
1974
|
+
# # │ a │
|
1975
|
+
# # │ --- │
|
1976
|
+
# # │ f64 │
|
1977
|
+
# # ╞══════╡
|
1978
|
+
# # │ -1.0 │
|
1979
|
+
# # └──────┘
|
963
1980
|
def min
|
964
1981
|
wrap_expr(_rbexpr.min)
|
965
1982
|
end
|
966
1983
|
|
1984
|
+
# Get maximum value, but propagate/poison encountered NaN values.
|
1985
|
+
#
|
1986
|
+
# @return [Expr]
|
1987
|
+
#
|
1988
|
+
# @example
|
1989
|
+
# df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
|
1990
|
+
# df.select(Polars.col("a").nan_max)
|
1991
|
+
# # =>
|
1992
|
+
# # shape: (1, 1)
|
1993
|
+
# # ┌─────┐
|
1994
|
+
# # │ a │
|
1995
|
+
# # │ --- │
|
1996
|
+
# # │ f64 │
|
1997
|
+
# # ╞═════╡
|
1998
|
+
# # │ NaN │
|
1999
|
+
# # └─────┘
|
967
2000
|
def nan_max
|
968
2001
|
wrap_expr(_rbexpr.nan_max)
|
969
2002
|
end
|
970
2003
|
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
2004
|
+
# Get minimum value, but propagate/poison encountered NaN values.
|
2005
|
+
#
|
2006
|
+
# @return [Expr]
|
2007
|
+
#
|
2008
|
+
# @example
|
2009
|
+
# df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
|
2010
|
+
# df.select(Polars.col("a").nan_min)
|
2011
|
+
# # =>
|
2012
|
+
# # shape: (1, 1)
|
2013
|
+
# # ┌─────┐
|
2014
|
+
# # │ a │
|
2015
|
+
# # │ --- │
|
2016
|
+
# # │ f64 │
|
2017
|
+
# # ╞═════╡
|
2018
|
+
# # │ NaN │
|
2019
|
+
# # └─────┘
|
2020
|
+
def nan_min
|
2021
|
+
wrap_expr(_rbexpr.nan_min)
|
2022
|
+
end
|
2023
|
+
|
2024
|
+
# Get sum value.
|
2025
|
+
#
|
2026
|
+
# @return [Expr]
|
2027
|
+
#
|
2028
|
+
# @note
|
2029
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
2030
|
+
# `:i64` before summing to prevent overflow issues.
|
2031
|
+
#
|
2032
|
+
# @example
|
2033
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2034
|
+
# df.select(Polars.col("a").sum)
|
2035
|
+
# # =>
|
2036
|
+
# # shape: (1, 1)
|
2037
|
+
# # ┌─────┐
|
2038
|
+
# # │ a │
|
2039
|
+
# # │ --- │
|
2040
|
+
# # │ i64 │
|
2041
|
+
# # ╞═════╡
|
2042
|
+
# # │ 0 │
|
2043
|
+
# # └─────┘
|
2044
|
+
def sum
|
2045
|
+
wrap_expr(_rbexpr.sum)
|
2046
|
+
end
|
2047
|
+
|
2048
|
+
# Get mean value.
|
2049
|
+
#
|
2050
|
+
# @return [Expr]
|
2051
|
+
#
|
2052
|
+
# @example
|
2053
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2054
|
+
# df.select(Polars.col("a").mean)
|
2055
|
+
# # =>
|
2056
|
+
# # shape: (1, 1)
|
2057
|
+
# # ┌─────┐
|
2058
|
+
# # │ a │
|
2059
|
+
# # │ --- │
|
2060
|
+
# # │ f64 │
|
2061
|
+
# # ╞═════╡
|
2062
|
+
# # │ 0.0 │
|
2063
|
+
# # └─────┘
|
2064
|
+
def mean
|
2065
|
+
wrap_expr(_rbexpr.mean)
|
2066
|
+
end
|
2067
|
+
|
2068
|
+
# Get median value using linear interpolation.
|
2069
|
+
#
|
2070
|
+
# @return [Expr]
|
2071
|
+
#
|
2072
|
+
# @example
|
2073
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2074
|
+
# df.select(Polars.col("a").median)
|
2075
|
+
# # =>
|
2076
|
+
# # shape: (1, 1)
|
2077
|
+
# # ┌─────┐
|
2078
|
+
# # │ a │
|
2079
|
+
# # │ --- │
|
2080
|
+
# # │ f64 │
|
2081
|
+
# # ╞═════╡
|
2082
|
+
# # │ 0.0 │
|
2083
|
+
# # └─────┘
|
983
2084
|
def median
|
984
2085
|
wrap_expr(_rbexpr.median)
|
985
2086
|
end
|
986
2087
|
|
2088
|
+
# Compute the product of an expression.
|
2089
|
+
#
|
2090
|
+
# @return [Expr]
|
2091
|
+
#
|
2092
|
+
# @example
|
2093
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
2094
|
+
# df.select(Polars.col("a").product)
|
2095
|
+
# # =>
|
2096
|
+
# # shape: (1, 1)
|
2097
|
+
# # ┌─────┐
|
2098
|
+
# # │ a │
|
2099
|
+
# # │ --- │
|
2100
|
+
# # │ i64 │
|
2101
|
+
# # ╞═════╡
|
2102
|
+
# # │ 6 │
|
2103
|
+
# # └─────┘
|
987
2104
|
def product
|
988
2105
|
wrap_expr(_rbexpr.product)
|
989
2106
|
end
|
990
2107
|
|
2108
|
+
# Count unique values.
|
2109
|
+
#
|
2110
|
+
# @return [Expr]
|
2111
|
+
#
|
2112
|
+
# @example
|
2113
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2114
|
+
# df.select(Polars.col("a").n_unique)
|
2115
|
+
# # =>
|
2116
|
+
# # shape: (1, 1)
|
2117
|
+
# # ┌─────┐
|
2118
|
+
# # │ a │
|
2119
|
+
# # │ --- │
|
2120
|
+
# # │ u32 │
|
2121
|
+
# # ╞═════╡
|
2122
|
+
# # │ 2 │
|
2123
|
+
# # └─────┘
|
991
2124
|
def n_unique
|
992
2125
|
wrap_expr(_rbexpr.n_unique)
|
993
2126
|
end
|
994
2127
|
|
2128
|
+
# Count null values.
|
2129
|
+
#
|
2130
|
+
# @return [Expr]
|
2131
|
+
#
|
2132
|
+
# @example
|
2133
|
+
# df = Polars::DataFrame.new(
|
2134
|
+
# {
|
2135
|
+
# "a" => [nil, 1, nil],
|
2136
|
+
# "b" => [1, 2, 3]
|
2137
|
+
# }
|
2138
|
+
# )
|
2139
|
+
# df.select(Polars.all.null_count)
|
2140
|
+
# # =>
|
2141
|
+
# # shape: (1, 2)
|
2142
|
+
# # ┌─────┬─────┐
|
2143
|
+
# # │ a ┆ b │
|
2144
|
+
# # │ --- ┆ --- │
|
2145
|
+
# # │ u32 ┆ u32 │
|
2146
|
+
# # ╞═════╪═════╡
|
2147
|
+
# # │ 2 ┆ 0 │
|
2148
|
+
# # └─────┴─────┘
|
995
2149
|
def null_count
|
996
2150
|
wrap_expr(_rbexpr.null_count)
|
997
2151
|
end
|
998
2152
|
|
2153
|
+
# Get index of first unique value.
|
2154
|
+
#
|
2155
|
+
# @return [Expr]
|
2156
|
+
#
|
2157
|
+
# @example
|
2158
|
+
# df = Polars::DataFrame.new(
|
2159
|
+
# {
|
2160
|
+
# "a" => [8, 9, 10],
|
2161
|
+
# "b" => [nil, 4, 4]
|
2162
|
+
# }
|
2163
|
+
# )
|
2164
|
+
# df.select(Polars.col("a").arg_unique)
|
2165
|
+
# # =>
|
2166
|
+
# # shape: (3, 1)
|
2167
|
+
# # ┌─────┐
|
2168
|
+
# # │ a │
|
2169
|
+
# # │ --- │
|
2170
|
+
# # │ u32 │
|
2171
|
+
# # ╞═════╡
|
2172
|
+
# # │ 0 │
|
2173
|
+
# # ├╌╌╌╌╌┤
|
2174
|
+
# # │ 1 │
|
2175
|
+
# # ├╌╌╌╌╌┤
|
2176
|
+
# # │ 2 │
|
2177
|
+
# # └─────┘
|
2178
|
+
#
|
2179
|
+
# @example
|
2180
|
+
# df.select(Polars.col("b").arg_unique)
|
2181
|
+
# # =>
|
2182
|
+
# # shape: (2, 1)
|
2183
|
+
# # ┌─────┐
|
2184
|
+
# # │ b │
|
2185
|
+
# # │ --- │
|
2186
|
+
# # │ u32 │
|
2187
|
+
# # ╞═════╡
|
2188
|
+
# # │ 0 │
|
2189
|
+
# # ├╌╌╌╌╌┤
|
2190
|
+
# # │ 1 │
|
2191
|
+
# # └─────┘
|
999
2192
|
def arg_unique
|
1000
2193
|
wrap_expr(_rbexpr.arg_unique)
|
1001
2194
|
end
|
1002
2195
|
|
2196
|
+
# Get unique values of this expression.
|
2197
|
+
#
|
2198
|
+
# @param maintain_order [Boolean]
|
2199
|
+
# Maintain order of data. This requires more work.
|
2200
|
+
#
|
2201
|
+
# @return [Expr]
|
2202
|
+
#
|
2203
|
+
# @example
|
2204
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2205
|
+
# df.select(Polars.col("a").unique(maintain_order: true))
|
2206
|
+
# # =>
|
2207
|
+
# # shape: (2, 1)
|
2208
|
+
# # ┌─────┐
|
2209
|
+
# # │ a │
|
2210
|
+
# # │ --- │
|
2211
|
+
# # │ i64 │
|
2212
|
+
# # ╞═════╡
|
2213
|
+
# # │ 1 │
|
2214
|
+
# # ├╌╌╌╌╌┤
|
2215
|
+
# # │ 2 │
|
2216
|
+
# # └─────┘
|
1003
2217
|
def unique(maintain_order: false)
|
1004
2218
|
if maintain_order
|
1005
2219
|
wrap_expr(_rbexpr.unique_stable)
|
@@ -1008,243 +2222,2508 @@ module Polars
|
|
1008
2222
|
end
|
1009
2223
|
end
|
1010
2224
|
|
2225
|
+
# Get the first value.
|
2226
|
+
#
|
2227
|
+
# @return [Expr]
|
2228
|
+
#
|
2229
|
+
# @example
|
2230
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2231
|
+
# df.select(Polars.col("a").first)
|
2232
|
+
# # =>
|
2233
|
+
# # shape: (1, 1)
|
2234
|
+
# # ┌─────┐
|
2235
|
+
# # │ a │
|
2236
|
+
# # │ --- │
|
2237
|
+
# # │ i64 │
|
2238
|
+
# # ╞═════╡
|
2239
|
+
# # │ 1 │
|
2240
|
+
# # └─────┘
|
1011
2241
|
def first
|
1012
2242
|
wrap_expr(_rbexpr.first)
|
1013
2243
|
end
|
1014
2244
|
|
2245
|
+
# Get the last value.
|
2246
|
+
#
|
2247
|
+
# @return [Expr]
|
2248
|
+
#
|
2249
|
+
# @example
|
2250
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2251
|
+
# df.select(Polars.col("a").last)
|
2252
|
+
# # =>
|
2253
|
+
# # shape: (1, 1)
|
2254
|
+
# # ┌─────┐
|
2255
|
+
# # │ a │
|
2256
|
+
# # │ --- │
|
2257
|
+
# # │ i64 │
|
2258
|
+
# # ╞═════╡
|
2259
|
+
# # │ 2 │
|
2260
|
+
# # └─────┘
|
1015
2261
|
def last
|
1016
2262
|
wrap_expr(_rbexpr.last)
|
1017
2263
|
end
|
1018
2264
|
|
2265
|
+
# Apply window function over a subgroup.
|
2266
|
+
#
|
2267
|
+
# This is similar to a groupby + aggregation + self join.
|
2268
|
+
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2269
|
+
#
|
2270
|
+
# @param expr [Object]
|
2271
|
+
# Column(s) to group by.
|
2272
|
+
#
|
2273
|
+
# @return [Expr]
|
2274
|
+
#
|
2275
|
+
# @example
|
2276
|
+
# df = Polars::DataFrame.new(
|
2277
|
+
# {
|
2278
|
+
# "groups" => ["g1", "g1", "g2"],
|
2279
|
+
# "values" => [1, 2, 3]
|
2280
|
+
# }
|
2281
|
+
# )
|
2282
|
+
# df.with_column(
|
2283
|
+
# Polars.col("values").max.over("groups").alias("max_by_group")
|
2284
|
+
# )
|
2285
|
+
# # =>
|
2286
|
+
# # shape: (3, 3)
|
2287
|
+
# # ┌────────┬────────┬──────────────┐
|
2288
|
+
# # │ groups ┆ values ┆ max_by_group │
|
2289
|
+
# # │ --- ┆ --- ┆ --- │
|
2290
|
+
# # │ str ┆ i64 ┆ i64 │
|
2291
|
+
# # ╞════════╪════════╪══════════════╡
|
2292
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2293
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2294
|
+
# # │ g1 ┆ 2 ┆ 2 │
|
2295
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2296
|
+
# # │ g2 ┆ 3 ┆ 3 │
|
2297
|
+
# # └────────┴────────┴──────────────┘
|
2298
|
+
#
|
2299
|
+
# @example
|
2300
|
+
# df = Polars::DataFrame.new(
|
2301
|
+
# {
|
2302
|
+
# "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
|
2303
|
+
# "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
|
2304
|
+
# }
|
2305
|
+
# )
|
2306
|
+
# df.lazy
|
2307
|
+
# .select([Polars.col("groups").sum.over("groups")])
|
2308
|
+
# .collect
|
2309
|
+
# # =>
|
2310
|
+
# # shape: (9, 1)
|
2311
|
+
# # ┌────────┐
|
2312
|
+
# # │ groups │
|
2313
|
+
# # │ --- │
|
2314
|
+
# # │ i64 │
|
2315
|
+
# # ╞════════╡
|
2316
|
+
# # │ 4 │
|
2317
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2318
|
+
# # │ 4 │
|
2319
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2320
|
+
# # │ 6 │
|
2321
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2322
|
+
# # │ 6 │
|
2323
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2324
|
+
# # │ ... │
|
2325
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2326
|
+
# # │ 6 │
|
2327
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2328
|
+
# # │ 6 │
|
2329
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2330
|
+
# # │ 6 │
|
2331
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2332
|
+
# # │ 4 │
|
2333
|
+
# # └────────┘
|
1019
2334
|
def over(expr)
|
1020
2335
|
rbexprs = Utils.selection_to_rbexpr_list(expr)
|
1021
2336
|
wrap_expr(_rbexpr.over(rbexprs))
|
1022
2337
|
end
|
1023
2338
|
|
2339
|
+
# Get mask of unique values.
|
2340
|
+
#
|
2341
|
+
# @return [Expr]
|
2342
|
+
#
|
2343
|
+
# @example
|
2344
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2345
|
+
# df.select(Polars.col("a").is_unique)
|
2346
|
+
# # =>
|
2347
|
+
# # shape: (3, 1)
|
2348
|
+
# # ┌───────┐
|
2349
|
+
# # │ a │
|
2350
|
+
# # │ --- │
|
2351
|
+
# # │ bool │
|
2352
|
+
# # ╞═══════╡
|
2353
|
+
# # │ false │
|
2354
|
+
# # ├╌╌╌╌╌╌╌┤
|
2355
|
+
# # │ false │
|
2356
|
+
# # ├╌╌╌╌╌╌╌┤
|
2357
|
+
# # │ true │
|
2358
|
+
# # └───────┘
|
1024
2359
|
def is_unique
|
1025
2360
|
wrap_expr(_rbexpr.is_unique)
|
1026
2361
|
end
|
1027
2362
|
|
2363
|
+
# Get a mask of the first unique value.
|
2364
|
+
#
|
2365
|
+
# @return [Expr]
|
2366
|
+
#
|
2367
|
+
# @example
|
2368
|
+
# df = Polars::DataFrame.new(
|
2369
|
+
# {
|
2370
|
+
# "num" => [1, 2, 3, 1, 5]
|
2371
|
+
# }
|
2372
|
+
# )
|
2373
|
+
# df.with_column(Polars.col("num").is_first.alias("is_first"))
|
2374
|
+
# # =>
|
2375
|
+
# # shape: (5, 2)
|
2376
|
+
# # ┌─────┬──────────┐
|
2377
|
+
# # │ num ┆ is_first │
|
2378
|
+
# # │ --- ┆ --- │
|
2379
|
+
# # │ i64 ┆ bool │
|
2380
|
+
# # ╞═════╪══════════╡
|
2381
|
+
# # │ 1 ┆ true │
|
2382
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2383
|
+
# # │ 2 ┆ true │
|
2384
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2385
|
+
# # │ 3 ┆ true │
|
2386
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2387
|
+
# # │ 1 ┆ false │
|
2388
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2389
|
+
# # │ 5 ┆ true │
|
2390
|
+
# # └─────┴──────────┘
|
1028
2391
|
def is_first
|
1029
2392
|
wrap_expr(_rbexpr.is_first)
|
1030
2393
|
end
|
1031
2394
|
|
2395
|
+
# Get mask of duplicated values.
|
2396
|
+
#
|
2397
|
+
# @return [Expr]
|
2398
|
+
#
|
2399
|
+
# @example
|
2400
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2401
|
+
# df.select(Polars.col("a").is_duplicated)
|
2402
|
+
# # =>
|
2403
|
+
# # shape: (3, 1)
|
2404
|
+
# # ┌───────┐
|
2405
|
+
# # │ a │
|
2406
|
+
# # │ --- │
|
2407
|
+
# # │ bool │
|
2408
|
+
# # ╞═══════╡
|
2409
|
+
# # │ true │
|
2410
|
+
# # ├╌╌╌╌╌╌╌┤
|
2411
|
+
# # │ true │
|
2412
|
+
# # ├╌╌╌╌╌╌╌┤
|
2413
|
+
# # │ false │
|
2414
|
+
# # └───────┘
|
1032
2415
|
def is_duplicated
|
1033
2416
|
wrap_expr(_rbexpr.is_duplicated)
|
1034
2417
|
end
|
1035
2418
|
|
1036
|
-
|
1037
|
-
wrap_expr(_rbexpr.quantile(quantile, interpolation))
|
1038
|
-
end
|
1039
|
-
|
1040
|
-
def filter(predicate)
|
1041
|
-
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
1042
|
-
end
|
1043
|
-
|
1044
|
-
def where(predicate)
|
1045
|
-
filter(predicate)
|
1046
|
-
end
|
1047
|
-
|
1048
|
-
# def map
|
1049
|
-
# end
|
1050
|
-
|
1051
|
-
# def apply
|
1052
|
-
# end
|
1053
|
-
|
2419
|
+
# Get quantile value.
|
1054
2420
|
#
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
2421
|
+
# @param quantile [Float]
|
2422
|
+
# Quantile between 0.0 and 1.0.
|
2423
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
2424
|
+
# Interpolation method.
|
2425
|
+
#
|
2426
|
+
# @return [Expr]
|
2427
|
+
#
|
2428
|
+
# @example
|
2429
|
+
# df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
|
2430
|
+
# df.select(Polars.col("a").quantile(0.3))
|
2431
|
+
# # =>
|
2432
|
+
# # shape: (1, 1)
|
2433
|
+
# # ┌─────┐
|
2434
|
+
# # │ a │
|
2435
|
+
# # │ --- │
|
2436
|
+
# # │ f64 │
|
2437
|
+
# # ╞═════╡
|
2438
|
+
# # │ 1.0 │
|
2439
|
+
# # └─────┘
|
2440
|
+
#
|
2441
|
+
# @example
|
2442
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
|
2443
|
+
# # =>
|
2444
|
+
# # shape: (1, 1)
|
2445
|
+
# # ┌─────┐
|
2446
|
+
# # │ a │
|
2447
|
+
# # │ --- │
|
2448
|
+
# # │ f64 │
|
2449
|
+
# # ╞═════╡
|
2450
|
+
# # │ 2.0 │
|
2451
|
+
# # └─────┘
|
2452
|
+
#
|
2453
|
+
# @example
|
2454
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
|
2455
|
+
# # =>
|
2456
|
+
# # shape: (1, 1)
|
2457
|
+
# # ┌─────┐
|
2458
|
+
# # │ a │
|
2459
|
+
# # │ --- │
|
2460
|
+
# # │ f64 │
|
2461
|
+
# # ╞═════╡
|
2462
|
+
# # │ 1.0 │
|
2463
|
+
# # └─────┘
|
2464
|
+
#
|
2465
|
+
# @example
|
2466
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
|
2467
|
+
# # =>
|
2468
|
+
# # shape: (1, 1)
|
2469
|
+
# # ┌─────┐
|
2470
|
+
# # │ a │
|
2471
|
+
# # │ --- │
|
2472
|
+
# # │ f64 │
|
2473
|
+
# # ╞═════╡
|
2474
|
+
# # │ 1.5 │
|
2475
|
+
# # └─────┘
|
2476
|
+
#
|
2477
|
+
# @example
|
2478
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
|
2479
|
+
# # =>
|
2480
|
+
# # shape: (1, 1)
|
2481
|
+
# # ┌─────┐
|
2482
|
+
# # │ a │
|
2483
|
+
# # │ --- │
|
2484
|
+
# # │ f64 │
|
2485
|
+
# # ╞═════╡
|
2486
|
+
# # │ 1.5 │
|
2487
|
+
# # └─────┘
|
2488
|
+
def quantile(quantile, interpolation: "nearest")
|
2489
|
+
wrap_expr(_rbexpr.quantile(quantile, interpolation))
|
2490
|
+
end
|
2491
|
+
|
2492
|
+
# Filter a single column.
|
2493
|
+
#
|
2494
|
+
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
2495
|
+
# level, use `LazyFrame#filter`.
|
2496
|
+
#
|
2497
|
+
# @param predicate [Expr]
|
2498
|
+
# Boolean expression.
|
2499
|
+
#
|
2500
|
+
# @return [Expr]
|
2501
|
+
#
|
2502
|
+
# @example
|
2503
|
+
# df = Polars::DataFrame.new(
|
2504
|
+
# {
|
2505
|
+
# "group_col" => ["g1", "g1", "g2"],
|
2506
|
+
# "b" => [1, 2, 3]
|
2507
|
+
# }
|
2508
|
+
# )
|
2509
|
+
# (
|
2510
|
+
# df.groupby("group_col").agg(
|
2511
|
+
# [
|
2512
|
+
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2513
|
+
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
2514
|
+
# ]
|
2515
|
+
# )
|
2516
|
+
# ).sort("group_col")
|
2517
|
+
# # =>
|
2518
|
+
# # shape: (2, 3)
|
2519
|
+
# # ┌───────────┬──────┬─────┐
|
2520
|
+
# # │ group_col ┆ lt ┆ gte │
|
2521
|
+
# # │ --- ┆ --- ┆ --- │
|
2522
|
+
# # │ str ┆ i64 ┆ i64 │
|
2523
|
+
# # ╞═══════════╪══════╪═════╡
|
2524
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2525
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
2526
|
+
# # │ g2 ┆ null ┆ 3 │
|
2527
|
+
# # └───────────┴──────┴─────┘
|
2528
|
+
def filter(predicate)
|
2529
|
+
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2530
|
+
end
|
2531
|
+
|
2532
|
+
# Filter a single column.
|
2533
|
+
#
|
2534
|
+
# Alias for {#filter}.
|
2535
|
+
#
|
2536
|
+
# @param predicate [Expr]
|
2537
|
+
# Boolean expression.
|
2538
|
+
#
|
2539
|
+
# @return [Expr]
|
2540
|
+
#
|
2541
|
+
# @example
|
2542
|
+
# df = Polars::DataFrame.new(
|
2543
|
+
# {
|
2544
|
+
# "group_col" => ["g1", "g1", "g2"],
|
2545
|
+
# "b" => [1, 2, 3]
|
2546
|
+
# }
|
2547
|
+
# )
|
2548
|
+
# (
|
2549
|
+
# df.groupby("group_col").agg(
|
2550
|
+
# [
|
2551
|
+
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2552
|
+
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
2553
|
+
# ]
|
2554
|
+
# )
|
2555
|
+
# ).sort("group_col")
|
2556
|
+
# # =>
|
2557
|
+
# # shape: (2, 3)
|
2558
|
+
# # ┌───────────┬──────┬─────┐
|
2559
|
+
# # │ group_col ┆ lt ┆ gte │
|
2560
|
+
# # │ --- ┆ --- ┆ --- │
|
2561
|
+
# # │ str ┆ i64 ┆ i64 │
|
2562
|
+
# # ╞═══════════╪══════╪═════╡
|
2563
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2564
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
2565
|
+
# # │ g2 ┆ null ┆ 3 │
|
2566
|
+
# # └───────────┴──────┴─────┘
|
2567
|
+
def where(predicate)
|
2568
|
+
filter(predicate)
|
2569
|
+
end
|
2570
|
+
|
2571
|
+
# Apply a custom Ruby function to a Series or sequence of Series.
|
2572
|
+
#
|
2573
|
+
# The output of this custom function must be a Series.
|
2574
|
+
# If you want to apply a custom function elementwise over single values, see
|
2575
|
+
# {#apply}. A use case for `map` is when you want to transform an
|
2576
|
+
# expression with a third-party library.
|
2577
|
+
#
|
2578
|
+
# Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
|
2579
|
+
#
|
2580
|
+
# @param return_dtype [Symbol]
|
2581
|
+
# Dtype of the output Series.
|
2582
|
+
# @param agg_list [Boolean]
|
2583
|
+
# Aggregate list.
|
2584
|
+
#
|
2585
|
+
# @return [Expr]
|
2586
|
+
#
|
2587
|
+
# @example
|
2588
|
+
# df = Polars::DataFrame.new(
|
2589
|
+
# {
|
2590
|
+
# "sine" => [0.0, 1.0, 0.0, -1.0],
|
2591
|
+
# "cosine" => [1.0, 0.0, -1.0, 0.0]
|
2592
|
+
# }
|
2593
|
+
# )
|
2594
|
+
# df.select(Polars.all.map { |x| x.to_numpy.argmax })
|
2595
|
+
# # =>
|
2596
|
+
# # shape: (1, 2)
|
2597
|
+
# # ┌──────┬────────┐
|
2598
|
+
# # │ sine ┆ cosine │
|
2599
|
+
# # │ --- ┆ --- │
|
2600
|
+
# # │ i64 ┆ i64 │
|
2601
|
+
# # ╞══════╪════════╡
|
2602
|
+
# # │ 1 ┆ 0 │
|
2603
|
+
# # └──────┴────────┘
|
2604
|
+
# def map(return_dtype: nil, agg_list: false, &f)
|
2605
|
+
# if !return_dtype.nil?
|
2606
|
+
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2607
|
+
# end
|
2608
|
+
# wrap_expr(_rbexpr.map(f, return_dtype, agg_list))
|
2609
|
+
# end
|
2610
|
+
|
2611
|
+
# Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
|
2612
|
+
#
|
2613
|
+
# Depending on the context it has the following behavior:
|
2614
|
+
#
|
2615
|
+
# * Selection
|
2616
|
+
# Expects the block to take a single value and return a value.
|
2617
|
+
# Applies a Ruby function over each individual value in the column.
|
2618
|
+
# * GroupBy
|
2619
|
+
# Expects the block to take a Series and return a Series.
|
2620
|
+
# Applies a Ruby function over each group.
|
2621
|
+
#
|
2622
|
+
# Implementing logic using a Ruby function is almost always _significantly_
|
2623
|
+
# slower and more memory intensive than implementing the same logic using
|
2624
|
+
# the native expression API because:
|
2625
|
+
#
|
2626
|
+
# - The native expression engine runs in Rust; UDFs run in Ruby.
|
2627
|
+
# - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
|
2628
|
+
# - Polars-native expressions can be parallelised (UDFs cannot).
|
2629
|
+
# - Polars-native expressions can be logically optimised (UDFs cannot).
|
2630
|
+
#
|
2631
|
+
# Wherever possible you should strongly prefer the native expression API
|
2632
|
+
# to achieve the best performance.
|
2633
|
+
#
|
2634
|
+
# @param return_dtype [Symbol]
|
2635
|
+
# Dtype of the output Series.
|
2636
|
+
# If not set, polars will assume that
|
2637
|
+
# the dtype remains unchanged.
|
2638
|
+
#
|
2639
|
+
# @return [Expr]
|
2640
|
+
#
|
2641
|
+
# @example
|
2642
|
+
# df = Polars::DataFrame.new(
|
2643
|
+
# {
|
2644
|
+
# "a" => [1, 2, 3, 1],
|
2645
|
+
# "b" => ["a", "b", "c", "c"]
|
2646
|
+
# }
|
2647
|
+
# )
|
2648
|
+
#
|
2649
|
+
# @example In a selection context, the function is applied by row.
|
2650
|
+
# df.with_column(
|
2651
|
+
# Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
|
2652
|
+
# )
|
2653
|
+
# # =>
|
2654
|
+
# # shape: (4, 3)
|
2655
|
+
# # ┌─────┬─────┬───────────┐
|
2656
|
+
# # │ a ┆ b ┆ a_times_2 │
|
2657
|
+
# # │ --- ┆ --- ┆ --- │
|
2658
|
+
# # │ i64 ┆ str ┆ i64 │
|
2659
|
+
# # ╞═════╪═════╪═══════════╡
|
2660
|
+
# # │ 1 ┆ a ┆ 2 │
|
2661
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2662
|
+
# # │ 2 ┆ b ┆ 4 │
|
2663
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2664
|
+
# # │ 3 ┆ c ┆ 6 │
|
2665
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2666
|
+
# # │ 1 ┆ c ┆ 2 │
|
2667
|
+
# # └─────┴─────┴───────────┘
|
2668
|
+
#
|
2669
|
+
# @example In a GroupBy context the function is applied by group:
|
2670
|
+
# df.lazy
|
2671
|
+
# .groupby("b", maintain_order: true)
|
2672
|
+
# .agg(
|
2673
|
+
# [
|
2674
|
+
# Polars.col("a").apply { |x| x.sum }
|
2675
|
+
# ]
|
2676
|
+
# )
|
2677
|
+
# .collect
|
2678
|
+
# # =>
|
2679
|
+
# # shape: (3, 2)
|
2680
|
+
# # ┌─────┬─────┐
|
2681
|
+
# # │ b ┆ a │
|
2682
|
+
# # │ --- ┆ --- │
|
2683
|
+
# # │ str ┆ i64 │
|
2684
|
+
# # ╞═════╪═════╡
|
2685
|
+
# # │ a ┆ 1 │
|
2686
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2687
|
+
# # │ b ┆ 2 │
|
2688
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2689
|
+
# # │ c ┆ 4 │
|
2690
|
+
# # └─────┴─────┘
|
2691
|
+
# def apply(return_dtype: nil, &f)
|
2692
|
+
# wrap_f = lambda do |x|
|
2693
|
+
# x.apply(return_dtype: return_dtype, &f)
|
2694
|
+
# end
|
2695
|
+
# map(agg_list: true, return_dtype: return_dtype, &wrap_f)
|
2696
|
+
# end
|
2697
|
+
|
2698
|
+
# Explode a list or utf8 Series. This means that every item is expanded to a new
|
2699
|
+
# row.
|
2700
|
+
#
|
2701
|
+
# Alias for {#explode}.
|
2702
|
+
#
|
2703
|
+
# @return [Expr]
|
2704
|
+
#
|
2705
|
+
# @example
|
2706
|
+
# df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
|
2707
|
+
# df.select(Polars.col("foo").flatten)
|
2708
|
+
# # =>
|
2709
|
+
# # shape: (10, 1)
|
2710
|
+
# # ┌─────┐
|
2711
|
+
# # │ foo │
|
2712
|
+
# # │ --- │
|
2713
|
+
# # │ str │
|
2714
|
+
# # ╞═════╡
|
2715
|
+
# # │ h │
|
2716
|
+
# # ├╌╌╌╌╌┤
|
2717
|
+
# # │ e │
|
2718
|
+
# # ├╌╌╌╌╌┤
|
2719
|
+
# # │ l │
|
2720
|
+
# # ├╌╌╌╌╌┤
|
2721
|
+
# # │ l │
|
2722
|
+
# # ├╌╌╌╌╌┤
|
2723
|
+
# # │ ... │
|
2724
|
+
# # ├╌╌╌╌╌┤
|
2725
|
+
# # │ o │
|
2726
|
+
# # ├╌╌╌╌╌┤
|
2727
|
+
# # │ r │
|
2728
|
+
# # ├╌╌╌╌╌┤
|
2729
|
+
# # │ l │
|
2730
|
+
# # ├╌╌╌╌╌┤
|
2731
|
+
# # │ d │
|
2732
|
+
# # └─────┘
|
2733
|
+
def flatten
|
2734
|
+
wrap_expr(_rbexpr.explode)
|
2735
|
+
end
|
2736
|
+
|
2737
|
+
# Explode a list or utf8 Series.
|
2738
|
+
#
|
2739
|
+
# This means that every item is expanded to a new row.
|
2740
|
+
#
|
2741
|
+
# @return [Expr]
|
2742
|
+
#
|
2743
|
+
# @example
|
2744
|
+
# df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
|
2745
|
+
# df.select(Polars.col("b").explode)
|
2746
|
+
# # =>
|
2747
|
+
# # shape: (6, 1)
|
2748
|
+
# # ┌─────┐
|
2749
|
+
# # │ b │
|
2750
|
+
# # │ --- │
|
2751
|
+
# # │ i64 │
|
2752
|
+
# # ╞═════╡
|
2753
|
+
# # │ 1 │
|
2754
|
+
# # ├╌╌╌╌╌┤
|
2755
|
+
# # │ 2 │
|
2756
|
+
# # ├╌╌╌╌╌┤
|
2757
|
+
# # │ 3 │
|
2758
|
+
# # ├╌╌╌╌╌┤
|
2759
|
+
# # │ 4 │
|
2760
|
+
# # ├╌╌╌╌╌┤
|
2761
|
+
# # │ 5 │
|
2762
|
+
# # ├╌╌╌╌╌┤
|
2763
|
+
# # │ 6 │
|
2764
|
+
# # └─────┘
|
2765
|
+
def explode
|
2766
|
+
wrap_expr(_rbexpr.explode)
|
1061
2767
|
end
|
1062
2768
|
|
2769
|
+
# Take every nth value in the Series and return as a new Series.
|
2770
|
+
#
|
2771
|
+
# @return [Expr]
|
2772
|
+
#
|
2773
|
+
# @example
|
2774
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2775
|
+
# df.select(Polars.col("foo").take_every(3))
|
2776
|
+
# # =>
|
2777
|
+
# # shape: (3, 1)
|
2778
|
+
# # ┌─────┐
|
2779
|
+
# # │ foo │
|
2780
|
+
# # │ --- │
|
2781
|
+
# # │ i64 │
|
2782
|
+
# # ╞═════╡
|
2783
|
+
# # │ 1 │
|
2784
|
+
# # ├╌╌╌╌╌┤
|
2785
|
+
# # │ 4 │
|
2786
|
+
# # ├╌╌╌╌╌┤
|
2787
|
+
# # │ 7 │
|
2788
|
+
# # └─────┘
|
1063
2789
|
def take_every(n)
|
1064
2790
|
wrap_expr(_rbexpr.take_every(n))
|
1065
2791
|
end
|
1066
2792
|
|
2793
|
+
# Get the first `n` rows.
|
2794
|
+
#
|
2795
|
+
# @param n [Integer]
|
2796
|
+
# Number of rows to return.
|
2797
|
+
#
|
2798
|
+
# @return [Expr]
|
2799
|
+
#
|
2800
|
+
# @example
|
2801
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
2802
|
+
# df.select(Polars.col("foo").head(3))
|
2803
|
+
# # =>
|
2804
|
+
# # shape: (3, 1)
|
2805
|
+
# # ┌─────┐
|
2806
|
+
# # │ foo │
|
2807
|
+
# # │ --- │
|
2808
|
+
# # │ i64 │
|
2809
|
+
# # ╞═════╡
|
2810
|
+
# # │ 1 │
|
2811
|
+
# # ├╌╌╌╌╌┤
|
2812
|
+
# # │ 2 │
|
2813
|
+
# # ├╌╌╌╌╌┤
|
2814
|
+
# # │ 3 │
|
2815
|
+
# # └─────┘
|
1067
2816
|
def head(n = 10)
|
1068
2817
|
wrap_expr(_rbexpr.head(n))
|
1069
2818
|
end
|
1070
2819
|
|
2820
|
+
# Get the last `n` rows.
|
2821
|
+
#
|
2822
|
+
# @param n [Integer]
|
2823
|
+
# Number of rows to return.
|
2824
|
+
#
|
2825
|
+
# @return [Expr]
|
2826
|
+
#
|
2827
|
+
# @example
|
2828
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
2829
|
+
# df.select(Polars.col("foo").tail(3))
|
2830
|
+
# # =>
|
2831
|
+
# # shape: (3, 1)
|
2832
|
+
# # ┌─────┐
|
2833
|
+
# # │ foo │
|
2834
|
+
# # │ --- │
|
2835
|
+
# # │ i64 │
|
2836
|
+
# # ╞═════╡
|
2837
|
+
# # │ 5 │
|
2838
|
+
# # ├╌╌╌╌╌┤
|
2839
|
+
# # │ 6 │
|
2840
|
+
# # ├╌╌╌╌╌┤
|
2841
|
+
# # │ 7 │
|
2842
|
+
# # └─────┘
|
1071
2843
|
def tail(n = 10)
|
1072
2844
|
wrap_expr(_rbexpr.tail(n))
|
1073
2845
|
end
|
1074
2846
|
|
2847
|
+
# Get the first `n` rows.
|
2848
|
+
#
|
2849
|
+
# Alias for {#head}.
|
2850
|
+
#
|
2851
|
+
# @param n [Integer]
|
2852
|
+
# Number of rows to return.
|
2853
|
+
#
|
2854
|
+
# @return [Expr]
|
1075
2855
|
def limit(n = 10)
|
1076
2856
|
head(n)
|
1077
2857
|
end
|
1078
2858
|
|
2859
|
+
# Raise expression to the power of exponent.
|
2860
|
+
#
|
2861
|
+
# @return [Expr]
|
2862
|
+
#
|
2863
|
+
# @example
|
2864
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
2865
|
+
# df.select(Polars.col("foo").pow(3))
|
2866
|
+
# # =>
|
2867
|
+
# # shape: (4, 1)
|
2868
|
+
# # ┌──────┐
|
2869
|
+
# # │ foo │
|
2870
|
+
# # │ --- │
|
2871
|
+
# # │ f64 │
|
2872
|
+
# # ╞══════╡
|
2873
|
+
# # │ 1.0 │
|
2874
|
+
# # ├╌╌╌╌╌╌┤
|
2875
|
+
# # │ 8.0 │
|
2876
|
+
# # ├╌╌╌╌╌╌┤
|
2877
|
+
# # │ 27.0 │
|
2878
|
+
# # ├╌╌╌╌╌╌┤
|
2879
|
+
# # │ 64.0 │
|
2880
|
+
# # └──────┘
|
1079
2881
|
def pow(exponent)
|
1080
2882
|
exponent = Utils.expr_to_lit_or_expr(exponent)
|
1081
2883
|
wrap_expr(_rbexpr.pow(exponent._rbexpr))
|
1082
2884
|
end
|
1083
2885
|
|
1084
|
-
#
|
1085
|
-
# end
|
1086
|
-
|
2886
|
+
# Check if elements of this expression are present in the other Series.
|
1087
2887
|
#
|
1088
|
-
|
1089
|
-
|
1090
|
-
wrap_expr(_rbexpr.repeat_by(by._rbexpr))
|
1091
|
-
end
|
1092
|
-
|
1093
|
-
# def is_between
|
1094
|
-
# end
|
1095
|
-
|
1096
|
-
# def _hash
|
1097
|
-
# end
|
1098
|
-
|
2888
|
+
# @param other [Object]
|
2889
|
+
# Series or sequence of primitive type.
|
1099
2890
|
#
|
1100
|
-
|
1101
|
-
wrap_expr(_rbexpr.reinterpret(signed))
|
1102
|
-
end
|
1103
|
-
|
1104
|
-
# def _inspect
|
1105
|
-
# end
|
1106
|
-
|
2891
|
+
# @return [Expr]
|
1107
2892
|
#
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
#
|
1113
|
-
#
|
1114
|
-
|
1115
|
-
#
|
1116
|
-
#
|
2893
|
+
# @example
|
2894
|
+
# df = Polars::DataFrame.new(
|
2895
|
+
# {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
|
2896
|
+
# )
|
2897
|
+
# df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
|
2898
|
+
# # =>
|
2899
|
+
# # shape: (3, 1)
|
2900
|
+
# # ┌──────────┐
|
2901
|
+
# # │ contains │
|
2902
|
+
# # │ --- │
|
2903
|
+
# # │ bool │
|
2904
|
+
# # ╞══════════╡
|
2905
|
+
# # │ true │
|
2906
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
2907
|
+
# # │ true │
|
2908
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
2909
|
+
# # │ false │
|
2910
|
+
# # └──────────┘
|
2911
|
+
def is_in(other)
|
2912
|
+
if other.is_a?(Array)
|
2913
|
+
if other.length == 0
|
2914
|
+
other = Polars.lit(nil)
|
2915
|
+
else
|
2916
|
+
other = Polars.lit(Series.new(other))
|
2917
|
+
end
|
2918
|
+
else
|
2919
|
+
other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
|
2920
|
+
end
|
2921
|
+
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2922
|
+
end
|
1117
2923
|
|
1118
|
-
#
|
1119
|
-
#
|
2924
|
+
# Repeat the elements in this Series as specified in the given expression.
|
2925
|
+
#
|
2926
|
+
# The repeated elements are expanded into a `List`.
|
2927
|
+
#
|
2928
|
+
# @param by [Object]
|
2929
|
+
# Numeric column that determines how often the values will be repeated.
|
2930
|
+
# The column will be coerced to UInt32. Give this dtype to make the coercion a
|
2931
|
+
# no-op.
|
2932
|
+
#
|
2933
|
+
# @return [Expr]
|
2934
|
+
#
|
2935
|
+
# @example
|
2936
|
+
# df = Polars::DataFrame.new(
|
2937
|
+
# {
|
2938
|
+
# "a" => ["x", "y", "z"],
|
2939
|
+
# "n" => [1, 2, 3]
|
2940
|
+
# }
|
2941
|
+
# )
|
2942
|
+
# df.select(Polars.col("a").repeat_by("n"))
|
2943
|
+
# # =>
|
2944
|
+
# # shape: (3, 1)
|
2945
|
+
# # ┌─────────────────┐
|
2946
|
+
# # │ a │
|
2947
|
+
# # │ --- │
|
2948
|
+
# # │ list[str] │
|
2949
|
+
# # ╞═════════════════╡
|
2950
|
+
# # │ ["x"] │
|
2951
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2952
|
+
# # │ ["y", "y"] │
|
2953
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2954
|
+
# # │ ["z", "z", "z"] │
|
2955
|
+
# # └─────────────────┘
|
2956
|
+
def repeat_by(by)
|
2957
|
+
by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
|
2958
|
+
wrap_expr(_rbexpr.repeat_by(by._rbexpr))
|
2959
|
+
end
|
1120
2960
|
|
1121
|
-
#
|
1122
|
-
#
|
2961
|
+
# Check if this expression is between start and end.
|
2962
|
+
#
|
2963
|
+
# @param start [Object]
|
2964
|
+
# Lower bound as primitive type or datetime.
|
2965
|
+
# @param _end [Object]
|
2966
|
+
# Upper bound as primitive type or datetime.
|
2967
|
+
# @param include_bounds [Boolean]
|
2968
|
+
# false: Exclude both start and end (default).
|
2969
|
+
# true: Include both start and end.
|
2970
|
+
# [false, false]: Exclude start and exclude end.
|
2971
|
+
# [true, true]: Include start and include end.
|
2972
|
+
# [false, true]: Exclude start and include end.
|
2973
|
+
# [true, false]: Include start and exclude end.
|
2974
|
+
#
|
2975
|
+
# @return [Expr]
|
2976
|
+
#
|
2977
|
+
# @example
|
2978
|
+
# df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
|
2979
|
+
# df.with_column(Polars.col("num").is_between(2, 4))
|
2980
|
+
# # =>
|
2981
|
+
# # shape: (5, 2)
|
2982
|
+
# # ┌─────┬────────────┐
|
2983
|
+
# # │ num ┆ is_between │
|
2984
|
+
# # │ --- ┆ --- │
|
2985
|
+
# # │ i64 ┆ bool │
|
2986
|
+
# # ╞═════╪════════════╡
|
2987
|
+
# # │ 1 ┆ false │
|
2988
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2989
|
+
# # │ 2 ┆ false │
|
2990
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2991
|
+
# # │ 3 ┆ true │
|
2992
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2993
|
+
# # │ 4 ┆ false │
|
2994
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2995
|
+
# # │ 5 ┆ false │
|
2996
|
+
# # └─────┴────────────┘
|
2997
|
+
def is_between(start, _end, include_bounds: false)
|
2998
|
+
if include_bounds == false || include_bounds == [false, false]
|
2999
|
+
((self > start) & (self < _end)).alias("is_between")
|
3000
|
+
elsif include_bounds == true || include_bounds == [true, true]
|
3001
|
+
((self >= start) & (self <= _end)).alias("is_between")
|
3002
|
+
elsif include_bounds == [false, true]
|
3003
|
+
((self > start) & (self <= _end)).alias("is_between")
|
3004
|
+
elsif include_bounds == [true, false]
|
3005
|
+
((self >= start) & (self < _end)).alias("is_between")
|
3006
|
+
else
|
3007
|
+
raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
|
3008
|
+
end
|
3009
|
+
end
|
1123
3010
|
|
1124
|
-
#
|
1125
|
-
#
|
3011
|
+
# Hash the elements in the selection.
|
3012
|
+
#
|
3013
|
+
# The hash value is of type `:u64`.
|
3014
|
+
#
|
3015
|
+
# @param seed [Integer]
|
3016
|
+
# Random seed parameter. Defaults to 0.
|
3017
|
+
# @param seed_1 [Integer]
|
3018
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3019
|
+
# @param seed_2 [Integer]
|
3020
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3021
|
+
# @param seed_3 [Integer]
|
3022
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3023
|
+
#
|
3024
|
+
# @return [Expr]
|
3025
|
+
#
|
3026
|
+
# @example
|
3027
|
+
# df = Polars::DataFrame.new(
|
3028
|
+
# {
|
3029
|
+
# "a" => [1, 2, nil],
|
3030
|
+
# "b" => ["x", nil, "z"]
|
3031
|
+
# }
|
3032
|
+
# )
|
3033
|
+
# df.with_column(Polars.all._hash(10, 20, 30, 40))
|
3034
|
+
# # =>
|
3035
|
+
# # shape: (3, 2)
|
3036
|
+
# # ┌──────────────────────┬──────────────────────┐
|
3037
|
+
# # │ a ┆ b │
|
3038
|
+
# # │ --- ┆ --- │
|
3039
|
+
# # │ u64 ┆ u64 │
|
3040
|
+
# # ╞══════════════════════╪══════════════════════╡
|
3041
|
+
# # │ 4629889412789719550 ┆ 6959506404929392568 │
|
3042
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3043
|
+
# # │ 16386608652769605760 ┆ 11638928888656214026 │
|
3044
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3045
|
+
# # │ 11638928888656214026 ┆ 11040941213715918520 │
|
3046
|
+
# # └──────────────────────┴──────────────────────┘
|
3047
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3048
|
+
k0 = seed
|
3049
|
+
k1 = seed_1.nil? ? seed : seed_1
|
3050
|
+
k2 = seed_2.nil? ? seed : seed_2
|
3051
|
+
k3 = seed_3.nil? ? seed : seed_3
|
3052
|
+
wrap_expr(_rbexpr._hash(k0, k1, k2, k3))
|
3053
|
+
end
|
3054
|
+
|
3055
|
+
# Reinterpret the underlying bits as a signed/unsigned integer.
|
3056
|
+
#
|
3057
|
+
# This operation is only allowed for 64-bit integers. For integers with fewer bits,
|
3058
|
+
# you can safely use the cast operation instead.
|
3059
|
+
#
|
3060
|
+
# @param signed [Boolean]
|
3061
|
+
# If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
|
3062
|
+
#
|
3063
|
+
# @return [Expr]
|
3064
|
+
#
|
3065
|
+
# @example
|
3066
|
+
# s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
|
3067
|
+
# df = Polars::DataFrame.new([s])
|
3068
|
+
# df.select(
|
3069
|
+
# [
|
3070
|
+
# Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
|
3071
|
+
# Polars.col("a").alias("original")
|
3072
|
+
# ]
|
3073
|
+
# )
|
3074
|
+
# # =>
|
3075
|
+
# # shape: (3, 2)
|
3076
|
+
# # ┌───────────────┬──────────┐
|
3077
|
+
# # │ reinterpreted ┆ original │
|
3078
|
+
# # │ --- ┆ --- │
|
3079
|
+
# # │ i64 ┆ u64 │
|
3080
|
+
# # ╞═══════════════╪══════════╡
|
3081
|
+
# # │ 1 ┆ 1 │
|
3082
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
3083
|
+
# # │ 1 ┆ 1 │
|
3084
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
3085
|
+
# # │ 2 ┆ 2 │
|
3086
|
+
# # └───────────────┴──────────┘
|
3087
|
+
def reinterpret(signed: false)
|
3088
|
+
wrap_expr(_rbexpr.reinterpret(signed))
|
3089
|
+
end
|
1126
3090
|
|
1127
|
-
#
|
1128
|
-
#
|
3091
|
+
# Print the value that this expression evaluates to and pass on the value.
|
3092
|
+
#
|
3093
|
+
# @return [Expr]
|
3094
|
+
#
|
3095
|
+
# @example
|
3096
|
+
# df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
|
3097
|
+
# df.select(Polars.col("foo").cumsum._inspect("value is: %s").alias("bar"))
|
3098
|
+
# # =>
|
3099
|
+
# # value is: shape: (3,)
|
3100
|
+
# # Series: 'foo' [i64]
|
3101
|
+
# # [
|
3102
|
+
# # 1
|
3103
|
+
# # 2
|
3104
|
+
# # 4
|
3105
|
+
# # ]
|
3106
|
+
# # shape: (3, 1)
|
3107
|
+
# # ┌─────┐
|
3108
|
+
# # │ bar │
|
3109
|
+
# # │ --- │
|
3110
|
+
# # │ i64 │
|
3111
|
+
# # ╞═════╡
|
3112
|
+
# # │ 1 │
|
3113
|
+
# # ├╌╌╌╌╌┤
|
3114
|
+
# # │ 2 │
|
3115
|
+
# # ├╌╌╌╌╌┤
|
3116
|
+
# # │ 4 │
|
3117
|
+
# # └─────┘
|
3118
|
+
# def _inspect(fmt = "%s")
|
3119
|
+
# inspect = lambda do |s|
|
3120
|
+
# puts(fmt % [s])
|
3121
|
+
# s
|
3122
|
+
# end
|
1129
3123
|
|
1130
|
-
#
|
3124
|
+
# map(return_dtype: nil, agg_list: true, &inspect)
|
1131
3125
|
# end
|
1132
3126
|
|
1133
|
-
#
|
1134
|
-
#
|
3127
|
+
# Fill nulls with linear interpolation over missing values.
|
3128
|
+
#
|
3129
|
+
# Can also be used to regrid data to a new grid - see examples below.
|
3130
|
+
#
|
3131
|
+
# @return [Expr]
|
3132
|
+
#
|
3133
|
+
# @example Fill nulls with linear interpolation
|
3134
|
+
# df = Polars::DataFrame.new(
|
3135
|
+
# {
|
3136
|
+
# "a" => [1, nil, 3],
|
3137
|
+
# "b" => [1.0, Float::NAN, 3.0]
|
3138
|
+
# }
|
3139
|
+
# )
|
3140
|
+
# df.select(Polars.all.interpolate)
|
3141
|
+
# # =>
|
3142
|
+
# # shape: (3, 2)
|
3143
|
+
# # ┌─────┬─────┐
|
3144
|
+
# # │ a ┆ b │
|
3145
|
+
# # │ --- ┆ --- │
|
3146
|
+
# # │ i64 ┆ f64 │
|
3147
|
+
# # ╞═════╪═════╡
|
3148
|
+
# # │ 1 ┆ 1.0 │
|
3149
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
3150
|
+
# # │ 2 ┆ NaN │
|
3151
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
3152
|
+
# # │ 3 ┆ 3.0 │
|
3153
|
+
# # └─────┴─────┘
|
3154
|
+
def interpolate
|
3155
|
+
wrap_expr(_rbexpr.interpolate)
|
3156
|
+
end
|
1135
3157
|
|
1136
|
-
#
|
3158
|
+
# Apply a rolling min (moving min) over the values in this array.
|
3159
|
+
#
|
3160
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3161
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3162
|
+
# `weights` vector. The resulting values will be aggregated to their minimum.
|
3163
|
+
#
|
3164
|
+
# @param window_size [Integer]
|
3165
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3166
|
+
# size indicated by a timedelta or the following string language:
|
3167
|
+
#
|
3168
|
+
# - 1ns (1 nanosecond)
|
3169
|
+
# - 1us (1 microsecond)
|
3170
|
+
# - 1ms (1 millisecond)
|
3171
|
+
# - 1s (1 second)
|
3172
|
+
# - 1m (1 minute)
|
3173
|
+
# - 1h (1 hour)
|
3174
|
+
# - 1d (1 day)
|
3175
|
+
# - 1w (1 week)
|
3176
|
+
# - 1mo (1 calendar month)
|
3177
|
+
# - 1y (1 calendar year)
|
3178
|
+
# - 1i (1 index count)
|
3179
|
+
#
|
3180
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3181
|
+
# and `closed` arguments must also be set.
|
3182
|
+
# @param weights [Array]
|
3183
|
+
# An optional slice with the same length as the window that will be multiplied
|
3184
|
+
# elementwise with the values in the window.
|
3185
|
+
# @param min_periods [Integer]
|
3186
|
+
# The number of values in the window that should be non-null before computing
|
3187
|
+
# a result. If nil, it will be set equal to window size.
|
3188
|
+
# @param center [Boolean]
|
3189
|
+
# Set the labels at the center of the window
|
3190
|
+
# @param by [String]
|
3191
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3192
|
+
# set the column that will be used to determine the windows. This column must
|
3193
|
+
# be of dtype `{Date, Datetime}`
|
3194
|
+
# @param closed ["left", "right", "both", "none"]
|
3195
|
+
# Define whether the temporal window interval is closed or not.
|
3196
|
+
#
|
3197
|
+
# @note
|
3198
|
+
# This functionality is experimental and may change without it being considered a
|
3199
|
+
# breaking change.
|
3200
|
+
#
|
3201
|
+
# @note
|
3202
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3203
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3204
|
+
# computation.
|
3205
|
+
#
|
3206
|
+
# @return [Expr]
|
3207
|
+
#
|
3208
|
+
# @example
|
3209
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3210
|
+
# df.select(
|
3211
|
+
# [
|
3212
|
+
# Polars.col("A").rolling_min(2)
|
3213
|
+
# ]
|
3214
|
+
# )
|
3215
|
+
# # =>
|
3216
|
+
# # shape: (6, 1)
|
3217
|
+
# # ┌──────┐
|
3218
|
+
# # │ A │
|
3219
|
+
# # │ --- │
|
3220
|
+
# # │ f64 │
|
3221
|
+
# # ╞══════╡
|
3222
|
+
# # │ null │
|
3223
|
+
# # ├╌╌╌╌╌╌┤
|
3224
|
+
# # │ 1.0 │
|
3225
|
+
# # ├╌╌╌╌╌╌┤
|
3226
|
+
# # │ 2.0 │
|
3227
|
+
# # ├╌╌╌╌╌╌┤
|
3228
|
+
# # │ 3.0 │
|
3229
|
+
# # ├╌╌╌╌╌╌┤
|
3230
|
+
# # │ 4.0 │
|
3231
|
+
# # ├╌╌╌╌╌╌┤
|
3232
|
+
# # │ 5.0 │
|
3233
|
+
# # └──────┘
|
3234
|
+
def rolling_min(
|
3235
|
+
window_size,
|
3236
|
+
weights: nil,
|
3237
|
+
min_periods: nil,
|
3238
|
+
center: false,
|
3239
|
+
by: nil,
|
3240
|
+
closed: "left"
|
3241
|
+
)
|
3242
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3243
|
+
window_size, min_periods
|
3244
|
+
)
|
3245
|
+
wrap_expr(
|
3246
|
+
_rbexpr.rolling_min(
|
3247
|
+
window_size, weights, min_periods, center, by, closed
|
3248
|
+
)
|
3249
|
+
)
|
3250
|
+
end
|
3251
|
+
|
3252
|
+
# Apply a rolling max (moving max) over the values in this array.
|
3253
|
+
#
|
3254
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3255
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3256
|
+
# `weights` vector. The resulting values will be aggregated to their maximum.
|
3257
|
+
#
|
3258
|
+
# @param window_size [Integer]
|
3259
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3260
|
+
# size indicated by a timedelta or the following string language:
|
3261
|
+
#
|
3262
|
+
# - 1ns (1 nanosecond)
|
3263
|
+
# - 1us (1 microsecond)
|
3264
|
+
# - 1ms (1 millisecond)
|
3265
|
+
# - 1s (1 second)
|
3266
|
+
# - 1m (1 minute)
|
3267
|
+
# - 1h (1 hour)
|
3268
|
+
# - 1d (1 day)
|
3269
|
+
# - 1w (1 week)
|
3270
|
+
# - 1mo (1 calendar month)
|
3271
|
+
# - 1y (1 calendar year)
|
3272
|
+
# - 1i (1 index count)
|
3273
|
+
#
|
3274
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3275
|
+
# and `closed` arguments must also be set.
|
3276
|
+
# @param weights [Array]
|
3277
|
+
# An optional slice with the same length as the window that will be multiplied
|
3278
|
+
# elementwise with the values in the window.
|
3279
|
+
# @param min_periods [Integer]
|
3280
|
+
# The number of values in the window that should be non-null before computing
|
3281
|
+
# a result. If nil, it will be set equal to window size.
|
3282
|
+
# @param center [Boolean]
|
3283
|
+
# Set the labels at the center of the window
|
3284
|
+
# @param by [String]
|
3285
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3286
|
+
# set the column that will be used to determine the windows. This column must
|
3287
|
+
# be of dtype `{Date, Datetime}`
|
3288
|
+
# @param closed ["left", "right", "both", "none"]
|
3289
|
+
# Define whether the temporal window interval is closed or not.
|
3290
|
+
#
|
3291
|
+
# @note
|
3292
|
+
# This functionality is experimental and may change without it being considered a
|
3293
|
+
# breaking change.
|
3294
|
+
#
|
3295
|
+
# @note
|
3296
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3297
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3298
|
+
# computation.
|
3299
|
+
#
|
3300
|
+
# @return [Expr]
|
3301
|
+
#
|
3302
|
+
# @example
|
3303
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3304
|
+
# df.select(
|
3305
|
+
# [
|
3306
|
+
# Polars.col("A").rolling_max(2)
|
3307
|
+
# ]
|
3308
|
+
# )
|
3309
|
+
# # =>
|
3310
|
+
# # shape: (6, 1)
|
3311
|
+
# # ┌──────┐
|
3312
|
+
# # │ A │
|
3313
|
+
# # │ --- │
|
3314
|
+
# # │ f64 │
|
3315
|
+
# # ╞══════╡
|
3316
|
+
# # │ null │
|
3317
|
+
# # ├╌╌╌╌╌╌┤
|
3318
|
+
# # │ 2.0 │
|
3319
|
+
# # ├╌╌╌╌╌╌┤
|
3320
|
+
# # │ 3.0 │
|
3321
|
+
# # ├╌╌╌╌╌╌┤
|
3322
|
+
# # │ 4.0 │
|
3323
|
+
# # ├╌╌╌╌╌╌┤
|
3324
|
+
# # │ 5.0 │
|
3325
|
+
# # ├╌╌╌╌╌╌┤
|
3326
|
+
# # │ 6.0 │
|
3327
|
+
# # └──────┘
|
3328
|
+
def rolling_max(
|
3329
|
+
window_size,
|
3330
|
+
weights: nil,
|
3331
|
+
min_periods: nil,
|
3332
|
+
center: false,
|
3333
|
+
by: nil,
|
3334
|
+
closed: "left"
|
3335
|
+
)
|
3336
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3337
|
+
window_size, min_periods
|
3338
|
+
)
|
3339
|
+
wrap_expr(
|
3340
|
+
_rbexpr.rolling_max(
|
3341
|
+
window_size, weights, min_periods, center, by, closed
|
3342
|
+
)
|
3343
|
+
)
|
3344
|
+
end
|
3345
|
+
|
3346
|
+
# Apply a rolling mean (moving mean) over the values in this array.
|
3347
|
+
#
|
3348
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3349
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3350
|
+
# `weights` vector. The resulting values will be aggregated to their mean.
|
3351
|
+
#
|
3352
|
+
# @param window_size [Integer]
|
3353
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3354
|
+
# size indicated by a timedelta or the following string language:
|
3355
|
+
#
|
3356
|
+
# - 1ns (1 nanosecond)
|
3357
|
+
# - 1us (1 microsecond)
|
3358
|
+
# - 1ms (1 millisecond)
|
3359
|
+
# - 1s (1 second)
|
3360
|
+
# - 1m (1 minute)
|
3361
|
+
# - 1h (1 hour)
|
3362
|
+
# - 1d (1 day)
|
3363
|
+
# - 1w (1 week)
|
3364
|
+
# - 1mo (1 calendar month)
|
3365
|
+
# - 1y (1 calendar year)
|
3366
|
+
# - 1i (1 index count)
|
3367
|
+
#
|
3368
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3369
|
+
# and `closed` arguments must also be set.
|
3370
|
+
# @param weights [Array]
|
3371
|
+
# An optional slice with the same length as the window that will be multiplied
|
3372
|
+
# elementwise with the values in the window.
|
3373
|
+
# @param min_periods [Integer]
|
3374
|
+
# The number of values in the window that should be non-null before computing
|
3375
|
+
# a result. If nil, it will be set equal to window size.
|
3376
|
+
# @param center [Boolean]
|
3377
|
+
# Set the labels at the center of the window
|
3378
|
+
# @param by [String]
|
3379
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3380
|
+
# set the column that will be used to determine the windows. This column must
|
3381
|
+
# be of dtype `{Date, Datetime}`
|
3382
|
+
# @param closed ["left", "right", "both", "none"]
|
3383
|
+
# Define whether the temporal window interval is closed or not.
|
3384
|
+
#
|
3385
|
+
# @note
|
3386
|
+
# This functionality is experimental and may change without it being considered a
|
3387
|
+
# breaking change.
|
3388
|
+
#
|
3389
|
+
# @note
|
3390
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3391
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3392
|
+
# computation.
|
3393
|
+
#
|
3394
|
+
# @return [Expr]
|
3395
|
+
#
|
3396
|
+
# @example
|
3397
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
|
3398
|
+
# df.select(
|
3399
|
+
# [
|
3400
|
+
# Polars.col("A").rolling_mean(2)
|
3401
|
+
# ]
|
3402
|
+
# )
|
3403
|
+
# # =>
|
3404
|
+
# # shape: (6, 1)
|
3405
|
+
# # ┌──────┐
|
3406
|
+
# # │ A │
|
3407
|
+
# # │ --- │
|
3408
|
+
# # │ f64 │
|
3409
|
+
# # ╞══════╡
|
3410
|
+
# # │ null │
|
3411
|
+
# # ├╌╌╌╌╌╌┤
|
3412
|
+
# # │ 4.5 │
|
3413
|
+
# # ├╌╌╌╌╌╌┤
|
3414
|
+
# # │ 7.0 │
|
3415
|
+
# # ├╌╌╌╌╌╌┤
|
3416
|
+
# # │ 4.0 │
|
3417
|
+
# # ├╌╌╌╌╌╌┤
|
3418
|
+
# # │ 9.0 │
|
3419
|
+
# # ├╌╌╌╌╌╌┤
|
3420
|
+
# # │ 13.0 │
|
3421
|
+
# # └──────┘
|
3422
|
+
def rolling_mean(
|
3423
|
+
window_size,
|
3424
|
+
weights: nil,
|
3425
|
+
min_periods: nil,
|
3426
|
+
center: false,
|
3427
|
+
by: nil,
|
3428
|
+
closed: "left"
|
3429
|
+
)
|
3430
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3431
|
+
window_size, min_periods
|
3432
|
+
)
|
3433
|
+
wrap_expr(
|
3434
|
+
_rbexpr.rolling_mean(
|
3435
|
+
window_size, weights, min_periods, center, by, closed
|
3436
|
+
)
|
3437
|
+
)
|
3438
|
+
end
|
3439
|
+
|
3440
|
+
# Apply a rolling sum (moving sum) over the values in this array.
|
3441
|
+
#
|
3442
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3443
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3444
|
+
# `weights` vector. The resulting values will be aggregated to their sum.
|
3445
|
+
#
|
3446
|
+
# @param window_size [Integer]
|
3447
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3448
|
+
# size indicated by a timedelta or the following string language:
|
3449
|
+
#
|
3450
|
+
# - 1ns (1 nanosecond)
|
3451
|
+
# - 1us (1 microsecond)
|
3452
|
+
# - 1ms (1 millisecond)
|
3453
|
+
# - 1s (1 second)
|
3454
|
+
# - 1m (1 minute)
|
3455
|
+
# - 1h (1 hour)
|
3456
|
+
# - 1d (1 day)
|
3457
|
+
# - 1w (1 week)
|
3458
|
+
# - 1mo (1 calendar month)
|
3459
|
+
# - 1y (1 calendar year)
|
3460
|
+
# - 1i (1 index count)
|
3461
|
+
#
|
3462
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3463
|
+
# and `closed` arguments must also be set.
|
3464
|
+
# @param weights [Array]
|
3465
|
+
# An optional slice with the same length as the window that will be multiplied
|
3466
|
+
# elementwise with the values in the window.
|
3467
|
+
# @param min_periods [Integer]
|
3468
|
+
# The number of values in the window that should be non-null before computing
|
3469
|
+
# a result. If nil, it will be set equal to window size.
|
3470
|
+
# @param center [Boolean]
|
3471
|
+
# Set the labels at the center of the window
|
3472
|
+
# @param by [String]
|
3473
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3474
|
+
# set the column that will be used to determine the windows. This column must
|
3475
|
+
# be of dtype `{Date, Datetime}`
|
3476
|
+
# @param closed ["left", "right", "both", "none"]
|
3477
|
+
# Define whether the temporal window interval is closed or not.
|
3478
|
+
#
|
3479
|
+
# @note
|
3480
|
+
# This functionality is experimental and may change without it being considered a
|
3481
|
+
# breaking change.
|
3482
|
+
#
|
3483
|
+
# @note
|
3484
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3485
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3486
|
+
# computation.
|
3487
|
+
#
|
3488
|
+
# @return [Expr]
|
3489
|
+
#
|
3490
|
+
# @example
|
3491
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3492
|
+
# df.select(
|
3493
|
+
# [
|
3494
|
+
# Polars.col("A").rolling_sum(2)
|
3495
|
+
# ]
|
3496
|
+
# )
|
3497
|
+
# # =>
|
3498
|
+
# # shape: (6, 1)
|
3499
|
+
# # ┌──────┐
|
3500
|
+
# # │ A │
|
3501
|
+
# # │ --- │
|
3502
|
+
# # │ f64 │
|
3503
|
+
# # ╞══════╡
|
3504
|
+
# # │ null │
|
3505
|
+
# # ├╌╌╌╌╌╌┤
|
3506
|
+
# # │ 3.0 │
|
3507
|
+
# # ├╌╌╌╌╌╌┤
|
3508
|
+
# # │ 5.0 │
|
3509
|
+
# # ├╌╌╌╌╌╌┤
|
3510
|
+
# # │ 7.0 │
|
3511
|
+
# # ├╌╌╌╌╌╌┤
|
3512
|
+
# # │ 9.0 │
|
3513
|
+
# # ├╌╌╌╌╌╌┤
|
3514
|
+
# # │ 11.0 │
|
3515
|
+
# # └──────┘
|
3516
|
+
def rolling_sum(
|
3517
|
+
window_size,
|
3518
|
+
weights: nil,
|
3519
|
+
min_periods: nil,
|
3520
|
+
center: false,
|
3521
|
+
by: nil,
|
3522
|
+
closed: "left"
|
3523
|
+
)
|
3524
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3525
|
+
window_size, min_periods
|
3526
|
+
)
|
3527
|
+
wrap_expr(
|
3528
|
+
_rbexpr.rolling_sum(
|
3529
|
+
window_size, weights, min_periods, center, by, closed
|
3530
|
+
)
|
3531
|
+
)
|
3532
|
+
end
|
3533
|
+
|
3534
|
+
# Compute a rolling standard deviation.
|
3535
|
+
#
|
3536
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3537
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3538
|
+
# `weights` vector. The resulting values will be aggregated to their standard deviation.
|
3539
|
+
#
|
3540
|
+
# @param window_size [Integer]
|
3541
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3542
|
+
# size indicated by a timedelta or the following string language:
|
3543
|
+
#
|
3544
|
+
# - 1ns (1 nanosecond)
|
3545
|
+
# - 1us (1 microsecond)
|
3546
|
+
# - 1ms (1 millisecond)
|
3547
|
+
# - 1s (1 second)
|
3548
|
+
# - 1m (1 minute)
|
3549
|
+
# - 1h (1 hour)
|
3550
|
+
# - 1d (1 day)
|
3551
|
+
# - 1w (1 week)
|
3552
|
+
# - 1mo (1 calendar month)
|
3553
|
+
# - 1y (1 calendar year)
|
3554
|
+
# - 1i (1 index count)
|
3555
|
+
#
|
3556
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3557
|
+
# and `closed` arguments must also be set.
|
3558
|
+
# @param weights [Array]
|
3559
|
+
# An optional slice with the same length as the window that will be multiplied
|
3560
|
+
# elementwise with the values in the window.
|
3561
|
+
# @param min_periods [Integer]
|
3562
|
+
# The number of values in the window that should be non-null before computing
|
3563
|
+
# a result. If nil, it will be set equal to window size.
|
3564
|
+
# @param center [Boolean]
|
3565
|
+
# Set the labels at the center of the window
|
3566
|
+
# @param by [String]
|
3567
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3568
|
+
# set the column that will be used to determine the windows. This column must
|
3569
|
+
# be of dtype `{Date, Datetime}`
|
3570
|
+
# @param closed ["left", "right", "both", "none"]
|
3571
|
+
# Define whether the temporal window interval is closed or not.
|
3572
|
+
#
|
3573
|
+
# @note
|
3574
|
+
# This functionality is experimental and may change without it being considered a
|
3575
|
+
# breaking change.
|
3576
|
+
#
|
3577
|
+
# @note
|
3578
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3579
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3580
|
+
# computation.
|
3581
|
+
#
|
3582
|
+
# @return [Expr]
|
3583
|
+
#
|
3584
|
+
# @example
|
3585
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3586
|
+
# df.select(
|
3587
|
+
# [
|
3588
|
+
# Polars.col("A").rolling_std(3)
|
3589
|
+
# ]
|
3590
|
+
# )
|
3591
|
+
# # =>
|
3592
|
+
# # shape: (6, 1)
|
3593
|
+
# # ┌──────────┐
|
3594
|
+
# # │ A │
|
3595
|
+
# # │ --- │
|
3596
|
+
# # │ f64 │
|
3597
|
+
# # ╞══════════╡
|
3598
|
+
# # │ null │
|
3599
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3600
|
+
# # │ null │
|
3601
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3602
|
+
# # │ 1.0 │
|
3603
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3604
|
+
# # │ 1.0 │
|
3605
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3606
|
+
# # │ 1.527525 │
|
3607
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3608
|
+
# # │ 2.0 │
|
3609
|
+
# # └──────────┘
|
3610
|
+
def rolling_std(
|
3611
|
+
window_size,
|
3612
|
+
weights: nil,
|
3613
|
+
min_periods: nil,
|
3614
|
+
center: false,
|
3615
|
+
by: nil,
|
3616
|
+
closed: "left"
|
3617
|
+
)
|
3618
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3619
|
+
window_size, min_periods
|
3620
|
+
)
|
3621
|
+
wrap_expr(
|
3622
|
+
_rbexpr.rolling_std(
|
3623
|
+
window_size, weights, min_periods, center, by, closed
|
3624
|
+
)
|
3625
|
+
)
|
3626
|
+
end
|
3627
|
+
|
3628
|
+
# Compute a rolling variance.
|
3629
|
+
#
|
3630
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3631
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3632
|
+
# `weights` vector. The resulting values will be aggregated to their variance.
|
3633
|
+
#
|
3634
|
+
# @param window_size [Integer]
|
3635
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3636
|
+
# size indicated by a timedelta or the following string language:
|
3637
|
+
#
|
3638
|
+
# - 1ns (1 nanosecond)
|
3639
|
+
# - 1us (1 microsecond)
|
3640
|
+
# - 1ms (1 millisecond)
|
3641
|
+
# - 1s (1 second)
|
3642
|
+
# - 1m (1 minute)
|
3643
|
+
# - 1h (1 hour)
|
3644
|
+
# - 1d (1 day)
|
3645
|
+
# - 1w (1 week)
|
3646
|
+
# - 1mo (1 calendar month)
|
3647
|
+
# - 1y (1 calendar year)
|
3648
|
+
# - 1i (1 index count)
|
3649
|
+
#
|
3650
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3651
|
+
# and `closed` arguments must also be set.
|
3652
|
+
# @param weights [Array]
|
3653
|
+
# An optional slice with the same length as the window that will be multiplied
|
3654
|
+
# elementwise with the values in the window.
|
3655
|
+
# @param min_periods [Integer]
|
3656
|
+
# The number of values in the window that should be non-null before computing
|
3657
|
+
# a result. If nil, it will be set equal to window size.
|
3658
|
+
# @param center [Boolean]
|
3659
|
+
# Set the labels at the center of the window
|
3660
|
+
# @param by [String]
|
3661
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3662
|
+
# set the column that will be used to determine the windows. This column must
|
3663
|
+
# be of dtype `{Date, Datetime}`
|
3664
|
+
# @param closed ["left", "right", "both", "none"]
|
3665
|
+
# Define whether the temporal window interval is closed or not.
|
3666
|
+
#
|
3667
|
+
# @note
|
3668
|
+
# This functionality is experimental and may change without it being considered a
|
3669
|
+
# breaking change.
|
3670
|
+
#
|
3671
|
+
# @note
|
3672
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3673
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3674
|
+
# computation.
|
3675
|
+
#
|
3676
|
+
# @return [Expr]
|
3677
|
+
#
|
3678
|
+
# @example
|
3679
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3680
|
+
# df.select(
|
3681
|
+
# [
|
3682
|
+
# Polars.col("A").rolling_var(3)
|
3683
|
+
# ]
|
3684
|
+
# )
|
3685
|
+
# # =>
|
3686
|
+
# # shape: (6, 1)
|
3687
|
+
# # ┌──────────┐
|
3688
|
+
# # │ A │
|
3689
|
+
# # │ --- │
|
3690
|
+
# # │ f64 │
|
3691
|
+
# # ╞══════════╡
|
3692
|
+
# # │ null │
|
3693
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3694
|
+
# # │ null │
|
3695
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3696
|
+
# # │ 1.0 │
|
3697
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3698
|
+
# # │ 1.0 │
|
3699
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3700
|
+
# # │ 2.333333 │
|
3701
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3702
|
+
# # │ 4.0 │
|
3703
|
+
# # └──────────┘
|
3704
|
+
def rolling_var(
|
3705
|
+
window_size,
|
3706
|
+
weights: nil,
|
3707
|
+
min_periods: nil,
|
3708
|
+
center: false,
|
3709
|
+
by: nil,
|
3710
|
+
closed: "left"
|
3711
|
+
)
|
3712
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3713
|
+
window_size, min_periods
|
3714
|
+
)
|
3715
|
+
wrap_expr(
|
3716
|
+
_rbexpr.rolling_var(
|
3717
|
+
window_size, weights, min_periods, center, by, closed
|
3718
|
+
)
|
3719
|
+
)
|
3720
|
+
end
|
3721
|
+
|
3722
|
+
# Compute a rolling median.
|
3723
|
+
#
|
3724
|
+
# @param window_size [Integer]
|
3725
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3726
|
+
# size indicated by a timedelta or the following string language:
|
3727
|
+
#
|
3728
|
+
# - 1ns (1 nanosecond)
|
3729
|
+
# - 1us (1 microsecond)
|
3730
|
+
# - 1ms (1 millisecond)
|
3731
|
+
# - 1s (1 second)
|
3732
|
+
# - 1m (1 minute)
|
3733
|
+
# - 1h (1 hour)
|
3734
|
+
# - 1d (1 day)
|
3735
|
+
# - 1w (1 week)
|
3736
|
+
# - 1mo (1 calendar month)
|
3737
|
+
# - 1y (1 calendar year)
|
3738
|
+
# - 1i (1 index count)
|
3739
|
+
#
|
3740
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3741
|
+
# and `closed` arguments must also be set.
|
3742
|
+
# @param weights [Array]
|
3743
|
+
# An optional slice with the same length as the window that will be multiplied
|
3744
|
+
# elementwise with the values in the window.
|
3745
|
+
# @param min_periods [Integer]
|
3746
|
+
# The number of values in the window that should be non-null before computing
|
3747
|
+
# a result. If nil, it will be set equal to window size.
|
3748
|
+
# @param center [Boolean]
|
3749
|
+
# Set the labels at the center of the window
|
3750
|
+
# @param by [String]
|
3751
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3752
|
+
# set the column that will be used to determine the windows. This column must
|
3753
|
+
# be of dtype `{Date, Datetime}`
|
3754
|
+
# @param closed ["left", "right", "both", "none"]
|
3755
|
+
# Define whether the temporal window interval is closed or not.
|
3756
|
+
#
|
3757
|
+
# @note
|
3758
|
+
# This functionality is experimental and may change without it being considered a
|
3759
|
+
# breaking change.
|
3760
|
+
#
|
3761
|
+
# @note
|
3762
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3763
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3764
|
+
# computation.
|
3765
|
+
#
|
3766
|
+
# @return [Expr]
|
3767
|
+
#
|
3768
|
+
# @example
|
3769
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3770
|
+
# df.select(
|
3771
|
+
# [
|
3772
|
+
# Polars.col("A").rolling_median(3)
|
3773
|
+
# ]
|
3774
|
+
# )
|
3775
|
+
# # =>
|
3776
|
+
# # shape: (6, 1)
|
3777
|
+
# # ┌──────┐
|
3778
|
+
# # │ A │
|
3779
|
+
# # │ --- │
|
3780
|
+
# # │ f64 │
|
3781
|
+
# # ╞══════╡
|
3782
|
+
# # │ null │
|
3783
|
+
# # ├╌╌╌╌╌╌┤
|
3784
|
+
# # │ null │
|
3785
|
+
# # ├╌╌╌╌╌╌┤
|
3786
|
+
# # │ 2.0 │
|
3787
|
+
# # ├╌╌╌╌╌╌┤
|
3788
|
+
# # │ 3.0 │
|
3789
|
+
# # ├╌╌╌╌╌╌┤
|
3790
|
+
# # │ 4.0 │
|
3791
|
+
# # ├╌╌╌╌╌╌┤
|
3792
|
+
# # │ 6.0 │
|
3793
|
+
# # └──────┘
|
3794
|
+
def rolling_median(
|
3795
|
+
window_size,
|
3796
|
+
weights: nil,
|
3797
|
+
min_periods: nil,
|
3798
|
+
center: false,
|
3799
|
+
by: nil,
|
3800
|
+
closed: "left"
|
3801
|
+
)
|
3802
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3803
|
+
window_size, min_periods
|
3804
|
+
)
|
3805
|
+
wrap_expr(
|
3806
|
+
_rbexpr.rolling_median(
|
3807
|
+
window_size, weights, min_periods, center, by, closed
|
3808
|
+
)
|
3809
|
+
)
|
3810
|
+
end
|
3811
|
+
|
3812
|
+
# Compute a rolling quantile.
|
3813
|
+
#
|
3814
|
+
# @param quantile [Float]
|
3815
|
+
# Quantile between 0.0 and 1.0.
|
3816
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
3817
|
+
# Interpolation method.
|
3818
|
+
# @param window_size [Integer]
|
3819
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3820
|
+
# size indicated by a timedelta or the following string language:
|
3821
|
+
#
|
3822
|
+
# - 1ns (1 nanosecond)
|
3823
|
+
# - 1us (1 microsecond)
|
3824
|
+
# - 1ms (1 millisecond)
|
3825
|
+
# - 1s (1 second)
|
3826
|
+
# - 1m (1 minute)
|
3827
|
+
# - 1h (1 hour)
|
3828
|
+
# - 1d (1 day)
|
3829
|
+
# - 1w (1 week)
|
3830
|
+
# - 1mo (1 calendar month)
|
3831
|
+
# - 1y (1 calendar year)
|
3832
|
+
# - 1i (1 index count)
|
3833
|
+
#
|
3834
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3835
|
+
# and `closed` arguments must also be set.
|
3836
|
+
# @param weights [Array]
|
3837
|
+
# An optional slice with the same length as the window that will be multiplied
|
3838
|
+
# elementwise with the values in the window.
|
3839
|
+
# @param min_periods [Integer]
|
3840
|
+
# The number of values in the window that should be non-null before computing
|
3841
|
+
# a result. If nil, it will be set equal to window size.
|
3842
|
+
# @param center [Boolean]
|
3843
|
+
# Set the labels at the center of the window
|
3844
|
+
# @param by [String]
|
3845
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3846
|
+
# set the column that will be used to determine the windows. This column must
|
3847
|
+
# be of dtype `{Date, Datetime}`
|
3848
|
+
# @param closed ["left", "right", "both", "none"]
|
3849
|
+
# Define whether the temporal window interval is closed or not.
|
3850
|
+
#
|
3851
|
+
# @note
|
3852
|
+
# This functionality is experimental and may change without it being considered a
|
3853
|
+
# breaking change.
|
3854
|
+
#
|
3855
|
+
# @note
|
3856
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3857
|
+
# window, consider using `groupby_rolling`, which can cache the window size
|
3858
|
+
# computation.
|
3859
|
+
#
|
3860
|
+
# @return [Expr]
|
3861
|
+
#
|
3862
|
+
# @example
|
3863
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3864
|
+
# df.select(
|
3865
|
+
# [
|
3866
|
+
# Polars.col("A").rolling_quantile(0.33, window_size: 3)
|
3867
|
+
# ]
|
3868
|
+
# )
|
3869
|
+
# # =>
|
3870
|
+
# # shape: (6, 1)
|
3871
|
+
# # ┌──────┐
|
3872
|
+
# # │ A │
|
3873
|
+
# # │ --- │
|
3874
|
+
# # │ f64 │
|
3875
|
+
# # ╞══════╡
|
3876
|
+
# # │ null │
|
3877
|
+
# # ├╌╌╌╌╌╌┤
|
3878
|
+
# # │ null │
|
3879
|
+
# # ├╌╌╌╌╌╌┤
|
3880
|
+
# # │ 1.0 │
|
3881
|
+
# # ├╌╌╌╌╌╌┤
|
3882
|
+
# # │ 2.0 │
|
3883
|
+
# # ├╌╌╌╌╌╌┤
|
3884
|
+
# # │ 3.0 │
|
3885
|
+
# # ├╌╌╌╌╌╌┤
|
3886
|
+
# # │ 4.0 │
|
3887
|
+
# # └──────┘
|
3888
|
+
def rolling_quantile(
|
3889
|
+
quantile,
|
3890
|
+
interpolation: "nearest",
|
3891
|
+
window_size: 2,
|
3892
|
+
weights: nil,
|
3893
|
+
min_periods: nil,
|
3894
|
+
center: false,
|
3895
|
+
by: nil,
|
3896
|
+
closed: "left"
|
3897
|
+
)
|
3898
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3899
|
+
window_size, min_periods
|
3900
|
+
)
|
3901
|
+
wrap_expr(
|
3902
|
+
_rbexpr.rolling_quantile(
|
3903
|
+
quantile, interpolation, window_size, weights, min_periods, center, by, closed
|
3904
|
+
)
|
3905
|
+
)
|
3906
|
+
end
|
3907
|
+
|
3908
|
+
# Apply a custom rolling window function.
|
3909
|
+
#
|
3910
|
+
# Prefer the specific rolling window functions over this one, as they are faster.
|
3911
|
+
#
|
3912
|
+
# Prefer:
|
3913
|
+
# * rolling_min
|
3914
|
+
# * rolling_max
|
3915
|
+
# * rolling_mean
|
3916
|
+
# * rolling_sum
|
3917
|
+
#
|
3918
|
+
# @param window_size [Integer]
|
3919
|
+
# The length of the window.
|
3920
|
+
# @param weights [Object]
|
3921
|
+
# An optional slice with the same length as the window that will be multiplied
|
3922
|
+
# elementwise with the values in the window.
|
3923
|
+
# @param min_periods [Integer]
|
3924
|
+
# The number of values in the window that should be non-null before computing
|
3925
|
+
# a result. If nil, it will be set equal to window size.
|
3926
|
+
# @param center [Boolean]
|
3927
|
+
# Set the labels at the center of the window
|
3928
|
+
#
|
3929
|
+
# @return [Expr]
|
3930
|
+
#
|
3931
|
+
# @example
|
3932
|
+
# df = Polars::DataFrame.new(
|
3933
|
+
# {
|
3934
|
+
# "A" => [1.0, 2.0, 9.0, 2.0, 13.0]
|
3935
|
+
# }
|
3936
|
+
# )
|
3937
|
+
# df.select(
|
3938
|
+
# [
|
3939
|
+
# Polars.col("A").rolling_apply(window_size: 3) { |s| s.std }
|
3940
|
+
# ]
|
3941
|
+
# )
|
3942
|
+
# # =>
|
3943
|
+
# # shape: (5, 1)
|
3944
|
+
# # ┌──────────┐
|
3945
|
+
# # │ A │
|
3946
|
+
# # │ --- │
|
3947
|
+
# # │ f64 │
|
3948
|
+
# # ╞══════════╡
|
3949
|
+
# # │ null │
|
3950
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3951
|
+
# # │ null │
|
3952
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3953
|
+
# # │ 4.358899 │
|
3954
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3955
|
+
# # │ 4.041452 │
|
3956
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3957
|
+
# # │ 5.567764 │
|
3958
|
+
# # └──────────┘
|
3959
|
+
# def rolling_apply(
|
3960
|
+
# window_size:,
|
3961
|
+
# weights: nil,
|
3962
|
+
# min_periods: nil,
|
3963
|
+
# center: false,
|
3964
|
+
# &function
|
3965
|
+
# )
|
3966
|
+
# if min_periods.nil?
|
3967
|
+
# min_periods = window_size
|
3968
|
+
# end
|
3969
|
+
# wrap_expr(
|
3970
|
+
# _rbexpr.rolling_apply(
|
3971
|
+
# function, window_size, weights, min_periods, center
|
3972
|
+
# )
|
3973
|
+
# )
|
1137
3974
|
# end
|
1138
3975
|
|
3976
|
+
# Compute a rolling skew.
|
3977
|
+
#
|
3978
|
+
# @param window_size [Integer]
|
3979
|
+
# Integer size of the rolling window.
|
3980
|
+
# @param bias [Boolean]
|
3981
|
+
# If false, the calculations are corrected for statistical bias.
|
1139
3982
|
#
|
3983
|
+
# @return [Expr]
|
1140
3984
|
def rolling_skew(window_size, bias: true)
|
1141
3985
|
wrap_expr(_rbexpr.rolling_skew(window_size, bias))
|
1142
3986
|
end
|
1143
3987
|
|
3988
|
+
# Compute absolute values.
|
3989
|
+
#
|
3990
|
+
# @return [Expr]
|
3991
|
+
#
|
3992
|
+
# @example
|
3993
|
+
# df = Polars::DataFrame.new(
|
3994
|
+
# {
|
3995
|
+
# "A" => [-1.0, 0.0, 1.0, 2.0]
|
3996
|
+
# }
|
3997
|
+
# )
|
3998
|
+
# df.select(Polars.col("A").abs)
|
3999
|
+
# # =>
|
4000
|
+
# # shape: (4, 1)
|
4001
|
+
# # ┌─────┐
|
4002
|
+
# # │ A │
|
4003
|
+
# # │ --- │
|
4004
|
+
# # │ f64 │
|
4005
|
+
# # ╞═════╡
|
4006
|
+
# # │ 1.0 │
|
4007
|
+
# # ├╌╌╌╌╌┤
|
4008
|
+
# # │ 0.0 │
|
4009
|
+
# # ├╌╌╌╌╌┤
|
4010
|
+
# # │ 1.0 │
|
4011
|
+
# # ├╌╌╌╌╌┤
|
4012
|
+
# # │ 2.0 │
|
4013
|
+
# # └─────┘
|
1144
4014
|
def abs
|
1145
4015
|
wrap_expr(_rbexpr.abs)
|
1146
4016
|
end
|
1147
4017
|
|
4018
|
+
# Get the index values that would sort this column.
|
4019
|
+
#
|
4020
|
+
# Alias for {#arg_sort}.
|
4021
|
+
#
|
4022
|
+
# @param reverse [Boolean]
|
4023
|
+
# Sort in reverse (descending) order.
|
4024
|
+
# @param nulls_last [Boolean]
|
4025
|
+
# Place null values last instead of first.
|
4026
|
+
#
|
4027
|
+
# @return [Expr]
|
4028
|
+
#
|
4029
|
+
# @example
|
4030
|
+
# df = Polars::DataFrame.new(
|
4031
|
+
# {
|
4032
|
+
# "a" => [20, 10, 30]
|
4033
|
+
# }
|
4034
|
+
# )
|
4035
|
+
# df.select(Polars.col("a").argsort)
|
4036
|
+
# # =>
|
4037
|
+
# # shape: (3, 1)
|
4038
|
+
# # ┌─────┐
|
4039
|
+
# # │ a │
|
4040
|
+
# # │ --- │
|
4041
|
+
# # │ u32 │
|
4042
|
+
# # ╞═════╡
|
4043
|
+
# # │ 1 │
|
4044
|
+
# # ├╌╌╌╌╌┤
|
4045
|
+
# # │ 0 │
|
4046
|
+
# # ├╌╌╌╌╌┤
|
4047
|
+
# # │ 2 │
|
4048
|
+
# # └─────┘
|
1148
4049
|
def argsort(reverse: false, nulls_last: false)
|
1149
4050
|
arg_sort(reverse: reverse, nulls_last: nulls_last)
|
1150
4051
|
end
|
1151
4052
|
|
4053
|
+
# Assign ranks to data, dealing with ties appropriately.
|
4054
|
+
#
|
4055
|
+
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
|
4056
|
+
# The method used to assign ranks to tied elements.
|
4057
|
+
# The following methods are available:
|
4058
|
+
#
|
4059
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
4060
|
+
# all the tied values is assigned to each value.
|
4061
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
4062
|
+
# the tied values is assigned to each value. (This is also referred to
|
4063
|
+
# as "competition" ranking.)
|
4064
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
4065
|
+
# the tied values is assigned to each value.
|
4066
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
4067
|
+
# assigned the rank immediately after those assigned to the tied
|
4068
|
+
# elements.
|
4069
|
+
# - 'ordinal' : All values are given a distinct rank, corresponding to
|
4070
|
+
# the order that the values occur in the Series.
|
4071
|
+
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
4072
|
+
# on the order that the values occur in the Series.
|
4073
|
+
# @param reverse [Boolean]
|
4074
|
+
# Reverse the operation.
|
4075
|
+
#
|
4076
|
+
# @return [Expr]
|
4077
|
+
#
|
4078
|
+
# @example The 'average' method:
|
4079
|
+
# df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
|
4080
|
+
# df.select(Polars.col("a").rank)
|
4081
|
+
# # =>
|
4082
|
+
# # shape: (5, 1)
|
4083
|
+
# # ┌─────┐
|
4084
|
+
# # │ a │
|
4085
|
+
# # │ --- │
|
4086
|
+
# # │ f32 │
|
4087
|
+
# # ╞═════╡
|
4088
|
+
# # │ 3.0 │
|
4089
|
+
# # ├╌╌╌╌╌┤
|
4090
|
+
# # │ 4.5 │
|
4091
|
+
# # ├╌╌╌╌╌┤
|
4092
|
+
# # │ 1.5 │
|
4093
|
+
# # ├╌╌╌╌╌┤
|
4094
|
+
# # │ 1.5 │
|
4095
|
+
# # ├╌╌╌╌╌┤
|
4096
|
+
# # │ 4.5 │
|
4097
|
+
# # └─────┘
|
4098
|
+
#
|
4099
|
+
# @example The 'ordinal' method:
|
4100
|
+
# df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
|
4101
|
+
# df.select(Polars.col("a").rank(method: "ordinal"))
|
4102
|
+
# # =>
|
4103
|
+
# # shape: (5, 1)
|
4104
|
+
# # ┌─────┐
|
4105
|
+
# # │ a │
|
4106
|
+
# # │ --- │
|
4107
|
+
# # │ u32 │
|
4108
|
+
# # ╞═════╡
|
4109
|
+
# # │ 3 │
|
4110
|
+
# # ├╌╌╌╌╌┤
|
4111
|
+
# # │ 4 │
|
4112
|
+
# # ├╌╌╌╌╌┤
|
4113
|
+
# # │ 1 │
|
4114
|
+
# # ├╌╌╌╌╌┤
|
4115
|
+
# # │ 2 │
|
4116
|
+
# # ├╌╌╌╌╌┤
|
4117
|
+
# # │ 5 │
|
4118
|
+
# # └─────┘
|
1152
4119
|
def rank(method: "average", reverse: false)
|
1153
4120
|
wrap_expr(_rbexpr.rank(method, reverse))
|
1154
4121
|
end
|
1155
4122
|
|
4123
|
+
# Calculate the n-th discrete difference.
|
4124
|
+
#
|
4125
|
+
# @param n [Integer]
|
4126
|
+
# Number of slots to shift.
|
4127
|
+
# @param null_behavior ["ignore", "drop"]
|
4128
|
+
# How to handle null values.
|
4129
|
+
#
|
4130
|
+
# @return [Expr]
|
4131
|
+
#
|
4132
|
+
# @example
|
4133
|
+
# df = Polars::DataFrame.new(
|
4134
|
+
# {
|
4135
|
+
# "a" => [20, 10, 30]
|
4136
|
+
# }
|
4137
|
+
# )
|
4138
|
+
# df.select(Polars.col("a").diff)
|
4139
|
+
# # =>
|
4140
|
+
# # shape: (3, 1)
|
4141
|
+
# # ┌──────┐
|
4142
|
+
# # │ a │
|
4143
|
+
# # │ --- │
|
4144
|
+
# # │ i64 │
|
4145
|
+
# # ╞══════╡
|
4146
|
+
# # │ null │
|
4147
|
+
# # ├╌╌╌╌╌╌┤
|
4148
|
+
# # │ -10 │
|
4149
|
+
# # ├╌╌╌╌╌╌┤
|
4150
|
+
# # │ 20 │
|
4151
|
+
# # └──────┘
|
1156
4152
|
def diff(n: 1, null_behavior: "ignore")
|
1157
4153
|
wrap_expr(_rbexpr.diff(n, null_behavior))
|
1158
4154
|
end
|
1159
4155
|
|
4156
|
+
# Computes percentage change between values.
|
4157
|
+
#
|
4158
|
+
# Percentage change (as fraction) between current element and most-recent
|
4159
|
+
# non-null element at least `n` period(s) before the current element.
|
4160
|
+
#
|
4161
|
+
# Computes the change from the previous row by default.
|
4162
|
+
#
|
4163
|
+
# @param n [Integer]
|
4164
|
+
# Periods to shift for forming percent change.
|
4165
|
+
#
|
4166
|
+
# @return [Expr]
|
4167
|
+
#
|
4168
|
+
# @example
|
4169
|
+
# df = Polars::DataFrame.new(
|
4170
|
+
# {
|
4171
|
+
# "a" => [10, 11, 12, nil, 12]
|
4172
|
+
# }
|
4173
|
+
# )
|
4174
|
+
# df.with_column(Polars.col("a").pct_change.alias("pct_change"))
|
4175
|
+
# # =>
|
4176
|
+
# # shape: (5, 2)
|
4177
|
+
# # ┌──────┬────────────┐
|
4178
|
+
# # │ a ┆ pct_change │
|
4179
|
+
# # │ --- ┆ --- │
|
4180
|
+
# # │ i64 ┆ f64 │
|
4181
|
+
# # ╞══════╪════════════╡
|
4182
|
+
# # │ 10 ┆ null │
|
4183
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4184
|
+
# # │ 11 ┆ 0.1 │
|
4185
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4186
|
+
# # │ 12 ┆ 0.090909 │
|
4187
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4188
|
+
# # │ null ┆ 0.0 │
|
4189
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4190
|
+
# # │ 12 ┆ 0.0 │
|
4191
|
+
# # └──────┴────────────┘
|
1160
4192
|
def pct_change(n: 1)
|
1161
4193
|
wrap_expr(_rbexpr.pct_change(n))
|
1162
4194
|
end
|
1163
4195
|
|
4196
|
+
# Compute the sample skewness of a data set.
|
4197
|
+
#
|
4198
|
+
# For normally distributed data, the skewness should be about zero. For
|
4199
|
+
# unimodal continuous distributions, a skewness value greater than zero means
|
4200
|
+
# that there is more weight in the right tail of the distribution. The
|
4201
|
+
# function `skewtest` can be used to determine if the skewness value
|
4202
|
+
# is close enough to zero, statistically speaking.
|
4203
|
+
#
|
4204
|
+
# @param bias [Boolean]
|
4205
|
+
# If false, the calculations are corrected for statistical bias.
|
4206
|
+
#
|
4207
|
+
# @return [Expr]
|
4208
|
+
#
|
4209
|
+
# @example
|
4210
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4211
|
+
# df.select(Polars.col("a").skew)
|
4212
|
+
# # =>
|
4213
|
+
# # shape: (1, 1)
|
4214
|
+
# # ┌──────────┐
|
4215
|
+
# # │ a │
|
4216
|
+
# # │ --- │
|
4217
|
+
# # │ f64 │
|
4218
|
+
# # ╞══════════╡
|
4219
|
+
# # │ 0.343622 │
|
4220
|
+
# # └──────────┘
|
1164
4221
|
def skew(bias: true)
|
1165
4222
|
wrap_expr(_rbexpr.skew(bias))
|
1166
4223
|
end
|
1167
4224
|
|
4225
|
+
# Compute the kurtosis (Fisher or Pearson) of a dataset.
|
4226
|
+
#
|
4227
|
+
# Kurtosis is the fourth central moment divided by the square of the
|
4228
|
+
# variance. If Fisher's definition is used, then 3.0 is subtracted from
|
4229
|
+
# the result to give 0.0 for a normal distribution.
|
4230
|
+
# If bias is false, then the kurtosis is calculated using k statistics to
|
4231
|
+
# eliminate bias coming from biased moment estimators
|
4232
|
+
#
|
4233
|
+
# @param fisher [Boolean]
|
4234
|
+
# If true, Fisher's definition is used (normal ==> 0.0). If false,
|
4235
|
+
# Pearson's definition is used (normal ==> 3.0).
|
4236
|
+
# @param bias [Boolean]
|
4237
|
+
# If false, the calculations are corrected for statistical bias.
|
4238
|
+
#
|
4239
|
+
# @return [Expr]
|
4240
|
+
#
|
4241
|
+
# @example
|
4242
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4243
|
+
# df.select(Polars.col("a").kurtosis)
|
4244
|
+
# # =>
|
4245
|
+
# # shape: (1, 1)
|
4246
|
+
# # ┌───────────┐
|
4247
|
+
# # │ a │
|
4248
|
+
# # │ --- │
|
4249
|
+
# # │ f64 │
|
4250
|
+
# # ╞═══════════╡
|
4251
|
+
# # │ -1.153061 │
|
4252
|
+
# # └───────────┘
|
1168
4253
|
def kurtosis(fisher: true, bias: true)
|
1169
4254
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
1170
4255
|
end
|
1171
4256
|
|
4257
|
+
# Clip (limit) the values in an array to a `min` and `max` boundary.
|
4258
|
+
#
|
4259
|
+
# Only works for numerical types.
|
4260
|
+
#
|
4261
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4262
|
+
# expression. See `when` for more information.
|
4263
|
+
#
|
4264
|
+
# @param min_val [Numeric]
|
4265
|
+
# Minimum value.
|
4266
|
+
# @param max_val [Numeric]
|
4267
|
+
# Maximum value.
|
4268
|
+
#
|
4269
|
+
# @return [Expr]
|
4270
|
+
#
|
4271
|
+
# @example
|
4272
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4273
|
+
# df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
|
4274
|
+
# # =>
|
4275
|
+
# # shape: (4, 2)
|
4276
|
+
# # ┌──────┬─────────────┐
|
4277
|
+
# # │ foo ┆ foo_clipped │
|
4278
|
+
# # │ --- ┆ --- │
|
4279
|
+
# # │ i64 ┆ i64 │
|
4280
|
+
# # ╞══════╪═════════════╡
|
4281
|
+
# # │ -50 ┆ 1 │
|
4282
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4283
|
+
# # │ 5 ┆ 5 │
|
4284
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4285
|
+
# # │ null ┆ null │
|
4286
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4287
|
+
# # │ 50 ┆ 10 │
|
4288
|
+
# # └──────┴─────────────┘
|
1172
4289
|
def clip(min_val, max_val)
|
1173
4290
|
wrap_expr(_rbexpr.clip(min_val, max_val))
|
1174
4291
|
end
|
1175
4292
|
|
4293
|
+
# Clip (limit) the values in an array to a `min` boundary.
|
4294
|
+
#
|
4295
|
+
# Only works for numerical types.
|
4296
|
+
#
|
4297
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4298
|
+
# expression. See `when` for more information.
|
4299
|
+
#
|
4300
|
+
# @param min_val [Numeric]
|
4301
|
+
# Minimum value.
|
4302
|
+
#
|
4303
|
+
# @return [Expr]
|
4304
|
+
#
|
4305
|
+
# @example
|
4306
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4307
|
+
# df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
|
4308
|
+
# # =>
|
4309
|
+
# # shape: (4, 2)
|
4310
|
+
# # ┌──────┬─────────────┐
|
4311
|
+
# # │ foo ┆ foo_clipped │
|
4312
|
+
# # │ --- ┆ --- │
|
4313
|
+
# # │ i64 ┆ i64 │
|
4314
|
+
# # ╞══════╪═════════════╡
|
4315
|
+
# # │ -50 ┆ 0 │
|
4316
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4317
|
+
# # │ 5 ┆ 5 │
|
4318
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4319
|
+
# # │ null ┆ null │
|
4320
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4321
|
+
# # │ 50 ┆ 50 │
|
4322
|
+
# # └──────┴─────────────┘
|
1176
4323
|
def clip_min(min_val)
|
1177
4324
|
wrap_expr(_rbexpr.clip_min(min_val))
|
1178
4325
|
end
|
1179
4326
|
|
4327
|
+
# Clip (limit) the values in an array to a `max` boundary.
|
4328
|
+
#
|
4329
|
+
# Only works for numerical types.
|
4330
|
+
#
|
4331
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4332
|
+
# expression. See `when` for more information.
|
4333
|
+
#
|
4334
|
+
# @param max_val [Numeric]
|
4335
|
+
# Maximum value.
|
4336
|
+
#
|
4337
|
+
# @return [Expr]
|
4338
|
+
#
|
4339
|
+
# @example
|
4340
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4341
|
+
# df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
|
4342
|
+
# # =>
|
4343
|
+
# # shape: (4, 2)
|
4344
|
+
# # ┌──────┬─────────────┐
|
4345
|
+
# # │ foo ┆ foo_clipped │
|
4346
|
+
# # │ --- ┆ --- │
|
4347
|
+
# # │ i64 ┆ i64 │
|
4348
|
+
# # ╞══════╪═════════════╡
|
4349
|
+
# # │ -50 ┆ -50 │
|
4350
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4351
|
+
# # │ 5 ┆ 0 │
|
4352
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4353
|
+
# # │ null ┆ null │
|
4354
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4355
|
+
# # │ 50 ┆ 0 │
|
4356
|
+
# # └──────┴─────────────┘
|
1180
4357
|
def clip_max(max_val)
|
1181
4358
|
wrap_expr(_rbexpr.clip_max(max_val))
|
1182
4359
|
end
|
1183
4360
|
|
4361
|
+
# Calculate the lower bound.
|
4362
|
+
#
|
4363
|
+
# Returns a unit Series with the lowest value possible for the dtype of this
|
4364
|
+
# expression.
|
4365
|
+
#
|
4366
|
+
# @return [Expr]
|
4367
|
+
#
|
4368
|
+
# @example
|
4369
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4370
|
+
# df.select(Polars.col("a").lower_bound)
|
4371
|
+
# # =>
|
4372
|
+
# # shape: (1, 1)
|
4373
|
+
# # ┌──────────────────────┐
|
4374
|
+
# # │ a │
|
4375
|
+
# # │ --- │
|
4376
|
+
# # │ i64 │
|
4377
|
+
# # ╞══════════════════════╡
|
4378
|
+
# # │ -9223372036854775808 │
|
4379
|
+
# # └──────────────────────┘
|
1184
4380
|
def lower_bound
|
1185
4381
|
wrap_expr(_rbexpr.lower_bound)
|
1186
4382
|
end
|
1187
4383
|
|
4384
|
+
# Calculate the upper bound.
|
4385
|
+
#
|
4386
|
+
# Returns a unit Series with the highest value possible for the dtype of this
|
4387
|
+
# expression.
|
4388
|
+
#
|
4389
|
+
# @return [Expr]
|
4390
|
+
#
|
4391
|
+
# @example
|
4392
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4393
|
+
# df.select(Polars.col("a").upper_bound)
|
4394
|
+
# # =>
|
4395
|
+
# # shape: (1, 1)
|
4396
|
+
# # ┌─────────────────────┐
|
4397
|
+
# # │ a │
|
4398
|
+
# # │ --- │
|
4399
|
+
# # │ i64 │
|
4400
|
+
# # ╞═════════════════════╡
|
4401
|
+
# # │ 9223372036854775807 │
|
4402
|
+
# # └─────────────────────┘
|
1188
4403
|
def upper_bound
|
1189
4404
|
wrap_expr(_rbexpr.upper_bound)
|
1190
4405
|
end
|
1191
4406
|
|
4407
|
+
# Compute the element-wise indication of the sign.
|
4408
|
+
#
|
4409
|
+
# @return [Expr]
|
4410
|
+
#
|
4411
|
+
# @example
|
4412
|
+
# df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
|
4413
|
+
# df.select(Polars.col("a").sign)
|
4414
|
+
# # =>
|
4415
|
+
# # shape: (5, 1)
|
4416
|
+
# # ┌──────┐
|
4417
|
+
# # │ a │
|
4418
|
+
# # │ --- │
|
4419
|
+
# # │ i64 │
|
4420
|
+
# # ╞══════╡
|
4421
|
+
# # │ -1 │
|
4422
|
+
# # ├╌╌╌╌╌╌┤
|
4423
|
+
# # │ 0 │
|
4424
|
+
# # ├╌╌╌╌╌╌┤
|
4425
|
+
# # │ 0 │
|
4426
|
+
# # ├╌╌╌╌╌╌┤
|
4427
|
+
# # │ 1 │
|
4428
|
+
# # ├╌╌╌╌╌╌┤
|
4429
|
+
# # │ null │
|
4430
|
+
# # └──────┘
|
1192
4431
|
def sign
|
1193
4432
|
wrap_expr(_rbexpr.sign)
|
1194
4433
|
end
|
1195
4434
|
|
4435
|
+
# Compute the element-wise value for the sine.
|
4436
|
+
#
|
4437
|
+
# @return [Expr]
|
4438
|
+
#
|
4439
|
+
# @example
|
4440
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4441
|
+
# df.select(Polars.col("a").sin)
|
4442
|
+
# # =>
|
4443
|
+
# # shape: (1, 1)
|
4444
|
+
# # ┌─────┐
|
4445
|
+
# # │ a │
|
4446
|
+
# # │ --- │
|
4447
|
+
# # │ f64 │
|
4448
|
+
# # ╞═════╡
|
4449
|
+
# # │ 0.0 │
|
4450
|
+
# # └─────┘
|
1196
4451
|
def sin
|
1197
4452
|
wrap_expr(_rbexpr.sin)
|
1198
4453
|
end
|
1199
4454
|
|
4455
|
+
# Compute the element-wise value for the cosine.
|
4456
|
+
#
|
4457
|
+
# @return [Expr]
|
4458
|
+
#
|
4459
|
+
# @example
|
4460
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4461
|
+
# df.select(Polars.col("a").cos)
|
4462
|
+
# # =>
|
4463
|
+
# # shape: (1, 1)
|
4464
|
+
# # ┌─────┐
|
4465
|
+
# # │ a │
|
4466
|
+
# # │ --- │
|
4467
|
+
# # │ f64 │
|
4468
|
+
# # ╞═════╡
|
4469
|
+
# # │ 1.0 │
|
4470
|
+
# # └─────┘
|
1200
4471
|
def cos
|
1201
4472
|
wrap_expr(_rbexpr.cos)
|
1202
4473
|
end
|
1203
4474
|
|
4475
|
+
# Compute the element-wise value for the tangent.
|
4476
|
+
#
|
4477
|
+
# @return [Expr]
|
4478
|
+
#
|
4479
|
+
# @example
|
4480
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4481
|
+
# df.select(Polars.col("a").tan)
|
4482
|
+
# # =>
|
4483
|
+
# # shape: (1, 1)
|
4484
|
+
# # ┌──────────┐
|
4485
|
+
# # │ a │
|
4486
|
+
# # │ --- │
|
4487
|
+
# # │ f64 │
|
4488
|
+
# # ╞══════════╡
|
4489
|
+
# # │ 1.557408 │
|
4490
|
+
# # └──────────┘
|
1204
4491
|
def tan
|
1205
4492
|
wrap_expr(_rbexpr.tan)
|
1206
4493
|
end
|
1207
4494
|
|
4495
|
+
# Compute the element-wise value for the inverse sine.
|
4496
|
+
#
|
4497
|
+
# @return [Expr]
|
4498
|
+
#
|
4499
|
+
# @example
|
4500
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4501
|
+
# df.select(Polars.col("a").arcsin)
|
4502
|
+
# # =>
|
4503
|
+
# # shape: (1, 1)
|
4504
|
+
# # ┌──────────┐
|
4505
|
+
# # │ a │
|
4506
|
+
# # │ --- │
|
4507
|
+
# # │ f64 │
|
4508
|
+
# # ╞══════════╡
|
4509
|
+
# # │ 1.570796 │
|
4510
|
+
# # └──────────┘
|
1208
4511
|
def arcsin
|
1209
4512
|
wrap_expr(_rbexpr.arcsin)
|
1210
4513
|
end
|
1211
4514
|
|
4515
|
+
# Compute the element-wise value for the inverse cosine.
|
4516
|
+
#
|
4517
|
+
# @return [Expr]
|
4518
|
+
#
|
4519
|
+
# @example
|
4520
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4521
|
+
# df.select(Polars.col("a").arccos)
|
4522
|
+
# # =>
|
4523
|
+
# # shape: (1, 1)
|
4524
|
+
# # ┌──────────┐
|
4525
|
+
# # │ a │
|
4526
|
+
# # │ --- │
|
4527
|
+
# # │ f64 │
|
4528
|
+
# # ╞══════════╡
|
4529
|
+
# # │ 1.570796 │
|
4530
|
+
# # └──────────┘
|
1212
4531
|
def arccos
|
1213
4532
|
wrap_expr(_rbexpr.arccos)
|
1214
4533
|
end
|
1215
4534
|
|
4535
|
+
# Compute the element-wise value for the inverse tangent.
|
4536
|
+
#
|
4537
|
+
# @return [Expr]
|
4538
|
+
#
|
4539
|
+
# @example
|
4540
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4541
|
+
# df.select(Polars.col("a").arctan)
|
4542
|
+
# # =>
|
4543
|
+
# # shape: (1, 1)
|
4544
|
+
# # ┌──────────┐
|
4545
|
+
# # │ a │
|
4546
|
+
# # │ --- │
|
4547
|
+
# # │ f64 │
|
4548
|
+
# # ╞══════════╡
|
4549
|
+
# # │ 0.785398 │
|
4550
|
+
# # └──────────┘
|
1216
4551
|
def arctan
|
1217
4552
|
wrap_expr(_rbexpr.arctan)
|
1218
4553
|
end
|
1219
4554
|
|
4555
|
+
# Compute the element-wise value for the hyperbolic sine.
|
4556
|
+
#
|
4557
|
+
# @return [Expr]
|
4558
|
+
#
|
4559
|
+
# @example
|
4560
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4561
|
+
# df.select(Polars.col("a").sinh)
|
4562
|
+
# # =>
|
4563
|
+
# # shape: (1, 1)
|
4564
|
+
# # ┌──────────┐
|
4565
|
+
# # │ a │
|
4566
|
+
# # │ --- │
|
4567
|
+
# # │ f64 │
|
4568
|
+
# # ╞══════════╡
|
4569
|
+
# # │ 1.175201 │
|
4570
|
+
# # └──────────┘
|
1220
4571
|
def sinh
|
1221
4572
|
wrap_expr(_rbexpr.sinh)
|
1222
4573
|
end
|
1223
4574
|
|
4575
|
+
# Compute the element-wise value for the hyperbolic cosine.
|
4576
|
+
#
|
4577
|
+
# @return [Expr]
|
4578
|
+
#
|
4579
|
+
# @example
|
4580
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4581
|
+
# df.select(Polars.col("a").cosh)
|
4582
|
+
# # =>
|
4583
|
+
# # shape: (1, 1)
|
4584
|
+
# # ┌──────────┐
|
4585
|
+
# # │ a │
|
4586
|
+
# # │ --- │
|
4587
|
+
# # │ f64 │
|
4588
|
+
# # ╞══════════╡
|
4589
|
+
# # │ 1.543081 │
|
4590
|
+
# # └──────────┘
|
1224
4591
|
def cosh
|
1225
4592
|
wrap_expr(_rbexpr.cosh)
|
1226
4593
|
end
|
1227
4594
|
|
4595
|
+
# Compute the element-wise value for the hyperbolic tangent.
|
4596
|
+
#
|
4597
|
+
# @return [Expr]
|
4598
|
+
#
|
4599
|
+
# @example
|
4600
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4601
|
+
# df.select(Polars.col("a").tanh)
|
4602
|
+
# # =>
|
4603
|
+
# # shape: (1, 1)
|
4604
|
+
# # ┌──────────┐
|
4605
|
+
# # │ a │
|
4606
|
+
# # │ --- │
|
4607
|
+
# # │ f64 │
|
4608
|
+
# # ╞══════════╡
|
4609
|
+
# # │ 0.761594 │
|
4610
|
+
# # └──────────┘
|
1228
4611
|
def tanh
|
1229
4612
|
wrap_expr(_rbexpr.tanh)
|
1230
4613
|
end
|
1231
4614
|
|
4615
|
+
# Compute the element-wise value for the inverse hyperbolic sine.
|
4616
|
+
#
|
4617
|
+
# @return [Expr]
|
4618
|
+
#
|
4619
|
+
# @example
|
4620
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4621
|
+
# df.select(Polars.col("a").arcsinh)
|
4622
|
+
# # =>
|
4623
|
+
# # shape: (1, 1)
|
4624
|
+
# # ┌──────────┐
|
4625
|
+
# # │ a │
|
4626
|
+
# # │ --- │
|
4627
|
+
# # │ f64 │
|
4628
|
+
# # ╞══════════╡
|
4629
|
+
# # │ 0.881374 │
|
4630
|
+
# # └──────────┘
|
1232
4631
|
def arcsinh
|
1233
4632
|
wrap_expr(_rbexpr.arcsinh)
|
1234
4633
|
end
|
1235
4634
|
|
4635
|
+
# Compute the element-wise value for the inverse hyperbolic cosine.
|
4636
|
+
#
|
4637
|
+
# @return [Expr]
|
4638
|
+
#
|
4639
|
+
# @example
|
4640
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4641
|
+
# df.select(Polars.col("a").arccosh)
|
4642
|
+
# # =>
|
4643
|
+
# # shape: (1, 1)
|
4644
|
+
# # ┌─────┐
|
4645
|
+
# # │ a │
|
4646
|
+
# # │ --- │
|
4647
|
+
# # │ f64 │
|
4648
|
+
# # ╞═════╡
|
4649
|
+
# # │ 0.0 │
|
4650
|
+
# # └─────┘
|
1236
4651
|
def arccosh
|
1237
4652
|
wrap_expr(_rbexpr.arccosh)
|
1238
4653
|
end
|
1239
4654
|
|
4655
|
+
# Compute the element-wise value for the inverse hyperbolic tangent.
|
4656
|
+
#
|
4657
|
+
# @return [Expr]
|
4658
|
+
#
|
4659
|
+
# @example
|
4660
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4661
|
+
# df.select(Polars.col("a").arctanh)
|
4662
|
+
# # =>
|
4663
|
+
# # shape: (1, 1)
|
4664
|
+
# # ┌─────┐
|
4665
|
+
# # │ a │
|
4666
|
+
# # │ --- │
|
4667
|
+
# # │ f64 │
|
4668
|
+
# # ╞═════╡
|
4669
|
+
# # │ inf │
|
4670
|
+
# # └─────┘
|
1240
4671
|
def arctanh
|
1241
4672
|
wrap_expr(_rbexpr.arctanh)
|
1242
4673
|
end
|
1243
4674
|
|
4675
|
+
# Reshape this Expr to a flat Series or a Series of Lists.
|
4676
|
+
#
|
4677
|
+
# @param dims [Array]
|
4678
|
+
# Array of the dimension sizes. If -1 is used in any of the dimensions, that
|
4679
|
+
# dimension is inferred.
|
4680
|
+
#
|
4681
|
+
# @return [Expr]
|
4682
|
+
#
|
4683
|
+
# @example
|
4684
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
4685
|
+
# df.select(Polars.col("foo").reshape([3, 3]))
|
4686
|
+
# # =>
|
4687
|
+
# # shape: (3, 1)
|
4688
|
+
# # ┌───────────┐
|
4689
|
+
# # │ foo │
|
4690
|
+
# # │ --- │
|
4691
|
+
# # │ list[i64] │
|
4692
|
+
# # ╞═══════════╡
|
4693
|
+
# # │ [1, 2, 3] │
|
4694
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4695
|
+
# # │ [4, 5, 6] │
|
4696
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4697
|
+
# # │ [7, 8, 9] │
|
4698
|
+
# # └───────────┘
|
1244
4699
|
def reshape(dims)
|
1245
4700
|
wrap_expr(_rbexpr.reshape(dims))
|
1246
4701
|
end
|
1247
4702
|
|
4703
|
+
# Shuffle the contents of this expr.
|
4704
|
+
#
|
4705
|
+
# @param seed [Integer]
|
4706
|
+
# Seed for the random number generator. If set to nil (default), a random
|
4707
|
+
# seed is generated with `rand`.
|
4708
|
+
#
|
4709
|
+
# @return [Expr]
|
4710
|
+
#
|
4711
|
+
# @example
|
4712
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4713
|
+
# df.select(Polars.col("a").shuffle(seed: 1))
|
4714
|
+
# # =>
|
4715
|
+
# # shape: (3, 1)
|
4716
|
+
# # ┌─────┐
|
4717
|
+
# # │ a │
|
4718
|
+
# # │ --- │
|
4719
|
+
# # │ i64 │
|
4720
|
+
# # ╞═════╡
|
4721
|
+
# # │ 2 │
|
4722
|
+
# # ├╌╌╌╌╌┤
|
4723
|
+
# # │ 1 │
|
4724
|
+
# # ├╌╌╌╌╌┤
|
4725
|
+
# # │ 3 │
|
4726
|
+
# # └─────┘
|
1248
4727
|
def shuffle(seed: nil)
|
1249
4728
|
if seed.nil?
|
1250
4729
|
seed = rand(10000)
|
@@ -1252,74 +4731,514 @@ module Polars
|
|
1252
4731
|
wrap_expr(_rbexpr.shuffle(seed))
|
1253
4732
|
end
|
1254
4733
|
|
1255
|
-
#
|
1256
|
-
#
|
1257
|
-
|
1258
|
-
#
|
1259
|
-
#
|
4734
|
+
# Sample from this expression.
|
4735
|
+
#
|
4736
|
+
# @param frac [Float]
|
4737
|
+
# Fraction of items to return. Cannot be used with `n`.
|
4738
|
+
# @param with_replacement [Boolean]
|
4739
|
+
# Allow values to be sampled more than once.
|
4740
|
+
# @param shuffle [Boolean]
|
4741
|
+
# Shuffle the order of sampled data points.
|
4742
|
+
# @param seed [Integer]
|
4743
|
+
# Seed for the random number generator. If set to nil (default), a random
|
4744
|
+
# seed is used.
|
4745
|
+
# @param n [Integer]
|
4746
|
+
# Number of items to return. Cannot be used with `frac`.
|
4747
|
+
#
|
4748
|
+
# @return [Expr]
|
4749
|
+
#
|
4750
|
+
# @example
|
4751
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4752
|
+
# df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
|
4753
|
+
# # =>
|
4754
|
+
# # shape: (3, 1)
|
4755
|
+
# # ┌─────┐
|
4756
|
+
# # │ a │
|
4757
|
+
# # │ --- │
|
4758
|
+
# # │ i64 │
|
4759
|
+
# # ╞═════╡
|
4760
|
+
# # │ 3 │
|
4761
|
+
# # ├╌╌╌╌╌┤
|
4762
|
+
# # │ 1 │
|
4763
|
+
# # ├╌╌╌╌╌┤
|
4764
|
+
# # │ 1 │
|
4765
|
+
# # └─────┘
|
4766
|
+
def sample(
|
4767
|
+
frac: nil,
|
4768
|
+
with_replacement: true,
|
4769
|
+
shuffle: false,
|
4770
|
+
seed: nil,
|
4771
|
+
n: nil
|
4772
|
+
)
|
4773
|
+
if !n.nil? && !frac.nil?
|
4774
|
+
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4775
|
+
end
|
1260
4776
|
|
1261
|
-
|
1262
|
-
|
4777
|
+
if !n.nil? && frac.nil?
|
4778
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4779
|
+
end
|
1263
4780
|
|
1264
|
-
|
1265
|
-
|
4781
|
+
if frac.nil?
|
4782
|
+
frac = 1.0
|
4783
|
+
end
|
4784
|
+
wrap_expr(
|
4785
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4786
|
+
)
|
4787
|
+
end
|
1266
4788
|
|
4789
|
+
# Exponentially-weighted moving average.
|
4790
|
+
#
|
4791
|
+
# @return [Expr]
|
4792
|
+
#
|
4793
|
+
# @example
|
4794
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4795
|
+
# df.select(Polars.col("a").ewm_mean(com: 1))
|
4796
|
+
# # =>
|
4797
|
+
# # shape: (3, 1)
|
4798
|
+
# # ┌──────────┐
|
4799
|
+
# # │ a │
|
4800
|
+
# # │ --- │
|
4801
|
+
# # │ f64 │
|
4802
|
+
# # ╞══════════╡
|
4803
|
+
# # │ 1.0 │
|
4804
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4805
|
+
# # │ 1.666667 │
|
4806
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4807
|
+
# # │ 2.428571 │
|
4808
|
+
# # └──────────┘
|
4809
|
+
def ewm_mean(
|
4810
|
+
com: nil,
|
4811
|
+
span: nil,
|
4812
|
+
half_life: nil,
|
4813
|
+
alpha: nil,
|
4814
|
+
adjust: true,
|
4815
|
+
min_periods: 1
|
4816
|
+
)
|
4817
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4818
|
+
wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
|
4819
|
+
end
|
4820
|
+
|
4821
|
+
# Exponentially-weighted moving standard deviation.
|
4822
|
+
#
|
4823
|
+
# @return [Expr]
|
4824
|
+
#
|
4825
|
+
# @example
|
4826
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4827
|
+
# df.select(Polars.col("a").ewm_std(com: 1))
|
4828
|
+
# # =>
|
4829
|
+
# # shape: (3, 1)
|
4830
|
+
# # ┌──────────┐
|
4831
|
+
# # │ a │
|
4832
|
+
# # │ --- │
|
4833
|
+
# # │ f64 │
|
4834
|
+
# # ╞══════════╡
|
4835
|
+
# # │ 0.0 │
|
4836
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4837
|
+
# # │ 0.707107 │
|
4838
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4839
|
+
# # │ 0.963624 │
|
4840
|
+
# # └──────────┘
|
4841
|
+
def ewm_std(
|
4842
|
+
com: nil,
|
4843
|
+
span: nil,
|
4844
|
+
half_life: nil,
|
4845
|
+
alpha: nil,
|
4846
|
+
adjust: true,
|
4847
|
+
bias: false,
|
4848
|
+
min_periods: 1
|
4849
|
+
)
|
4850
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4851
|
+
wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
|
4852
|
+
end
|
4853
|
+
|
4854
|
+
# Exponentially-weighted moving variance.
|
4855
|
+
#
|
4856
|
+
# @return [Expr]
|
4857
|
+
#
|
4858
|
+
# @example
|
4859
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4860
|
+
# df.select(Polars.col("a").ewm_var(com: 1))
|
4861
|
+
# # =>
|
4862
|
+
# # shape: (3, 1)
|
4863
|
+
# # ┌──────────┐
|
4864
|
+
# # │ a │
|
4865
|
+
# # │ --- │
|
4866
|
+
# # │ f64 │
|
4867
|
+
# # ╞══════════╡
|
4868
|
+
# # │ 0.0 │
|
4869
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4870
|
+
# # │ 0.5 │
|
4871
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4872
|
+
# # │ 0.928571 │
|
4873
|
+
# # └──────────┘
|
4874
|
+
def ewm_var(
|
4875
|
+
com: nil,
|
4876
|
+
span: nil,
|
4877
|
+
half_life: nil,
|
4878
|
+
alpha: nil,
|
4879
|
+
adjust: true,
|
4880
|
+
bias: false,
|
4881
|
+
min_periods: 1
|
4882
|
+
)
|
4883
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4884
|
+
wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
|
4885
|
+
end
|
4886
|
+
|
4887
|
+
# Extend the Series with a given number of values.
|
4888
|
+
#
|
4889
|
+
# @param value [Object]
|
4890
|
+
# The value to extend the Series with. This value may be nil to fill with
|
4891
|
+
# nulls.
|
4892
|
+
# @param n [Integer]
|
4893
|
+
# The number of values to extend.
|
4894
|
+
#
|
4895
|
+
# @return [Expr]
|
1267
4896
|
#
|
4897
|
+
# @example
|
4898
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3]})
|
4899
|
+
# df.select(Polars.col("values").extend_constant(99, 2))
|
4900
|
+
# # =>
|
4901
|
+
# # shape: (5, 1)
|
4902
|
+
# # ┌────────┐
|
4903
|
+
# # │ values │
|
4904
|
+
# # │ --- │
|
4905
|
+
# # │ i64 │
|
4906
|
+
# # ╞════════╡
|
4907
|
+
# # │ 1 │
|
4908
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4909
|
+
# # │ 2 │
|
4910
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4911
|
+
# # │ 3 │
|
4912
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4913
|
+
# # │ 99 │
|
4914
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4915
|
+
# # │ 99 │
|
4916
|
+
# # └────────┘
|
1268
4917
|
def extend_constant(value, n)
|
1269
4918
|
wrap_expr(_rbexpr.extend_constant(value, n))
|
1270
4919
|
end
|
1271
4920
|
|
4921
|
+
# Count all unique values and create a struct mapping value to count.
|
4922
|
+
#
|
4923
|
+
# @param multithreaded [Boolean]
|
4924
|
+
# Better to turn this off in the aggregation context, as it can lead to
|
4925
|
+
# contention.
|
4926
|
+
# @param sort [Boolean]
|
4927
|
+
# Ensure the output is sorted from most values to least.
|
4928
|
+
#
|
4929
|
+
# @return [Expr]
|
4930
|
+
#
|
4931
|
+
# @example
|
4932
|
+
# df = Polars::DataFrame.new(
|
4933
|
+
# {
|
4934
|
+
# "id" => ["a", "b", "b", "c", "c", "c"]
|
4935
|
+
# }
|
4936
|
+
# )
|
4937
|
+
# df.select(
|
4938
|
+
# [
|
4939
|
+
# Polars.col("id").value_counts(sort: true),
|
4940
|
+
# ]
|
4941
|
+
# )
|
4942
|
+
# # =>
|
4943
|
+
# # shape: (3, 1)
|
4944
|
+
# # ┌───────────┐
|
4945
|
+
# # │ id │
|
4946
|
+
# # │ --- │
|
4947
|
+
# # │ struct[2] │
|
4948
|
+
# # ╞═══════════╡
|
4949
|
+
# # │ {"c",3} │
|
4950
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4951
|
+
# # │ {"b",2} │
|
4952
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4953
|
+
# # │ {"a",1} │
|
4954
|
+
# # └───────────┘
|
1272
4955
|
def value_counts(multithreaded: false, sort: false)
|
1273
4956
|
wrap_expr(_rbexpr.value_counts(multithreaded, sort))
|
1274
4957
|
end
|
1275
4958
|
|
4959
|
+
# Return a count of the unique values in the order of appearance.
|
4960
|
+
#
|
4961
|
+
# This method differs from `value_counts` in that it does not return the
|
4962
|
+
# values, only the counts, and might be faster.
|
4963
|
+
#
|
4964
|
+
# @return [Expr]
|
4965
|
+
#
|
4966
|
+
# @example
|
4967
|
+
# df = Polars::DataFrame.new(
|
4968
|
+
# {
|
4969
|
+
# "id" => ["a", "b", "b", "c", "c", "c"]
|
4970
|
+
# }
|
4971
|
+
# )
|
4972
|
+
# df.select(
|
4973
|
+
# [
|
4974
|
+
# Polars.col("id").unique_counts
|
4975
|
+
# ]
|
4976
|
+
# )
|
4977
|
+
# # =>
|
4978
|
+
# # shape: (3, 1)
|
4979
|
+
# # ┌─────┐
|
4980
|
+
# # │ id │
|
4981
|
+
# # │ --- │
|
4982
|
+
# # │ u32 │
|
4983
|
+
# # ╞═════╡
|
4984
|
+
# # │ 1 │
|
4985
|
+
# # ├╌╌╌╌╌┤
|
4986
|
+
# # │ 2 │
|
4987
|
+
# # ├╌╌╌╌╌┤
|
4988
|
+
# # │ 3 │
|
4989
|
+
# # └─────┘
|
1276
4990
|
def unique_counts
|
1277
4991
|
wrap_expr(_rbexpr.unique_counts)
|
1278
4992
|
end
|
1279
4993
|
|
4994
|
+
# Compute the logarithm to a given base.
|
4995
|
+
#
|
4996
|
+
# @param base [Float]
|
4997
|
+
# Given base, defaults to `e`.
|
4998
|
+
#
|
4999
|
+
# @return [Expr]
|
5000
|
+
#
|
5001
|
+
# @example
|
5002
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
5003
|
+
# df.select(Polars.col("a").log(2))
|
5004
|
+
# # =>
|
5005
|
+
# # shape: (3, 1)
|
5006
|
+
# # ┌──────────┐
|
5007
|
+
# # │ a │
|
5008
|
+
# # │ --- │
|
5009
|
+
# # │ f64 │
|
5010
|
+
# # ╞══════════╡
|
5011
|
+
# # │ 0.0 │
|
5012
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
5013
|
+
# # │ 1.0 │
|
5014
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
5015
|
+
# # │ 1.584963 │
|
5016
|
+
# # └──────────┘
|
1280
5017
|
def log(base = Math::E)
|
1281
5018
|
wrap_expr(_rbexpr.log(base))
|
1282
5019
|
end
|
1283
5020
|
|
1284
|
-
|
5021
|
+
# Computes the entropy.
|
5022
|
+
#
|
5023
|
+
# Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
|
5024
|
+
#
|
5025
|
+
# @param base [Float]
|
5026
|
+
# Given base, defaults to `2`.
|
5027
|
+
# @param normalize [Boolean]
|
5028
|
+
# Normalize pk if it doesn't sum to 1.
|
5029
|
+
#
|
5030
|
+
# @return [Expr]
|
5031
|
+
#
|
5032
|
+
# @example
|
5033
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
5034
|
+
# df.select(Polars.col("a").entropy(base: 2))
|
5035
|
+
# # =>
|
5036
|
+
# # shape: (1, 1)
|
5037
|
+
# # ┌──────────┐
|
5038
|
+
# # │ a │
|
5039
|
+
# # │ --- │
|
5040
|
+
# # │ f64 │
|
5041
|
+
# # ╞══════════╡
|
5042
|
+
# # │ 1.459148 │
|
5043
|
+
# # └──────────┘
|
5044
|
+
#
|
5045
|
+
# @example
|
5046
|
+
# df.select(Polars.col("a").entropy(base: 2, normalize: false))
|
5047
|
+
# # =>
|
5048
|
+
# # shape: (1, 1)
|
5049
|
+
# # ┌───────────┐
|
5050
|
+
# # │ a │
|
5051
|
+
# # │ --- │
|
5052
|
+
# # │ f64 │
|
5053
|
+
# # ╞═══════════╡
|
5054
|
+
# # │ -6.754888 │
|
5055
|
+
# # └───────────┘
|
5056
|
+
def entropy(base: 2, normalize: true)
|
1285
5057
|
wrap_expr(_rbexpr.entropy(base, normalize))
|
1286
5058
|
end
|
1287
5059
|
|
1288
|
-
#
|
1289
|
-
#
|
1290
|
-
|
1291
|
-
#
|
5060
|
+
# Run an expression over a sliding window that increases `1` slot every iteration.
|
5061
|
+
#
|
5062
|
+
# @param expr [Expr]
|
5063
|
+
# Expression to evaluate
|
5064
|
+
# @param min_periods [Integer]
|
5065
|
+
# Number of valid values there should be in the window before the expression
|
5066
|
+
# is evaluated. valid values = `length - null_count`
|
5067
|
+
# @param parallel [Boolean]
|
5068
|
+
# Run in parallel. Don't do this in a groupby or another operation that
|
5069
|
+
# already has much parallelization.
|
5070
|
+
#
|
5071
|
+
# @return [Expr]
|
5072
|
+
#
|
5073
|
+
# @note
|
5074
|
+
# This functionality is experimental and may change without it being considered a
|
5075
|
+
# breaking change.
|
5076
|
+
#
|
5077
|
+
# @note
|
5078
|
+
# This can be really slow as it can have `O(n^2)` complexity. Don't use this
|
5079
|
+
# for operations that visit all elements.
|
5080
|
+
#
|
5081
|
+
# @example
|
5082
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
|
5083
|
+
# df.select(
|
5084
|
+
# [
|
5085
|
+
# Polars.col("values").cumulative_eval(
|
5086
|
+
# Polars.element.first - Polars.element.last ** 2
|
5087
|
+
# )
|
5088
|
+
# ]
|
5089
|
+
# )
|
5090
|
+
# # =>
|
5091
|
+
# # shape: (5, 1)
|
5092
|
+
# # ┌────────┐
|
5093
|
+
# # │ values │
|
5094
|
+
# # │ --- │
|
5095
|
+
# # │ f64 │
|
5096
|
+
# # ╞════════╡
|
5097
|
+
# # │ 0.0 │
|
5098
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
5099
|
+
# # │ -3.0 │
|
5100
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
5101
|
+
# # │ -8.0 │
|
5102
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
5103
|
+
# # │ -15.0 │
|
5104
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
5105
|
+
# # │ -24.0 │
|
5106
|
+
# # └────────┘
|
5107
|
+
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
5108
|
+
wrap_expr(
|
5109
|
+
_rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
|
5110
|
+
)
|
5111
|
+
end
|
5112
|
+
|
5113
|
+
# Flags the expression as 'sorted'.
|
5114
|
+
#
|
5115
|
+
# Enables downstream code to use fast paths for sorted arrays.
|
5116
|
+
#
|
5117
|
+
# @param reverse [Boolean]
|
5118
|
+
# If the `Series` order is reversed, e.g. descending.
|
5119
|
+
#
|
5120
|
+
# @return [Expr]
|
5121
|
+
#
|
5122
|
+
# @note
|
5123
|
+
# This can lead to incorrect results if this `Series` is not sorted!!
|
5124
|
+
# Use with care!
|
5125
|
+
#
|
5126
|
+
# @example
|
5127
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3]})
|
5128
|
+
# df.select(Polars.col("values").set_sorted.max)
|
5129
|
+
# # =>
|
5130
|
+
# # shape: (1, 1)
|
5131
|
+
# # ┌────────┐
|
5132
|
+
# # │ values │
|
5133
|
+
# # │ --- │
|
5134
|
+
# # │ i64 │
|
5135
|
+
# # ╞════════╡
|
5136
|
+
# # │ 3 │
|
5137
|
+
# # └────────┘
|
5138
|
+
# def set_sorted(reverse: false)
|
5139
|
+
# map { |s| s.set_sorted(reverse) }
|
1292
5140
|
# end
|
1293
5141
|
|
5142
|
+
# Aggregate to list.
|
5143
|
+
#
|
5144
|
+
# @return [Expr]
|
1294
5145
|
#
|
5146
|
+
# @example
|
5147
|
+
# df = Polars::DataFrame.new(
|
5148
|
+
# {
|
5149
|
+
# "a" => [1, 2, 3],
|
5150
|
+
# "b" => [4, 5, 6]
|
5151
|
+
# }
|
5152
|
+
# )
|
5153
|
+
# df.select(Polars.all.list)
|
5154
|
+
# # =>
|
5155
|
+
# # shape: (1, 2)
|
5156
|
+
# # ┌───────────┬───────────┐
|
5157
|
+
# # │ a ┆ b │
|
5158
|
+
# # │ --- ┆ --- │
|
5159
|
+
# # │ list[i64] ┆ list[i64] │
|
5160
|
+
# # ╞═══════════╪═══════════╡
|
5161
|
+
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
5162
|
+
# # └───────────┴───────────┘
|
1295
5163
|
def list
|
1296
5164
|
wrap_expr(_rbexpr.list)
|
1297
5165
|
end
|
1298
5166
|
|
5167
|
+
# Shrink numeric columns to the minimal required datatype.
|
5168
|
+
#
|
5169
|
+
# Shrink to the dtype needed to fit the extrema of this `Series`.
|
5170
|
+
# This can be used to reduce memory pressure.
|
5171
|
+
#
|
5172
|
+
# @return [Expr]
|
5173
|
+
#
|
5174
|
+
# @example
|
5175
|
+
# Polars::DataFrame.new(
|
5176
|
+
# {
|
5177
|
+
# "a" => [1, 2, 3],
|
5178
|
+
# "b" => [1, 2, 2 << 32],
|
5179
|
+
# "c" => [-1, 2, 1 << 30],
|
5180
|
+
# "d" => [-112, 2, 112],
|
5181
|
+
# "e" => [-112, 2, 129],
|
5182
|
+
# "f" => ["a", "b", "c"],
|
5183
|
+
# "g" => [0.1, 1.32, 0.12],
|
5184
|
+
# "h" => [true, nil, false]
|
5185
|
+
# }
|
5186
|
+
# ).select(Polars.all.shrink_dtype)
|
5187
|
+
# # =>
|
5188
|
+
# # shape: (3, 8)
|
5189
|
+
# # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
|
5190
|
+
# # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
|
5191
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
5192
|
+
# # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
|
5193
|
+
# # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
|
5194
|
+
# # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
|
5195
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
5196
|
+
# # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
|
5197
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
5198
|
+
# # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
|
5199
|
+
# # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
|
1299
5200
|
def shrink_dtype
|
1300
5201
|
wrap_expr(_rbexpr.shrink_dtype)
|
1301
5202
|
end
|
1302
5203
|
|
5204
|
+
# Create an object namespace of all list related methods.
|
5205
|
+
#
|
5206
|
+
# @return [ListExpr]
|
1303
5207
|
def arr
|
1304
5208
|
ListExpr.new(self)
|
1305
5209
|
end
|
1306
5210
|
|
5211
|
+
# Create an object namespace of all categorical related methods.
|
5212
|
+
#
|
5213
|
+
# @return [CatExpr]
|
1307
5214
|
def cat
|
1308
5215
|
CatExpr.new(self)
|
1309
5216
|
end
|
1310
5217
|
|
5218
|
+
# Create an object namespace of all datetime related methods.
|
5219
|
+
#
|
5220
|
+
# @return [DateTimeExpr]
|
1311
5221
|
def dt
|
1312
5222
|
DateTimeExpr.new(self)
|
1313
5223
|
end
|
1314
5224
|
|
5225
|
+
# Create an object namespace of all meta related expression methods.
|
5226
|
+
#
|
5227
|
+
# @return [MetaExpr]
|
1315
5228
|
def meta
|
1316
5229
|
MetaExpr.new(self)
|
1317
5230
|
end
|
1318
5231
|
|
5232
|
+
# Create an object namespace of all string related methods.
|
5233
|
+
#
|
5234
|
+
# @return [StringExpr]
|
1319
5235
|
def str
|
1320
5236
|
StringExpr.new(self)
|
1321
5237
|
end
|
1322
5238
|
|
5239
|
+
# Create an object namespace of all struct related methods.
|
5240
|
+
#
|
5241
|
+
# @return [StructExpr]
|
1323
5242
|
def struct
|
1324
5243
|
StructExpr.new(self)
|
1325
5244
|
end
|
@@ -1337,5 +5256,51 @@ module Polars
|
|
1337
5256
|
def _to_expr(other)
|
1338
5257
|
other.is_a?(Expr) ? other : Utils.lit(other)
|
1339
5258
|
end
|
5259
|
+
|
5260
|
+
def _prepare_alpha(com, span, half_life, alpha)
|
5261
|
+
if [com, span, half_life, alpha].count { |v| !v.nil? } > 1
|
5262
|
+
raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
|
5263
|
+
end
|
5264
|
+
|
5265
|
+
if !com.nil?
|
5266
|
+
if com < 0.0
|
5267
|
+
raise ArgumentError, "Require 'com' >= 0 (found #{com})"
|
5268
|
+
end
|
5269
|
+
alpha = 1.0 / (1.0 + com)
|
5270
|
+
|
5271
|
+
elsif !span.nil?
|
5272
|
+
if span < 1.0
|
5273
|
+
raise ArgumentError, "Require 'span' >= 1 (found #{span})"
|
5274
|
+
end
|
5275
|
+
alpha = 2.0 / (span + 1.0)
|
5276
|
+
|
5277
|
+
elsif !half_life.nil?
|
5278
|
+
if half_life <= 0.0
|
5279
|
+
raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})"
|
5280
|
+
end
|
5281
|
+
alpha = 1.0 - Math.exp(-Math.log(2.0) / half_life)
|
5282
|
+
|
5283
|
+
elsif alpha.nil?
|
5284
|
+
raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
|
5285
|
+
|
5286
|
+
elsif alpha <= 0 || alpha > 1
|
5287
|
+
raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
|
5288
|
+
end
|
5289
|
+
|
5290
|
+
alpha
|
5291
|
+
end
|
5292
|
+
|
5293
|
+
def _prepare_rolling_window_args(window_size, min_periods)
|
5294
|
+
if window_size.is_a?(Integer)
|
5295
|
+
if min_periods.nil?
|
5296
|
+
min_periods = window_size
|
5297
|
+
end
|
5298
|
+
window_size = "#{window_size}i"
|
5299
|
+
end
|
5300
|
+
if min_periods.nil?
|
5301
|
+
min_periods = 1
|
5302
|
+
end
|
5303
|
+
[window_size, min_periods]
|
5304
|
+
end
|
1340
5305
|
end
|
1341
5306
|
end
|