polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/expr.rb
CHANGED
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
module Polars
|
|
2
2
|
# Expressions that can be used in various contexts.
|
|
3
3
|
class Expr
|
|
4
|
-
# @private
|
|
5
|
-
NO_DEFAULT = Object.new
|
|
6
|
-
|
|
7
4
|
# @private
|
|
8
5
|
attr_accessor :_rbexpr
|
|
9
6
|
|
|
@@ -182,21 +179,14 @@ module Polars
|
|
|
182
179
|
|
|
183
180
|
# Cast to physical representation of the logical dtype.
|
|
184
181
|
#
|
|
185
|
-
# - `:date` -> `:i32`
|
|
186
|
-
# - `:datetime` -> `:i64`
|
|
187
|
-
# - `:time` -> `:i64`
|
|
188
|
-
# - `:duration` -> `:i64`
|
|
189
|
-
# - `:cat` -> `:u32`
|
|
190
|
-
# - Other data types will be left unchanged.
|
|
191
|
-
#
|
|
192
182
|
# @return [Expr]
|
|
193
183
|
#
|
|
194
184
|
# @example
|
|
195
185
|
# Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
|
|
196
186
|
# [
|
|
197
|
-
# Polars.col("vals").cast(
|
|
187
|
+
# Polars.col("vals").cast(Polars::Categorical),
|
|
198
188
|
# Polars.col("vals")
|
|
199
|
-
# .cast(
|
|
189
|
+
# .cast(Polars::Categorical)
|
|
200
190
|
# .to_physical
|
|
201
191
|
# .alias("vals_physical")
|
|
202
192
|
# ]
|
|
@@ -233,8 +223,8 @@ module Polars
|
|
|
233
223
|
# # ╞══════╪═══════╡
|
|
234
224
|
# # │ true ┆ false │
|
|
235
225
|
# # └──────┴───────┘
|
|
236
|
-
def any(
|
|
237
|
-
wrap_expr(_rbexpr.any(
|
|
226
|
+
def any(ignore_nulls: true)
|
|
227
|
+
wrap_expr(_rbexpr.any(ignore_nulls))
|
|
238
228
|
end
|
|
239
229
|
|
|
240
230
|
# Check if all boolean values in a Boolean column are `true`.
|
|
@@ -258,8 +248,8 @@ module Polars
|
|
|
258
248
|
# # ╞══════╪═══════╪═══════╡
|
|
259
249
|
# # │ true ┆ false ┆ false │
|
|
260
250
|
# # └──────┴───────┴───────┘
|
|
261
|
-
def all(
|
|
262
|
-
wrap_expr(_rbexpr.all(
|
|
251
|
+
def all(ignore_nulls: true)
|
|
252
|
+
wrap_expr(_rbexpr.all(ignore_nulls))
|
|
263
253
|
end
|
|
264
254
|
|
|
265
255
|
# Return indices where expression evaluates `true`.
|
|
@@ -449,114 +439,6 @@ module Polars
|
|
|
449
439
|
meta.as_selector.exclude(columns, *more_columns).as_expr
|
|
450
440
|
end
|
|
451
441
|
|
|
452
|
-
# Keep the original root name of the expression.
|
|
453
|
-
#
|
|
454
|
-
# @return [Expr]
|
|
455
|
-
#
|
|
456
|
-
# @example
|
|
457
|
-
# df = Polars::DataFrame.new(
|
|
458
|
-
# {
|
|
459
|
-
# "a" => [1, 2],
|
|
460
|
-
# "b" => [3, 4]
|
|
461
|
-
# }
|
|
462
|
-
# )
|
|
463
|
-
# df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
|
|
464
|
-
# # =>
|
|
465
|
-
# # shape: (2, 2)
|
|
466
|
-
# # ┌─────┬─────┐
|
|
467
|
-
# # │ a ┆ b │
|
|
468
|
-
# # │ --- ┆ --- │
|
|
469
|
-
# # │ i64 ┆ i64 │
|
|
470
|
-
# # ╞═════╪═════╡
|
|
471
|
-
# # │ 9 ┆ 3 │
|
|
472
|
-
# # │ 18 ┆ 4 │
|
|
473
|
-
# # └─────┴─────┘
|
|
474
|
-
def keep_name
|
|
475
|
-
name.keep
|
|
476
|
-
end
|
|
477
|
-
|
|
478
|
-
# Add a prefix to the root column name of the expression.
|
|
479
|
-
#
|
|
480
|
-
# @return [Expr]
|
|
481
|
-
#
|
|
482
|
-
# @example
|
|
483
|
-
# df = Polars::DataFrame.new(
|
|
484
|
-
# {
|
|
485
|
-
# "a" => [1, 2, 3],
|
|
486
|
-
# "b" => ["x", "y", "z"]
|
|
487
|
-
# }
|
|
488
|
-
# )
|
|
489
|
-
# df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
|
|
490
|
-
# # =>
|
|
491
|
-
# # shape: (3, 4)
|
|
492
|
-
# # ┌─────┬─────┬───────────┬───────────┐
|
|
493
|
-
# # │ a ┆ b ┆ reverse_a ┆ reverse_b │
|
|
494
|
-
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
495
|
-
# # │ i64 ┆ str ┆ i64 ┆ str │
|
|
496
|
-
# # ╞═════╪═════╪═══════════╪═══════════╡
|
|
497
|
-
# # │ 1 ┆ x ┆ 3 ┆ z │
|
|
498
|
-
# # │ 2 ┆ y ┆ 2 ┆ y │
|
|
499
|
-
# # │ 3 ┆ z ┆ 1 ┆ x │
|
|
500
|
-
# # └─────┴─────┴───────────┴───────────┘
|
|
501
|
-
def prefix(prefix)
|
|
502
|
-
name.prefix(prefix)
|
|
503
|
-
end
|
|
504
|
-
|
|
505
|
-
# Add a suffix to the root column name of the expression.
|
|
506
|
-
#
|
|
507
|
-
# @return [Expr]
|
|
508
|
-
#
|
|
509
|
-
# @example
|
|
510
|
-
# df = Polars::DataFrame.new(
|
|
511
|
-
# {
|
|
512
|
-
# "a" => [1, 2, 3],
|
|
513
|
-
# "b" => ["x", "y", "z"]
|
|
514
|
-
# }
|
|
515
|
-
# )
|
|
516
|
-
# df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
|
|
517
|
-
# # =>
|
|
518
|
-
# # shape: (3, 4)
|
|
519
|
-
# # ┌─────┬─────┬───────────┬───────────┐
|
|
520
|
-
# # │ a ┆ b ┆ a_reverse ┆ b_reverse │
|
|
521
|
-
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
522
|
-
# # │ i64 ┆ str ┆ i64 ┆ str │
|
|
523
|
-
# # ╞═════╪═════╪═══════════╪═══════════╡
|
|
524
|
-
# # │ 1 ┆ x ┆ 3 ┆ z │
|
|
525
|
-
# # │ 2 ┆ y ┆ 2 ┆ y │
|
|
526
|
-
# # │ 3 ┆ z ┆ 1 ┆ x │
|
|
527
|
-
# # └─────┴─────┴───────────┴───────────┘
|
|
528
|
-
def suffix(suffix)
|
|
529
|
-
name.suffix(suffix)
|
|
530
|
-
end
|
|
531
|
-
|
|
532
|
-
# Rename the output of an expression by mapping a function over the root name.
|
|
533
|
-
#
|
|
534
|
-
# @return [Expr]
|
|
535
|
-
#
|
|
536
|
-
# @example
|
|
537
|
-
# df = Polars::DataFrame.new(
|
|
538
|
-
# {
|
|
539
|
-
# "A" => [1, 2],
|
|
540
|
-
# "B" => [3, 4]
|
|
541
|
-
# }
|
|
542
|
-
# )
|
|
543
|
-
# df.select(
|
|
544
|
-
# Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
|
|
545
|
-
# )
|
|
546
|
-
# # =>
|
|
547
|
-
# # shape: (2, 2)
|
|
548
|
-
# # ┌───────────┬───────────┐
|
|
549
|
-
# # │ A_reverse ┆ B_reverse │
|
|
550
|
-
# # │ --- ┆ --- │
|
|
551
|
-
# # │ i64 ┆ i64 │
|
|
552
|
-
# # ╞═══════════╪═══════════╡
|
|
553
|
-
# # │ 2 ┆ 4 │
|
|
554
|
-
# # │ 1 ┆ 3 │
|
|
555
|
-
# # └───────────┴───────────┘
|
|
556
|
-
def map_alias(&f)
|
|
557
|
-
name.map(&f)
|
|
558
|
-
end
|
|
559
|
-
|
|
560
442
|
# Negate a boolean expression.
|
|
561
443
|
#
|
|
562
444
|
# @return [Expr]
|
|
@@ -609,7 +491,7 @@ module Polars
|
|
|
609
491
|
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
|
610
492
|
# }
|
|
611
493
|
# )
|
|
612
|
-
# df.
|
|
494
|
+
# df.with_columns(Polars.all.is_null.name.suffix("_isnull"))
|
|
613
495
|
# # =>
|
|
614
496
|
# # shape: (5, 4)
|
|
615
497
|
# # ┌──────┬─────┬──────────┬──────────┐
|
|
@@ -638,7 +520,7 @@ module Polars
|
|
|
638
520
|
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
|
639
521
|
# }
|
|
640
522
|
# )
|
|
641
|
-
# df.
|
|
523
|
+
# df.with_columns(Polars.all.is_not_null.name.suffix("_not_null"))
|
|
642
524
|
# # =>
|
|
643
525
|
# # shape: (5, 4)
|
|
644
526
|
# # ┌──────┬─────┬────────────┬────────────┐
|
|
@@ -723,7 +605,7 @@ module Polars
|
|
|
723
605
|
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
|
724
606
|
# }
|
|
725
607
|
# )
|
|
726
|
-
# df.
|
|
608
|
+
# df.with_columns(Polars.col(Polars::Float64).is_nan.name.suffix("_isnan"))
|
|
727
609
|
# # =>
|
|
728
610
|
# # shape: (5, 3)
|
|
729
611
|
# # ┌──────┬─────┬─────────┐
|
|
@@ -756,7 +638,7 @@ module Polars
|
|
|
756
638
|
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
|
757
639
|
# }
|
|
758
640
|
# )
|
|
759
|
-
# df.
|
|
641
|
+
# df.with_columns(Polars.col(Polars::Float64).is_not_nan.name.suffix("_is_not_nan"))
|
|
760
642
|
# # =>
|
|
761
643
|
# # shape: (5, 3)
|
|
762
644
|
# # ┌──────┬─────┬──────────────┐
|
|
@@ -1009,8 +891,8 @@ module Polars
|
|
|
1009
891
|
# @return [Expr]
|
|
1010
892
|
#
|
|
1011
893
|
# @note
|
|
1012
|
-
# Dtypes in
|
|
1013
|
-
#
|
|
894
|
+
# Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
|
|
895
|
+
# Int64 before summing to prevent overflow issues.
|
|
1014
896
|
#
|
|
1015
897
|
# @example
|
|
1016
898
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
|
@@ -1035,7 +917,6 @@ module Polars
|
|
|
1035
917
|
def cum_sum(reverse: false)
|
|
1036
918
|
wrap_expr(_rbexpr.cum_sum(reverse))
|
|
1037
919
|
end
|
|
1038
|
-
alias_method :cumsum, :cum_sum
|
|
1039
920
|
|
|
1040
921
|
# Get an array with the cumulative product computed at every element.
|
|
1041
922
|
#
|
|
@@ -1045,8 +926,8 @@ module Polars
|
|
|
1045
926
|
# @return [Expr]
|
|
1046
927
|
#
|
|
1047
928
|
# @note
|
|
1048
|
-
# Dtypes in
|
|
1049
|
-
#
|
|
929
|
+
# Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
|
|
930
|
+
# Int64 before summing to prevent overflow issues.
|
|
1050
931
|
#
|
|
1051
932
|
# @example
|
|
1052
933
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
|
@@ -1071,7 +952,6 @@ module Polars
|
|
|
1071
952
|
def cum_prod(reverse: false)
|
|
1072
953
|
wrap_expr(_rbexpr.cum_prod(reverse))
|
|
1073
954
|
end
|
|
1074
|
-
alias_method :cumprod, :cum_prod
|
|
1075
955
|
|
|
1076
956
|
# Get an array with the cumulative min computed at every element.
|
|
1077
957
|
#
|
|
@@ -1103,7 +983,6 @@ module Polars
|
|
|
1103
983
|
def cum_min(reverse: false)
|
|
1104
984
|
wrap_expr(_rbexpr.cum_min(reverse))
|
|
1105
985
|
end
|
|
1106
|
-
alias_method :cummin, :cum_min
|
|
1107
986
|
|
|
1108
987
|
# Get an array with the cumulative max computed at every element.
|
|
1109
988
|
#
|
|
@@ -1135,7 +1014,6 @@ module Polars
|
|
|
1135
1014
|
def cum_max(reverse: false)
|
|
1136
1015
|
wrap_expr(_rbexpr.cum_max(reverse))
|
|
1137
1016
|
end
|
|
1138
|
-
alias_method :cummax, :cum_max
|
|
1139
1017
|
|
|
1140
1018
|
# Get an array with the cumulative count computed at every element.
|
|
1141
1019
|
#
|
|
@@ -1169,7 +1047,6 @@ module Polars
|
|
|
1169
1047
|
def cum_count(reverse: false)
|
|
1170
1048
|
wrap_expr(_rbexpr.cum_count(reverse))
|
|
1171
1049
|
end
|
|
1172
|
-
alias_method :cumcount, :cum_count
|
|
1173
1050
|
|
|
1174
1051
|
# Rounds down to the nearest integer value.
|
|
1175
1052
|
#
|
|
@@ -1338,11 +1215,14 @@ module Polars
|
|
|
1338
1215
|
|
|
1339
1216
|
# Cast between data types.
|
|
1340
1217
|
#
|
|
1341
|
-
# @param dtype [
|
|
1218
|
+
# @param dtype [Object]
|
|
1342
1219
|
# DataType to cast to.
|
|
1343
1220
|
# @param strict [Boolean]
|
|
1344
1221
|
# Throw an error if a cast could not be done.
|
|
1345
1222
|
# For instance, due to an overflow.
|
|
1223
|
+
# @param wrap_numerical [Boolean]
|
|
1224
|
+
# If true, numeric casts wrap overflowing values instead of
|
|
1225
|
+
# marking the cast as invalid.
|
|
1346
1226
|
#
|
|
1347
1227
|
# @return [Expr]
|
|
1348
1228
|
#
|
|
@@ -1355,8 +1235,8 @@ module Polars
|
|
|
1355
1235
|
# )
|
|
1356
1236
|
# df.with_columns(
|
|
1357
1237
|
# [
|
|
1358
|
-
# Polars.col("a").cast(
|
|
1359
|
-
# Polars.col("b").cast(
|
|
1238
|
+
# Polars.col("a").cast(Polars::Float64),
|
|
1239
|
+
# Polars.col("b").cast(Polars::Int32)
|
|
1360
1240
|
# ]
|
|
1361
1241
|
# )
|
|
1362
1242
|
# # =>
|
|
@@ -1370,16 +1250,16 @@ module Polars
|
|
|
1370
1250
|
# # │ 2.0 ┆ 5 │
|
|
1371
1251
|
# # │ 3.0 ┆ 6 │
|
|
1372
1252
|
# # └─────┴─────┘
|
|
1373
|
-
def cast(dtype, strict: true)
|
|
1374
|
-
dtype = Utils.
|
|
1375
|
-
wrap_expr(_rbexpr.cast(dtype, strict))
|
|
1253
|
+
def cast(dtype, strict: true, wrap_numerical: false)
|
|
1254
|
+
dtype = Utils.parse_into_datatype_expr(dtype)
|
|
1255
|
+
wrap_expr(_rbexpr.cast(dtype._rbdatatype_expr, strict, wrap_numerical))
|
|
1376
1256
|
end
|
|
1377
1257
|
|
|
1378
1258
|
# Sort this column. In projection/selection context, the whole column is sorted.
|
|
1379
1259
|
#
|
|
1380
1260
|
# If used in a group by context, the groups are sorted.
|
|
1381
1261
|
#
|
|
1382
|
-
# @param
|
|
1262
|
+
# @param descending [Boolean]
|
|
1383
1263
|
# false -> order from small to large.
|
|
1384
1264
|
# true -> order from large to small.
|
|
1385
1265
|
# @param nulls_last [Boolean]
|
|
@@ -1446,8 +1326,8 @@ module Polars
|
|
|
1446
1326
|
# # │ two ┆ [3, 4, 99] │
|
|
1447
1327
|
# # │ one ┆ [1, 2, 98] │
|
|
1448
1328
|
# # └───────┴────────────┘
|
|
1449
|
-
def sort(
|
|
1450
|
-
wrap_expr(_rbexpr.sort_with(
|
|
1329
|
+
def sort(descending: false, nulls_last: false)
|
|
1330
|
+
wrap_expr(_rbexpr.sort_with(descending, nulls_last))
|
|
1451
1331
|
end
|
|
1452
1332
|
|
|
1453
1333
|
# Return the `k` largest elements.
|
|
@@ -1503,7 +1383,7 @@ module Polars
|
|
|
1503
1383
|
# Number of elements to return.
|
|
1504
1384
|
# @param reverse [Object]
|
|
1505
1385
|
# Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
|
|
1506
|
-
# largest). This can be specified per column by passing
|
|
1386
|
+
# largest). This can be specified per column by passing an array of
|
|
1507
1387
|
# booleans.
|
|
1508
1388
|
#
|
|
1509
1389
|
# @return [Expr]
|
|
@@ -1648,7 +1528,7 @@ module Polars
|
|
|
1648
1528
|
# Number of elements to return.
|
|
1649
1529
|
# @param reverse [Object]
|
|
1650
1530
|
# Consider the `k` largest elements of the `by` column(s) (instead of the `k`
|
|
1651
|
-
# smallest). This can be specified per column by passing
|
|
1531
|
+
# smallest). This can be specified per column by passing an array of
|
|
1652
1532
|
# booleans.
|
|
1653
1533
|
#
|
|
1654
1534
|
# @return [Expr]
|
|
@@ -1742,7 +1622,7 @@ module Polars
|
|
|
1742
1622
|
|
|
1743
1623
|
# Get the index values that would sort this column.
|
|
1744
1624
|
#
|
|
1745
|
-
# @param
|
|
1625
|
+
# @param descending [Boolean]
|
|
1746
1626
|
# Sort in reverse (descending) order.
|
|
1747
1627
|
# @param nulls_last [Boolean]
|
|
1748
1628
|
# Place null values last instead of first.
|
|
@@ -1767,8 +1647,8 @@ module Polars
|
|
|
1767
1647
|
# # │ 0 │
|
|
1768
1648
|
# # │ 2 │
|
|
1769
1649
|
# # └─────┘
|
|
1770
|
-
def arg_sort(
|
|
1771
|
-
wrap_expr(_rbexpr.arg_sort(
|
|
1650
|
+
def arg_sort(descending: false, nulls_last: false)
|
|
1651
|
+
wrap_expr(_rbexpr.arg_sort(descending, nulls_last))
|
|
1772
1652
|
end
|
|
1773
1653
|
|
|
1774
1654
|
# Get the index of the maximal value.
|
|
@@ -1899,12 +1779,12 @@ module Polars
|
|
|
1899
1779
|
# The column(s) used for sorting.
|
|
1900
1780
|
# @param more_by [Array]
|
|
1901
1781
|
# Additional columns to sort by, specified as positional arguments.
|
|
1902
|
-
# @param
|
|
1782
|
+
# @param descending [Boolean]
|
|
1903
1783
|
# false -> order from small to large.
|
|
1904
1784
|
# true -> order from large to small.
|
|
1905
1785
|
# @param nulls_last [Boolean]
|
|
1906
1786
|
# Place null values last; can specify a single boolean applying to all columns
|
|
1907
|
-
# or
|
|
1787
|
+
# or an array of booleans for per-column control.
|
|
1908
1788
|
# @param multithreaded [Boolean]
|
|
1909
1789
|
# Sort using multiple threads.
|
|
1910
1790
|
# @param maintain_order [Boolean]
|
|
@@ -1941,13 +1821,13 @@ module Polars
|
|
|
1941
1821
|
# # │ one │
|
|
1942
1822
|
# # │ two │
|
|
1943
1823
|
# # └───────┘
|
|
1944
|
-
def sort_by(by, *more_by,
|
|
1824
|
+
def sort_by(by, *more_by, descending: false, nulls_last: false, multithreaded: true, maintain_order: false)
|
|
1945
1825
|
by = Utils.parse_into_list_of_expressions(by, *more_by)
|
|
1946
|
-
|
|
1826
|
+
descending = Utils.extend_bool(descending, by.length, "descending", "by")
|
|
1947
1827
|
nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
|
|
1948
1828
|
wrap_expr(
|
|
1949
1829
|
_rbexpr.sort_by(
|
|
1950
|
-
by,
|
|
1830
|
+
by, descending, nulls_last, multithreaded, maintain_order
|
|
1951
1831
|
)
|
|
1952
1832
|
)
|
|
1953
1833
|
end
|
|
@@ -1973,7 +1853,7 @@ module Polars
|
|
|
1973
1853
|
# "value" => [1, 98, 2, 3, 99, 4]
|
|
1974
1854
|
# }
|
|
1975
1855
|
# )
|
|
1976
|
-
# df.group_by("group", maintain_order: true).agg(Polars.col("value").
|
|
1856
|
+
# df.group_by("group", maintain_order: true).agg(Polars.col("value").gather([2, 1]))
|
|
1977
1857
|
# # =>
|
|
1978
1858
|
# # shape: (2, 2)
|
|
1979
1859
|
# # ┌───────┬───────────┐
|
|
@@ -1986,13 +1866,12 @@ module Polars
|
|
|
1986
1866
|
# # └───────┴───────────┘
|
|
1987
1867
|
def gather(indices)
|
|
1988
1868
|
if indices.is_a?(::Array)
|
|
1989
|
-
|
|
1869
|
+
indices_lit_rbexpr = Polars.lit(Series.new("", indices, dtype: Int64))._rbexpr
|
|
1990
1870
|
else
|
|
1991
|
-
|
|
1871
|
+
indices_lit_rbexpr = Utils.parse_into_expression(indices)
|
|
1992
1872
|
end
|
|
1993
|
-
wrap_expr(_rbexpr.gather(
|
|
1873
|
+
wrap_expr(_rbexpr.gather(indices_lit_rbexpr))
|
|
1994
1874
|
end
|
|
1995
|
-
alias_method :take, :gather
|
|
1996
1875
|
|
|
1997
1876
|
# Return a single value by index.
|
|
1998
1877
|
#
|
|
@@ -2063,34 +1942,6 @@ module Polars
|
|
|
2063
1942
|
wrap_expr(_rbexpr.shift(n, fill_value))
|
|
2064
1943
|
end
|
|
2065
1944
|
|
|
2066
|
-
# Shift the values by a given period and fill the resulting null values.
|
|
2067
|
-
#
|
|
2068
|
-
# @param periods [Integer]
|
|
2069
|
-
# Number of places to shift (may be negative).
|
|
2070
|
-
# @param fill_value [Object]
|
|
2071
|
-
# Fill nil values with the result of this expression.
|
|
2072
|
-
#
|
|
2073
|
-
# @return [Expr]
|
|
2074
|
-
#
|
|
2075
|
-
# @example
|
|
2076
|
-
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
|
2077
|
-
# df.select(Polars.col("foo").shift_and_fill(1, "a"))
|
|
2078
|
-
# # =>
|
|
2079
|
-
# # shape: (4, 1)
|
|
2080
|
-
# # ┌─────┐
|
|
2081
|
-
# # │ foo │
|
|
2082
|
-
# # │ --- │
|
|
2083
|
-
# # │ str │
|
|
2084
|
-
# # ╞═════╡
|
|
2085
|
-
# # │ a │
|
|
2086
|
-
# # │ 1 │
|
|
2087
|
-
# # │ 2 │
|
|
2088
|
-
# # │ 3 │
|
|
2089
|
-
# # └─────┘
|
|
2090
|
-
def shift_and_fill(periods, fill_value)
|
|
2091
|
-
shift(periods, fill_value: fill_value)
|
|
2092
|
-
end
|
|
2093
|
-
|
|
2094
1945
|
# Fill null values using the specified value or strategy.
|
|
2095
1946
|
#
|
|
2096
1947
|
# To interpolate over null values see interpolate.
|
|
@@ -2192,9 +2043,9 @@ module Polars
|
|
|
2192
2043
|
# # │ null ┆ zero │
|
|
2193
2044
|
# # │ zero ┆ 6.0 │
|
|
2194
2045
|
# # └──────┴──────┘
|
|
2195
|
-
def fill_nan(
|
|
2196
|
-
|
|
2197
|
-
wrap_expr(_rbexpr.fill_nan(
|
|
2046
|
+
def fill_nan(value)
|
|
2047
|
+
fill_value_rbexpr = Utils.parse_into_expression(value, str_as_lit: true)
|
|
2048
|
+
wrap_expr(_rbexpr.fill_nan(fill_value_rbexpr))
|
|
2198
2049
|
end
|
|
2199
2050
|
|
|
2200
2051
|
# Fill missing values with the latest seen values.
|
|
@@ -2424,8 +2275,8 @@ module Polars
|
|
|
2424
2275
|
# @return [Expr]
|
|
2425
2276
|
#
|
|
2426
2277
|
# @note
|
|
2427
|
-
# Dtypes in
|
|
2428
|
-
#
|
|
2278
|
+
# Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
|
|
2279
|
+
# Int64 before summing to prevent overflow issues.
|
|
2429
2280
|
#
|
|
2430
2281
|
# @example
|
|
2431
2282
|
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
|
@@ -2544,7 +2395,6 @@ module Polars
|
|
|
2544
2395
|
def approx_n_unique
|
|
2545
2396
|
wrap_expr(_rbexpr.approx_n_unique)
|
|
2546
2397
|
end
|
|
2547
|
-
alias_method :approx_unique, :approx_n_unique
|
|
2548
2398
|
|
|
2549
2399
|
# Count null values.
|
|
2550
2400
|
#
|
|
@@ -2705,13 +2555,82 @@ module Polars
|
|
|
2705
2555
|
wrap_expr(_rbexpr.last)
|
|
2706
2556
|
end
|
|
2707
2557
|
|
|
2558
|
+
# Get the single value.
|
|
2559
|
+
#
|
|
2560
|
+
# This raises an error if there is not exactly one value.
|
|
2561
|
+
#
|
|
2562
|
+
# @param allow_empty [Boolean]
|
|
2563
|
+
# Return `null` instead of raising an error when there are no values.
|
|
2564
|
+
#
|
|
2565
|
+
# @return [Expr]
|
|
2566
|
+
#
|
|
2567
|
+
# @example
|
|
2568
|
+
# df = Polars::DataFrame.new({"a" => [1]})
|
|
2569
|
+
# df.select(Polars.col("a").item)
|
|
2570
|
+
# # =>
|
|
2571
|
+
# # shape: (1, 1)
|
|
2572
|
+
# # ┌─────┐
|
|
2573
|
+
# # │ a │
|
|
2574
|
+
# # │ --- │
|
|
2575
|
+
# # │ i64 │
|
|
2576
|
+
# # ╞═════╡
|
|
2577
|
+
# # │ 1 │
|
|
2578
|
+
# # └─────┘
|
|
2579
|
+
#
|
|
2580
|
+
# @example
|
|
2581
|
+
# df.head(0).select(Polars.col("a").item(allow_empty: true))
|
|
2582
|
+
# # =>
|
|
2583
|
+
# # shape: (1, 1)
|
|
2584
|
+
# # ┌──────┐
|
|
2585
|
+
# # │ a │
|
|
2586
|
+
# # │ --- │
|
|
2587
|
+
# # │ i64 │
|
|
2588
|
+
# # ╞══════╡
|
|
2589
|
+
# # │ null │
|
|
2590
|
+
# # └──────┘
|
|
2591
|
+
def item(allow_empty: false)
|
|
2592
|
+
Utils.wrap_expr(_rbexpr.item(allow_empty))
|
|
2593
|
+
end
|
|
2594
|
+
|
|
2708
2595
|
# Apply window function over a subgroup.
|
|
2709
2596
|
#
|
|
2710
2597
|
# This is similar to a group by + aggregation + self join.
|
|
2711
2598
|
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
|
2712
2599
|
#
|
|
2713
|
-
# @param
|
|
2714
|
-
# Column(s) to group by.
|
|
2600
|
+
# @param partition_by [Object]
|
|
2601
|
+
# Column(s) to group by. Accepts expression input. Strings are parsed as
|
|
2602
|
+
# column names.
|
|
2603
|
+
# @param more_exprs [Array]
|
|
2604
|
+
# Additional columns to group by, specified as positional arguments.
|
|
2605
|
+
# @param order_by [Object]
|
|
2606
|
+
# Order the window functions/aggregations with the partitioned groups by
|
|
2607
|
+
# the result of the expression passed to `order_by`.
|
|
2608
|
+
# @param descending [Boolean]
|
|
2609
|
+
# In case 'order_by' is given, indicate whether to order in ascending or
|
|
2610
|
+
# descending order.
|
|
2611
|
+
# @param nulls_last [Boolean]
|
|
2612
|
+
# In case 'order_by' is given, indicate whether to order
|
|
2613
|
+
# the nulls in last position.
|
|
2614
|
+
# @param mapping_strategy ['group_to_rows', 'join', 'explode']
|
|
2615
|
+
# - group_to_rows
|
|
2616
|
+
# If the aggregation results in multiple values per group, map them back
|
|
2617
|
+
# to their row position in the DataFrame. This can only be done if each
|
|
2618
|
+
# group yields the same elements before aggregation as after. If the
|
|
2619
|
+
# aggregation results in one scalar value per group, this value will be
|
|
2620
|
+
# mapped to every row.
|
|
2621
|
+
# - join
|
|
2622
|
+
# If the aggregation may result in multiple values per group, join the
|
|
2623
|
+
# values as 'List<group_dtype>' to each row position. Warning: this can be
|
|
2624
|
+
# memory intensive. If the aggregation always results in one scalar value
|
|
2625
|
+
# per group, join this value as '<group_dtype>' to each row position.
|
|
2626
|
+
# - explode
|
|
2627
|
+
# If the aggregation may result in multiple values per group, map each
|
|
2628
|
+
# value to a new row, similar to the results of `group_by` + `agg` +
|
|
2629
|
+
# `explode`. If the aggregation always results in one scalar value per
|
|
2630
|
+
# group, map this value to one row position. Sorting of the given groups
|
|
2631
|
+
# is required if the groups are not part of the window operation for the
|
|
2632
|
+
# operation, otherwise the result would not make sense. This operation
|
|
2633
|
+
# changes the number of rows.
|
|
2715
2634
|
#
|
|
2716
2635
|
# @return [Expr]
|
|
2717
2636
|
#
|
|
@@ -2722,7 +2641,7 @@ module Polars
|
|
|
2722
2641
|
# "values" => [1, 2, 3]
|
|
2723
2642
|
# }
|
|
2724
2643
|
# )
|
|
2725
|
-
# df.
|
|
2644
|
+
# df.with_columns(
|
|
2726
2645
|
# Polars.col("values").max.over("groups").alias("max_by_group")
|
|
2727
2646
|
# )
|
|
2728
2647
|
# # =>
|
|
@@ -2764,9 +2683,41 @@ module Polars
|
|
|
2764
2683
|
# # │ 6 │
|
|
2765
2684
|
# # │ 4 │
|
|
2766
2685
|
# # └────────┘
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2686
|
+
#
|
|
2687
|
+
# @example
|
|
2688
|
+
# df = Polars::DataFrame.new(
|
|
2689
|
+
# {
|
|
2690
|
+
# "store_id" => ["a", "a", "b", "b"],
|
|
2691
|
+
# "date" => [Date.new(2024, 9, 18), Date.new(2024, 9, 17), Date.new(2024, 9, 18), Date.new(2024, 9, 16)],
|
|
2692
|
+
# "sales" => [7, 9, 8, 10]
|
|
2693
|
+
# }
|
|
2694
|
+
# )
|
|
2695
|
+
# df.with_columns(
|
|
2696
|
+
# cumulative_sales: Polars.col("sales").cum_sum.over("store_id", order_by: "date")
|
|
2697
|
+
# )
|
|
2698
|
+
# # =>
|
|
2699
|
+
# # shape: (4, 4)
|
|
2700
|
+
# # ┌──────────┬────────────┬───────┬──────────────────┐
|
|
2701
|
+
# # │ store_id ┆ date ┆ sales ┆ cumulative_sales │
|
|
2702
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
2703
|
+
# # │ str ┆ date ┆ i64 ┆ i64 │
|
|
2704
|
+
# # ╞══════════╪════════════╪═══════╪══════════════════╡
|
|
2705
|
+
# # │ a ┆ 2024-09-18 ┆ 7 ┆ 16 │
|
|
2706
|
+
# # │ a ┆ 2024-09-17 ┆ 9 ┆ 9 │
|
|
2707
|
+
# # │ b ┆ 2024-09-18 ┆ 8 ┆ 18 │
|
|
2708
|
+
# # │ b ┆ 2024-09-16 ┆ 10 ┆ 10 │
|
|
2709
|
+
# # └──────────┴────────────┴───────┴──────────────────┘
|
|
2710
|
+
def over(partition_by = nil, *more_exprs, order_by: nil, descending: false, nulls_last: false, mapping_strategy: "group_to_rows")
|
|
2711
|
+
partition_by_rbexprs =
|
|
2712
|
+
if !partition_by.nil?
|
|
2713
|
+
Utils.parse_into_list_of_expressions(partition_by, *more_exprs)
|
|
2714
|
+
else
|
|
2715
|
+
nil
|
|
2716
|
+
end
|
|
2717
|
+
|
|
2718
|
+
order_by_rbexprs = !order_by.nil? ? Utils.parse_into_list_of_expressions(order_by) : nil
|
|
2719
|
+
|
|
2720
|
+
wrap_expr(_rbexpr.over(partition_by_rbexprs, order_by_rbexprs, descending, nulls_last, mapping_strategy))
|
|
2770
2721
|
end
|
|
2771
2722
|
|
|
2772
2723
|
# Create rolling groups based on a temporal or integer column.
|
|
@@ -2904,7 +2855,7 @@ module Polars
|
|
|
2904
2855
|
# "num" => [1, 2, 3, 1, 5]
|
|
2905
2856
|
# }
|
|
2906
2857
|
# )
|
|
2907
|
-
# df.
|
|
2858
|
+
# df.with_columns(Polars.col("num").is_first_distinct.alias("is_first"))
|
|
2908
2859
|
# # =>
|
|
2909
2860
|
# # shape: (5, 2)
|
|
2910
2861
|
# # ┌─────┬──────────┐
|
|
@@ -2921,7 +2872,6 @@ module Polars
|
|
|
2921
2872
|
def is_first_distinct
|
|
2922
2873
|
wrap_expr(_rbexpr.is_first_distinct)
|
|
2923
2874
|
end
|
|
2924
|
-
alias_method :is_first, :is_first_distinct
|
|
2925
2875
|
|
|
2926
2876
|
# Return a boolean mask indicating the last occurrence of each distinct value.
|
|
2927
2877
|
#
|
|
@@ -3296,8 +3246,12 @@ module Polars
|
|
|
3296
3246
|
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
|
3297
3247
|
# level, use `LazyFrame#filter`.
|
|
3298
3248
|
#
|
|
3299
|
-
# @param
|
|
3300
|
-
#
|
|
3249
|
+
# @param predicates [Array]
|
|
3250
|
+
# Expression(s) that evaluate to a boolean Series.
|
|
3251
|
+
# @param constraints [Hash]
|
|
3252
|
+
# Column filters; use `name: value` to filter columns by the supplied value.
|
|
3253
|
+
# Each constraint will behave the same as `Polars.col(name).eq(value)`, and
|
|
3254
|
+
# be implicitly joined with the other filter conditions using `&`.
|
|
3301
3255
|
#
|
|
3302
3256
|
# @return [Expr]
|
|
3303
3257
|
#
|
|
@@ -3326,49 +3280,14 @@ module Polars
|
|
|
3326
3280
|
# # │ g1 ┆ 1 ┆ 2 │
|
|
3327
3281
|
# # │ g2 ┆ 0 ┆ 3 │
|
|
3328
3282
|
# # └───────────┴─────┴─────┘
|
|
3329
|
-
def filter(
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
#
|
|
3335
|
-
# Alias for {#filter}.
|
|
3336
|
-
#
|
|
3337
|
-
# @param predicate [Expr]
|
|
3338
|
-
# Boolean expression.
|
|
3339
|
-
#
|
|
3340
|
-
# @return [Expr]
|
|
3341
|
-
#
|
|
3342
|
-
# @example
|
|
3343
|
-
# df = Polars::DataFrame.new(
|
|
3344
|
-
# {
|
|
3345
|
-
# "group_col" => ["g1", "g1", "g2"],
|
|
3346
|
-
# "b" => [1, 2, 3]
|
|
3347
|
-
# }
|
|
3348
|
-
# )
|
|
3349
|
-
# (
|
|
3350
|
-
# df.group_by("group_col").agg(
|
|
3351
|
-
# [
|
|
3352
|
-
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
|
3353
|
-
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
|
3354
|
-
# ]
|
|
3355
|
-
# )
|
|
3356
|
-
# ).sort("group_col")
|
|
3357
|
-
# # =>
|
|
3358
|
-
# # shape: (2, 3)
|
|
3359
|
-
# # ┌───────────┬─────┬─────┐
|
|
3360
|
-
# # │ group_col ┆ lt ┆ gte │
|
|
3361
|
-
# # │ --- ┆ --- ┆ --- │
|
|
3362
|
-
# # │ str ┆ i64 ┆ i64 │
|
|
3363
|
-
# # ╞═══════════╪═════╪═════╡
|
|
3364
|
-
# # │ g1 ┆ 1 ┆ 2 │
|
|
3365
|
-
# # │ g2 ┆ 0 ┆ 3 │
|
|
3366
|
-
# # └───────────┴─────┴─────┘
|
|
3367
|
-
def where(predicate)
|
|
3368
|
-
filter(predicate)
|
|
3283
|
+
def filter(*predicates, **constraints)
|
|
3284
|
+
predicate = Utils.parse_predicates_constraints_into_expression(
|
|
3285
|
+
*predicates, **constraints
|
|
3286
|
+
)
|
|
3287
|
+
wrap_expr(_rbexpr.filter(predicate))
|
|
3369
3288
|
end
|
|
3370
3289
|
|
|
3371
|
-
# Apply a custom Ruby function to a Series or
|
|
3290
|
+
# Apply a custom Ruby function to a Series or array of Series.
|
|
3372
3291
|
#
|
|
3373
3292
|
# The output of this custom function must be a Series.
|
|
3374
3293
|
# If you want to apply a custom function elementwise over single values, see
|
|
@@ -3406,11 +3325,11 @@ module Polars
|
|
|
3406
3325
|
# # └──────┴────────┘
|
|
3407
3326
|
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
|
|
3408
3327
|
# if !return_dtype.nil?
|
|
3409
|
-
# return_dtype = Utils.
|
|
3328
|
+
# return_dtype = Utils.parse_into_dtype(return_dtype)
|
|
3410
3329
|
# end
|
|
3411
3330
|
# wrap_expr(
|
|
3412
3331
|
# _rbexpr.map_batches(
|
|
3413
|
-
#
|
|
3332
|
+
# _map_batches_wrapper,
|
|
3414
3333
|
# f,
|
|
3415
3334
|
# return_dtype,
|
|
3416
3335
|
# agg_list,
|
|
@@ -3460,7 +3379,7 @@ module Polars
|
|
|
3460
3379
|
# )
|
|
3461
3380
|
#
|
|
3462
3381
|
# @example In a selection context, the function is applied by row.
|
|
3463
|
-
# df.
|
|
3382
|
+
# df.with_columns(
|
|
3464
3383
|
# Polars.col("a").map_elements { |x| x * 2 }.alias("a_times_2")
|
|
3465
3384
|
# )
|
|
3466
3385
|
# # =>
|
|
@@ -3591,7 +3510,6 @@ module Polars
|
|
|
3591
3510
|
def gather_every(n, offset = 0)
|
|
3592
3511
|
wrap_expr(_rbexpr.gather_every(n, offset))
|
|
3593
3512
|
end
|
|
3594
|
-
alias_method :take_every, :gather_every
|
|
3595
3513
|
|
|
3596
3514
|
# Get the first `n` rows.
|
|
3597
3515
|
#
|
|
@@ -4304,7 +4222,7 @@ module Polars
|
|
|
4304
4222
|
# Check if elements of this expression are present in the other Series.
|
|
4305
4223
|
#
|
|
4306
4224
|
# @param other [Object]
|
|
4307
|
-
# Series or
|
|
4225
|
+
# Series or array of primitive type.
|
|
4308
4226
|
# @param nulls_equal [Boolean]
|
|
4309
4227
|
# If true, treat null as a distinct value. Null values will not propagate.
|
|
4310
4228
|
#
|
|
@@ -4472,7 +4390,7 @@ module Polars
|
|
|
4472
4390
|
def is_close(
|
|
4473
4391
|
other,
|
|
4474
4392
|
abs_tol: 0.0,
|
|
4475
|
-
rel_tol:
|
|
4393
|
+
rel_tol: 1.0e-09,
|
|
4476
4394
|
nans_equal: false
|
|
4477
4395
|
)
|
|
4478
4396
|
other = Utils.parse_into_expression(other)
|
|
@@ -4481,7 +4399,7 @@ module Polars
|
|
|
4481
4399
|
|
|
4482
4400
|
# Hash the elements in the selection.
|
|
4483
4401
|
#
|
|
4484
|
-
# The hash value is of type
|
|
4402
|
+
# The hash value is of type `UInt64`.
|
|
4485
4403
|
#
|
|
4486
4404
|
# @param seed [Integer]
|
|
4487
4405
|
# Random seed parameter. Defaults to 0.
|
|
@@ -4501,7 +4419,7 @@ module Polars
|
|
|
4501
4419
|
# "b" => ["x", nil, "z"]
|
|
4502
4420
|
# }
|
|
4503
4421
|
# )
|
|
4504
|
-
# df.
|
|
4422
|
+
# df.with_columns(Polars.all.hash_(10, 20, 30, 40))
|
|
4505
4423
|
# # =>
|
|
4506
4424
|
# # shape: (3, 2)
|
|
4507
4425
|
# # ┌──────────────────────┬──────────────────────┐
|
|
@@ -4513,7 +4431,7 @@ module Polars
|
|
|
4513
4431
|
# # │ 16386608652769605760 ┆ 11638928888656214026 │
|
|
4514
4432
|
# # │ 11638928888656214026 ┆ 11040941213715918520 │
|
|
4515
4433
|
# # └──────────────────────┴──────────────────────┘
|
|
4516
|
-
def
|
|
4434
|
+
def hash_(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
|
4517
4435
|
k0 = seed
|
|
4518
4436
|
k1 = seed_1.nil? ? seed : seed_1
|
|
4519
4437
|
k2 = seed_2.nil? ? seed : seed_2
|
|
@@ -4527,12 +4445,12 @@ module Polars
|
|
|
4527
4445
|
# you can safely use that cast operation.
|
|
4528
4446
|
#
|
|
4529
4447
|
# @param signed [Boolean]
|
|
4530
|
-
# If true, reinterpret as
|
|
4448
|
+
# If true, reinterpret as `Polars::Int64`. Otherwise, reinterpret as `Polars::UInt64`.
|
|
4531
4449
|
#
|
|
4532
4450
|
# @return [Expr]
|
|
4533
4451
|
#
|
|
4534
4452
|
# @example
|
|
4535
|
-
# s = Polars::Series.new("a", [1, 1, 2], dtype:
|
|
4453
|
+
# s = Polars::Series.new("a", [1, 1, 2], dtype: Polars::UInt64)
|
|
4536
4454
|
# df = Polars::DataFrame.new([s])
|
|
4537
4455
|
# df.select(
|
|
4538
4456
|
# [
|
|
@@ -4551,7 +4469,13 @@ module Polars
|
|
|
4551
4469
|
# # │ 1 ┆ 1 │
|
|
4552
4470
|
# # │ 2 ┆ 2 │
|
|
4553
4471
|
# # └───────────────┴──────────┘
|
|
4554
|
-
def reinterpret(signed:
|
|
4472
|
+
def reinterpret(signed: nil)
|
|
4473
|
+
# TODO: switch the default of `signed` from `false` to `true` in a future version
|
|
4474
|
+
if signed.nil?
|
|
4475
|
+
warn "The default `signed` for `reinterpret` method will change from `false` to `true` in a future version"
|
|
4476
|
+
signed = false
|
|
4477
|
+
end
|
|
4478
|
+
|
|
4555
4479
|
wrap_expr(_rbexpr.reinterpret(signed))
|
|
4556
4480
|
end
|
|
4557
4481
|
|
|
@@ -4561,7 +4485,7 @@ module Polars
|
|
|
4561
4485
|
#
|
|
4562
4486
|
# @example
|
|
4563
4487
|
# df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
|
|
4564
|
-
# df.select(Polars.col("foo").cumsum.
|
|
4488
|
+
# df.select(Polars.col("foo").cum_sum.inspect_("value is: %s").alias("bar"))
|
|
4565
4489
|
# # =>
|
|
4566
4490
|
# # value is: shape: (3,)
|
|
4567
4491
|
# # Series: 'foo' [i64]
|
|
@@ -4580,7 +4504,7 @@ module Polars
|
|
|
4580
4504
|
# # │ 2 │
|
|
4581
4505
|
# # │ 4 │
|
|
4582
4506
|
# # └─────┘
|
|
4583
|
-
# def
|
|
4507
|
+
# def inspect_(fmt = "%s")
|
|
4584
4508
|
# inspect = lambda do |s|
|
|
4585
4509
|
# puts(fmt % [s])
|
|
4586
4510
|
# s
|
|
@@ -4673,14 +4597,12 @@ module Polars
|
|
|
4673
4597
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4674
4598
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
4675
4599
|
# "calendar year".
|
|
4676
|
-
# @param
|
|
4600
|
+
# @param min_samples [Integer]
|
|
4677
4601
|
# The number of values in the window that should be non-null before computing
|
|
4678
4602
|
# a result.
|
|
4679
4603
|
# @param closed ['left', 'right', 'both', 'none']
|
|
4680
4604
|
# Define which sides of the temporal interval are closed (inclusive),
|
|
4681
4605
|
# defaults to `'right'`.
|
|
4682
|
-
# @param warn_if_unsorted [Boolean]
|
|
4683
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
4684
4606
|
#
|
|
4685
4607
|
# @return [Expr]
|
|
4686
4608
|
#
|
|
@@ -4741,14 +4663,13 @@ module Polars
|
|
|
4741
4663
|
def rolling_min_by(
|
|
4742
4664
|
by,
|
|
4743
4665
|
window_size,
|
|
4744
|
-
|
|
4745
|
-
closed: "right"
|
|
4746
|
-
warn_if_unsorted: nil
|
|
4666
|
+
min_samples: 1,
|
|
4667
|
+
closed: "right"
|
|
4747
4668
|
)
|
|
4748
4669
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
4749
4670
|
by = Utils.parse_into_expression(by)
|
|
4750
4671
|
wrap_expr(
|
|
4751
|
-
_rbexpr.rolling_min_by(by, window_size,
|
|
4672
|
+
_rbexpr.rolling_min_by(by, window_size, min_samples, closed)
|
|
4752
4673
|
)
|
|
4753
4674
|
end
|
|
4754
4675
|
|
|
@@ -4776,14 +4697,12 @@ module Polars
|
|
|
4776
4697
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4777
4698
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
4778
4699
|
# "calendar year".
|
|
4779
|
-
# @param
|
|
4700
|
+
# @param min_samples [Integer]
|
|
4780
4701
|
# The number of values in the window that should be non-null before computing
|
|
4781
4702
|
# a result.
|
|
4782
4703
|
# @param closed ['left', 'right', 'both', 'none']
|
|
4783
4704
|
# Define which sides of the temporal interval are closed (inclusive),
|
|
4784
4705
|
# defaults to `'right'`.
|
|
4785
|
-
# @param warn_if_unsorted [Boolean]
|
|
4786
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
4787
4706
|
#
|
|
4788
4707
|
# @return [Expr]
|
|
4789
4708
|
#
|
|
@@ -4870,14 +4789,13 @@ module Polars
|
|
|
4870
4789
|
def rolling_max_by(
|
|
4871
4790
|
by,
|
|
4872
4791
|
window_size,
|
|
4873
|
-
|
|
4874
|
-
closed: "right"
|
|
4875
|
-
warn_if_unsorted: nil
|
|
4792
|
+
min_samples: 1,
|
|
4793
|
+
closed: "right"
|
|
4876
4794
|
)
|
|
4877
4795
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
4878
4796
|
by = Utils.parse_into_expression(by)
|
|
4879
4797
|
wrap_expr(
|
|
4880
|
-
_rbexpr.rolling_max_by(by, window_size,
|
|
4798
|
+
_rbexpr.rolling_max_by(by, window_size, min_samples, closed)
|
|
4881
4799
|
)
|
|
4882
4800
|
end
|
|
4883
4801
|
|
|
@@ -4905,14 +4823,12 @@ module Polars
|
|
|
4905
4823
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4906
4824
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
4907
4825
|
# "calendar year".
|
|
4908
|
-
# @param
|
|
4826
|
+
# @param min_samples [Integer]
|
|
4909
4827
|
# The number of values in the window that should be non-null before computing
|
|
4910
4828
|
# a result.
|
|
4911
4829
|
# @param closed ['left', 'right', 'both', 'none']
|
|
4912
4830
|
# Define which sides of the temporal interval are closed (inclusive),
|
|
4913
4831
|
# defaults to `'right'`.
|
|
4914
|
-
# @param warn_if_unsorted [Boolean]
|
|
4915
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
4916
4832
|
#
|
|
4917
4833
|
# @return [Expr]
|
|
4918
4834
|
#
|
|
@@ -5001,9 +4917,8 @@ module Polars
|
|
|
5001
4917
|
def rolling_mean_by(
|
|
5002
4918
|
by,
|
|
5003
4919
|
window_size,
|
|
5004
|
-
|
|
5005
|
-
closed: "right"
|
|
5006
|
-
warn_if_unsorted: nil
|
|
4920
|
+
min_samples: 1,
|
|
4921
|
+
closed: "right"
|
|
5007
4922
|
)
|
|
5008
4923
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5009
4924
|
by = Utils.parse_into_expression(by)
|
|
@@ -5011,7 +4926,7 @@ module Polars
|
|
|
5011
4926
|
_rbexpr.rolling_mean_by(
|
|
5012
4927
|
by,
|
|
5013
4928
|
window_size,
|
|
5014
|
-
|
|
4929
|
+
min_samples,
|
|
5015
4930
|
closed
|
|
5016
4931
|
)
|
|
5017
4932
|
)
|
|
@@ -5041,14 +4956,12 @@ module Polars
|
|
|
5041
4956
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
5042
4957
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
5043
4958
|
# "calendar year".
|
|
5044
|
-
# @param
|
|
4959
|
+
# @param min_samples [Integer]
|
|
5045
4960
|
# The number of values in the window that should be non-null before computing
|
|
5046
4961
|
# a result.
|
|
5047
4962
|
# @param closed ['left', 'right', 'both', 'none']
|
|
5048
4963
|
# Define which sides of the temporal interval are closed (inclusive),
|
|
5049
4964
|
# defaults to `'right'`.
|
|
5050
|
-
# @param warn_if_unsorted [Boolean]
|
|
5051
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
5052
4965
|
#
|
|
5053
4966
|
# @return [Expr]
|
|
5054
4967
|
#
|
|
@@ -5135,14 +5048,13 @@ module Polars
|
|
|
5135
5048
|
def rolling_sum_by(
|
|
5136
5049
|
by,
|
|
5137
5050
|
window_size,
|
|
5138
|
-
|
|
5139
|
-
closed: "right"
|
|
5140
|
-
warn_if_unsorted: nil
|
|
5051
|
+
min_samples: 1,
|
|
5052
|
+
closed: "right"
|
|
5141
5053
|
)
|
|
5142
5054
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5143
5055
|
by = Utils.parse_into_expression(by)
|
|
5144
5056
|
wrap_expr(
|
|
5145
|
-
_rbexpr.rolling_sum_by(by, window_size,
|
|
5057
|
+
_rbexpr.rolling_sum_by(by, window_size, min_samples, closed)
|
|
5146
5058
|
)
|
|
5147
5059
|
end
|
|
5148
5060
|
|
|
@@ -5170,7 +5082,7 @@ module Polars
|
|
|
5170
5082
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
5171
5083
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
5172
5084
|
# "calendar year".
|
|
5173
|
-
# @param
|
|
5085
|
+
# @param min_samples [Integer]
|
|
5174
5086
|
# The number of values in the window that should be non-null before computing
|
|
5175
5087
|
# a result.
|
|
5176
5088
|
# @param closed ['left', 'right', 'both', 'none']
|
|
@@ -5178,8 +5090,6 @@ module Polars
|
|
|
5178
5090
|
# defaults to `'right'`.
|
|
5179
5091
|
# @param ddof [Integer]
|
|
5180
5092
|
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
|
5181
|
-
# @param warn_if_unsorted [Boolean]
|
|
5182
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
5183
5093
|
#
|
|
5184
5094
|
# @return [Expr]
|
|
5185
5095
|
#
|
|
@@ -5266,10 +5176,9 @@ module Polars
|
|
|
5266
5176
|
def rolling_std_by(
|
|
5267
5177
|
by,
|
|
5268
5178
|
window_size,
|
|
5269
|
-
|
|
5179
|
+
min_samples: 1,
|
|
5270
5180
|
closed: "right",
|
|
5271
|
-
ddof: 1
|
|
5272
|
-
warn_if_unsorted: nil
|
|
5181
|
+
ddof: 1
|
|
5273
5182
|
)
|
|
5274
5183
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5275
5184
|
by = Utils.parse_into_expression(by)
|
|
@@ -5277,7 +5186,7 @@ module Polars
|
|
|
5277
5186
|
_rbexpr.rolling_std_by(
|
|
5278
5187
|
by,
|
|
5279
5188
|
window_size,
|
|
5280
|
-
|
|
5189
|
+
min_samples,
|
|
5281
5190
|
closed,
|
|
5282
5191
|
ddof
|
|
5283
5192
|
)
|
|
@@ -5308,7 +5217,7 @@ module Polars
|
|
|
5308
5217
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
5309
5218
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
5310
5219
|
# "calendar year".
|
|
5311
|
-
# @param
|
|
5220
|
+
# @param min_samples [Integer]
|
|
5312
5221
|
# The number of values in the window that should be non-null before computing
|
|
5313
5222
|
# a result.
|
|
5314
5223
|
# @param closed ['left', 'right', 'both', 'none']
|
|
@@ -5316,8 +5225,6 @@ module Polars
|
|
|
5316
5225
|
# defaults to `'right'`.
|
|
5317
5226
|
# @param ddof [Integer]
|
|
5318
5227
|
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
|
5319
|
-
# @param warn_if_unsorted [Boolean]
|
|
5320
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
5321
5228
|
#
|
|
5322
5229
|
# @return [Expr]
|
|
5323
5230
|
#
|
|
@@ -5404,10 +5311,9 @@ module Polars
|
|
|
5404
5311
|
def rolling_var_by(
|
|
5405
5312
|
by,
|
|
5406
5313
|
window_size,
|
|
5407
|
-
|
|
5314
|
+
min_samples: 1,
|
|
5408
5315
|
closed: "right",
|
|
5409
|
-
ddof: 1
|
|
5410
|
-
warn_if_unsorted: nil
|
|
5316
|
+
ddof: 1
|
|
5411
5317
|
)
|
|
5412
5318
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5413
5319
|
by = Utils.parse_into_expression(by)
|
|
@@ -5415,7 +5321,7 @@ module Polars
|
|
|
5415
5321
|
_rbexpr.rolling_var_by(
|
|
5416
5322
|
by,
|
|
5417
5323
|
window_size,
|
|
5418
|
-
|
|
5324
|
+
min_samples,
|
|
5419
5325
|
closed,
|
|
5420
5326
|
ddof
|
|
5421
5327
|
)
|
|
@@ -5446,14 +5352,12 @@ module Polars
|
|
|
5446
5352
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
5447
5353
|
# "calendar week", "calendar month", "calendar quarter", and
|
|
5448
5354
|
# "calendar year".
|
|
5449
|
-
# @param
|
|
5355
|
+
# @param min_samples [Integer]
|
|
5450
5356
|
# The number of values in the window that should be non-null before computing
|
|
5451
5357
|
# a result.
|
|
5452
5358
|
# @param closed ['left', 'right', 'both', 'none']
|
|
5453
5359
|
# Define which sides of the temporal interval are closed (inclusive),
|
|
5454
5360
|
# defaults to `'right'`.
|
|
5455
|
-
# @param warn_if_unsorted [Boolean]
|
|
5456
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
5457
5361
|
#
|
|
5458
5362
|
# @return [Expr]
|
|
5459
5363
|
#
|
|
@@ -5516,14 +5420,13 @@ module Polars
|
|
|
5516
5420
|
def rolling_median_by(
|
|
5517
5421
|
by,
|
|
5518
5422
|
window_size,
|
|
5519
|
-
|
|
5520
|
-
closed: "right"
|
|
5521
|
-
warn_if_unsorted: nil
|
|
5423
|
+
min_samples: 1,
|
|
5424
|
+
closed: "right"
|
|
5522
5425
|
)
|
|
5523
5426
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5524
5427
|
by = Utils.parse_into_expression(by)
|
|
5525
5428
|
wrap_expr(
|
|
5526
|
-
_rbexpr.rolling_median_by(by, window_size,
|
|
5429
|
+
_rbexpr.rolling_median_by(by, window_size, min_samples, closed)
|
|
5527
5430
|
)
|
|
5528
5431
|
end
|
|
5529
5432
|
|
|
@@ -5555,14 +5458,12 @@ module Polars
|
|
|
5555
5458
|
# Quantile between 0.0 and 1.0.
|
|
5556
5459
|
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
|
5557
5460
|
# Interpolation method.
|
|
5558
|
-
# @param
|
|
5461
|
+
# @param min_samples [Integer]
|
|
5559
5462
|
# The number of values in the window that should be non-null before computing
|
|
5560
5463
|
# a result.
|
|
5561
5464
|
# @param closed ['left', 'right', 'both', 'none']
|
|
5562
5465
|
# Define which sides of the temporal interval are closed (inclusive),
|
|
5563
5466
|
# defaults to `'right'`.
|
|
5564
|
-
# @param warn_if_unsorted [Boolean]
|
|
5565
|
-
# Warn if data is not known to be sorted by `by` column.
|
|
5566
5467
|
#
|
|
5567
5468
|
# @return [Expr]
|
|
5568
5469
|
#
|
|
@@ -5627,9 +5528,8 @@ module Polars
|
|
|
5627
5528
|
window_size,
|
|
5628
5529
|
quantile:,
|
|
5629
5530
|
interpolation: "nearest",
|
|
5630
|
-
|
|
5631
|
-
closed: "right"
|
|
5632
|
-
warn_if_unsorted: nil
|
|
5531
|
+
min_samples: 1,
|
|
5532
|
+
closed: "right"
|
|
5633
5533
|
)
|
|
5634
5534
|
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5635
5535
|
by = Utils.parse_into_expression(by)
|
|
@@ -5639,12 +5539,99 @@ module Polars
|
|
|
5639
5539
|
quantile,
|
|
5640
5540
|
interpolation,
|
|
5641
5541
|
window_size,
|
|
5642
|
-
|
|
5542
|
+
min_samples,
|
|
5643
5543
|
closed,
|
|
5644
5544
|
)
|
|
5645
5545
|
)
|
|
5646
5546
|
end
|
|
5647
5547
|
|
|
5548
|
+
# Compute a rolling rank based on another column.
|
|
5549
|
+
#
|
|
5550
|
+
# @note
|
|
5551
|
+
# This functionality is considered **unstable**. It may be changed
|
|
5552
|
+
# at any point without it being considered a breaking change.
|
|
5553
|
+
#
|
|
5554
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
5555
|
+
# (the default) means the windows will be:
|
|
5556
|
+
#
|
|
5557
|
+
# - (t_0 - window_size, t_0]
|
|
5558
|
+
# - (t_1 - window_size, t_1]
|
|
5559
|
+
# - ...
|
|
5560
|
+
# - (t_n - window_size, t_n]
|
|
5561
|
+
#
|
|
5562
|
+
# @param by [Expr]
|
|
5563
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
5564
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
5565
|
+
# in `window_size`).
|
|
5566
|
+
# @param window_size [String]
|
|
5567
|
+
# The length of the window. Can be a dynamic
|
|
5568
|
+
# temporal size indicated by a timedelta or the following string language:
|
|
5569
|
+
#
|
|
5570
|
+
# - 1ns (1 nanosecond)
|
|
5571
|
+
# - 1us (1 microsecond)
|
|
5572
|
+
# - 1ms (1 millisecond)
|
|
5573
|
+
# - 1s (1 second)
|
|
5574
|
+
# - 1m (1 minute)
|
|
5575
|
+
# - 1h (1 hour)
|
|
5576
|
+
# - 1d (1 calendar day)
|
|
5577
|
+
# - 1w (1 calendar week)
|
|
5578
|
+
# - 1mo (1 calendar month)
|
|
5579
|
+
# - 1q (1 calendar quarter)
|
|
5580
|
+
# - 1y (1 calendar year)
|
|
5581
|
+
# - 1i (1 index count)
|
|
5582
|
+
#
|
|
5583
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
5584
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
5585
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
5586
|
+
# "calendar year".
|
|
5587
|
+
# @param method ['average', 'min', 'max', 'dense', 'random']
|
|
5588
|
+
# The method used to assign ranks to tied elements.
|
|
5589
|
+
# The following methods are available (default is 'average'):
|
|
5590
|
+
#
|
|
5591
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
|
5592
|
+
# all the tied values is assigned to each value.
|
|
5593
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
|
5594
|
+
# the tied values is assigned to each value. (This is also referred to
|
|
5595
|
+
# as "competition" ranking.)
|
|
5596
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
|
5597
|
+
# the tied values is assigned to each value.
|
|
5598
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
|
5599
|
+
# assigned the rank immediately after those assigned to the tied
|
|
5600
|
+
# elements.
|
|
5601
|
+
# - 'random' : Choose a random rank for each value in a tie.
|
|
5602
|
+
# @param seed [Integer]
|
|
5603
|
+
# Random seed used when `method: 'random'`. If set to nil (default), a
|
|
5604
|
+
# random seed is generated for each rolling rank operation.
|
|
5605
|
+
# @param min_samples [Integer]
|
|
5606
|
+
# The number of values in the window that should be non-null before computing
|
|
5607
|
+
# a result.
|
|
5608
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
5609
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
5610
|
+
# defaults to `'right'`.
|
|
5611
|
+
#
|
|
5612
|
+
# @return [Expr]
|
|
5613
|
+
def rolling_rank_by(
|
|
5614
|
+
by,
|
|
5615
|
+
window_size,
|
|
5616
|
+
method: "average",
|
|
5617
|
+
seed: nil,
|
|
5618
|
+
min_samples: 1,
|
|
5619
|
+
closed: "right"
|
|
5620
|
+
)
|
|
5621
|
+
window_size = _prepare_rolling_by_window_args(window_size)
|
|
5622
|
+
by_rbexpr = Utils.parse_into_expression(by)
|
|
5623
|
+
Utils.wrap_expr(
|
|
5624
|
+
_rbexpr.rolling_rank_by(
|
|
5625
|
+
by_rbexpr,
|
|
5626
|
+
window_size,
|
|
5627
|
+
method,
|
|
5628
|
+
seed,
|
|
5629
|
+
min_samples,
|
|
5630
|
+
closed
|
|
5631
|
+
)
|
|
5632
|
+
)
|
|
5633
|
+
end
|
|
5634
|
+
|
|
5648
5635
|
# Apply a rolling min (moving min) over the values in this array.
|
|
5649
5636
|
#
|
|
5650
5637
|
# A window of length `window_size` will traverse the array. The values that fill
|
|
@@ -5672,7 +5659,7 @@ module Polars
|
|
|
5672
5659
|
# @param weights [Array]
|
|
5673
5660
|
# An optional slice with the same length as the window that will be multiplied
|
|
5674
5661
|
# elementwise with the values in the window.
|
|
5675
|
-
# @param
|
|
5662
|
+
# @param min_samples [Integer]
|
|
5676
5663
|
# The number of values in the window that should be non-null before computing
|
|
5677
5664
|
# a result. If nil, it will be set equal to window size.
|
|
5678
5665
|
# @param center [Boolean]
|
|
@@ -5684,7 +5671,7 @@ module Polars
|
|
|
5684
5671
|
#
|
|
5685
5672
|
# @note
|
|
5686
5673
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
5687
|
-
# window, consider using `
|
|
5674
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
5688
5675
|
# computation.
|
|
5689
5676
|
#
|
|
5690
5677
|
# @return [Expr]
|
|
@@ -5713,12 +5700,12 @@ module Polars
|
|
|
5713
5700
|
def rolling_min(
|
|
5714
5701
|
window_size,
|
|
5715
5702
|
weights: nil,
|
|
5716
|
-
|
|
5703
|
+
min_samples: nil,
|
|
5717
5704
|
center: false
|
|
5718
5705
|
)
|
|
5719
5706
|
wrap_expr(
|
|
5720
5707
|
_rbexpr.rolling_min(
|
|
5721
|
-
window_size, weights,
|
|
5708
|
+
window_size, weights, min_samples, center
|
|
5722
5709
|
)
|
|
5723
5710
|
)
|
|
5724
5711
|
end
|
|
@@ -5750,7 +5737,7 @@ module Polars
|
|
|
5750
5737
|
# @param weights [Array]
|
|
5751
5738
|
# An optional slice with the same length as the window that will be multiplied
|
|
5752
5739
|
# elementwise with the values in the window.
|
|
5753
|
-
# @param
|
|
5740
|
+
# @param min_samples [Integer]
|
|
5754
5741
|
# The number of values in the window that should be non-null before computing
|
|
5755
5742
|
# a result. If nil, it will be set equal to window size.
|
|
5756
5743
|
# @param center [Boolean]
|
|
@@ -5762,7 +5749,7 @@ module Polars
|
|
|
5762
5749
|
#
|
|
5763
5750
|
# @note
|
|
5764
5751
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
5765
|
-
# window, consider using `
|
|
5752
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
5766
5753
|
# computation.
|
|
5767
5754
|
#
|
|
5768
5755
|
# @return [Expr]
|
|
@@ -5791,12 +5778,12 @@ module Polars
|
|
|
5791
5778
|
def rolling_max(
|
|
5792
5779
|
window_size,
|
|
5793
5780
|
weights: nil,
|
|
5794
|
-
|
|
5781
|
+
min_samples: nil,
|
|
5795
5782
|
center: false
|
|
5796
5783
|
)
|
|
5797
5784
|
wrap_expr(
|
|
5798
5785
|
_rbexpr.rolling_max(
|
|
5799
|
-
window_size, weights,
|
|
5786
|
+
window_size, weights, min_samples, center
|
|
5800
5787
|
)
|
|
5801
5788
|
)
|
|
5802
5789
|
end
|
|
@@ -5828,7 +5815,7 @@ module Polars
|
|
|
5828
5815
|
# @param weights [Array]
|
|
5829
5816
|
# An optional slice with the same length as the window that will be multiplied
|
|
5830
5817
|
# elementwise with the values in the window.
|
|
5831
|
-
# @param
|
|
5818
|
+
# @param min_samples [Integer]
|
|
5832
5819
|
# The number of values in the window that should be non-null before computing
|
|
5833
5820
|
# a result. If nil, it will be set equal to window size.
|
|
5834
5821
|
# @param center [Boolean]
|
|
@@ -5840,7 +5827,7 @@ module Polars
|
|
|
5840
5827
|
#
|
|
5841
5828
|
# @note
|
|
5842
5829
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
5843
|
-
# window, consider using `
|
|
5830
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
5844
5831
|
# computation.
|
|
5845
5832
|
#
|
|
5846
5833
|
# @return [Expr]
|
|
@@ -5869,12 +5856,12 @@ module Polars
|
|
|
5869
5856
|
def rolling_mean(
|
|
5870
5857
|
window_size,
|
|
5871
5858
|
weights: nil,
|
|
5872
|
-
|
|
5859
|
+
min_samples: nil,
|
|
5873
5860
|
center: false
|
|
5874
5861
|
)
|
|
5875
5862
|
wrap_expr(
|
|
5876
5863
|
_rbexpr.rolling_mean(
|
|
5877
|
-
window_size, weights,
|
|
5864
|
+
window_size, weights, min_samples, center
|
|
5878
5865
|
)
|
|
5879
5866
|
)
|
|
5880
5867
|
end
|
|
@@ -5906,7 +5893,7 @@ module Polars
|
|
|
5906
5893
|
# @param weights [Array]
|
|
5907
5894
|
# An optional slice with the same length as the window that will be multiplied
|
|
5908
5895
|
# elementwise with the values in the window.
|
|
5909
|
-
# @param
|
|
5896
|
+
# @param min_samples [Integer]
|
|
5910
5897
|
# The number of values in the window that should be non-null before computing
|
|
5911
5898
|
# a result. If nil, it will be set equal to window size.
|
|
5912
5899
|
# @param center [Boolean]
|
|
@@ -5918,7 +5905,7 @@ module Polars
|
|
|
5918
5905
|
#
|
|
5919
5906
|
# @note
|
|
5920
5907
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
5921
|
-
# window, consider using `
|
|
5908
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
5922
5909
|
# computation.
|
|
5923
5910
|
#
|
|
5924
5911
|
# @return [Expr]
|
|
@@ -5947,12 +5934,12 @@ module Polars
|
|
|
5947
5934
|
def rolling_sum(
|
|
5948
5935
|
window_size,
|
|
5949
5936
|
weights: nil,
|
|
5950
|
-
|
|
5937
|
+
min_samples: nil,
|
|
5951
5938
|
center: false
|
|
5952
5939
|
)
|
|
5953
5940
|
wrap_expr(
|
|
5954
5941
|
_rbexpr.rolling_sum(
|
|
5955
|
-
window_size, weights,
|
|
5942
|
+
window_size, weights, min_samples, center
|
|
5956
5943
|
)
|
|
5957
5944
|
)
|
|
5958
5945
|
end
|
|
@@ -5984,7 +5971,7 @@ module Polars
|
|
|
5984
5971
|
# @param weights [Array]
|
|
5985
5972
|
# An optional slice with the same length as the window that will be multiplied
|
|
5986
5973
|
# elementwise with the values in the window.
|
|
5987
|
-
# @param
|
|
5974
|
+
# @param min_samples [Integer]
|
|
5988
5975
|
# The number of values in the window that should be non-null before computing
|
|
5989
5976
|
# a result. If nil, it will be set equal to window size.
|
|
5990
5977
|
# @param center [Boolean]
|
|
@@ -5998,7 +5985,7 @@ module Polars
|
|
|
5998
5985
|
#
|
|
5999
5986
|
# @note
|
|
6000
5987
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
6001
|
-
# window, consider using `
|
|
5988
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
6002
5989
|
# computation.
|
|
6003
5990
|
#
|
|
6004
5991
|
# @return [Expr]
|
|
@@ -6027,13 +6014,13 @@ module Polars
|
|
|
6027
6014
|
def rolling_std(
|
|
6028
6015
|
window_size,
|
|
6029
6016
|
weights: nil,
|
|
6030
|
-
|
|
6017
|
+
min_samples: nil,
|
|
6031
6018
|
center: false,
|
|
6032
6019
|
ddof: 1
|
|
6033
6020
|
)
|
|
6034
6021
|
wrap_expr(
|
|
6035
6022
|
_rbexpr.rolling_std(
|
|
6036
|
-
window_size, weights,
|
|
6023
|
+
window_size, weights, min_samples, center, ddof
|
|
6037
6024
|
)
|
|
6038
6025
|
)
|
|
6039
6026
|
end
|
|
@@ -6065,7 +6052,7 @@ module Polars
|
|
|
6065
6052
|
# @param weights [Array]
|
|
6066
6053
|
# An optional slice with the same length as the window that will be multiplied
|
|
6067
6054
|
# elementwise with the values in the window.
|
|
6068
|
-
# @param
|
|
6055
|
+
# @param min_samples [Integer]
|
|
6069
6056
|
# The number of values in the window that should be non-null before computing
|
|
6070
6057
|
# a result. If nil, it will be set equal to window size.
|
|
6071
6058
|
# @param center [Boolean]
|
|
@@ -6079,7 +6066,7 @@ module Polars
|
|
|
6079
6066
|
#
|
|
6080
6067
|
# @note
|
|
6081
6068
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
6082
|
-
# window, consider using `
|
|
6069
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
6083
6070
|
# computation.
|
|
6084
6071
|
#
|
|
6085
6072
|
# @return [Expr]
|
|
@@ -6108,13 +6095,13 @@ module Polars
|
|
|
6108
6095
|
def rolling_var(
|
|
6109
6096
|
window_size,
|
|
6110
6097
|
weights: nil,
|
|
6111
|
-
|
|
6098
|
+
min_samples: nil,
|
|
6112
6099
|
center: false,
|
|
6113
6100
|
ddof: 1
|
|
6114
6101
|
)
|
|
6115
6102
|
wrap_expr(
|
|
6116
6103
|
_rbexpr.rolling_var(
|
|
6117
|
-
window_size, weights,
|
|
6104
|
+
window_size, weights, min_samples, center, ddof
|
|
6118
6105
|
)
|
|
6119
6106
|
)
|
|
6120
6107
|
end
|
|
@@ -6142,7 +6129,7 @@ module Polars
|
|
|
6142
6129
|
# @param weights [Array]
|
|
6143
6130
|
# An optional slice with the same length as the window that will be multiplied
|
|
6144
6131
|
# elementwise with the values in the window.
|
|
6145
|
-
# @param
|
|
6132
|
+
# @param min_samples [Integer]
|
|
6146
6133
|
# The number of values in the window that should be non-null before computing
|
|
6147
6134
|
# a result. If nil, it will be set equal to window size.
|
|
6148
6135
|
# @param center [Boolean]
|
|
@@ -6154,7 +6141,7 @@ module Polars
|
|
|
6154
6141
|
#
|
|
6155
6142
|
# @note
|
|
6156
6143
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
6157
|
-
# window, consider using `
|
|
6144
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
6158
6145
|
# computation.
|
|
6159
6146
|
#
|
|
6160
6147
|
# @return [Expr]
|
|
@@ -6183,12 +6170,12 @@ module Polars
|
|
|
6183
6170
|
def rolling_median(
|
|
6184
6171
|
window_size,
|
|
6185
6172
|
weights: nil,
|
|
6186
|
-
|
|
6173
|
+
min_samples: nil,
|
|
6187
6174
|
center: false
|
|
6188
6175
|
)
|
|
6189
6176
|
wrap_expr(
|
|
6190
6177
|
_rbexpr.rolling_median(
|
|
6191
|
-
window_size, weights,
|
|
6178
|
+
window_size, weights, min_samples, center
|
|
6192
6179
|
)
|
|
6193
6180
|
)
|
|
6194
6181
|
end
|
|
@@ -6220,7 +6207,7 @@ module Polars
|
|
|
6220
6207
|
# @param weights [Array]
|
|
6221
6208
|
# An optional slice with the same length as the window that will be multiplied
|
|
6222
6209
|
# elementwise with the values in the window.
|
|
6223
|
-
# @param
|
|
6210
|
+
# @param min_samples [Integer]
|
|
6224
6211
|
# The number of values in the window that should be non-null before computing
|
|
6225
6212
|
# a result. If nil, it will be set equal to window size.
|
|
6226
6213
|
# @param center [Boolean]
|
|
@@ -6232,7 +6219,7 @@ module Polars
|
|
|
6232
6219
|
#
|
|
6233
6220
|
# @note
|
|
6234
6221
|
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
6235
|
-
# window, consider using `
|
|
6222
|
+
# window, consider using `rolling`; this method can cache the window size
|
|
6236
6223
|
# computation.
|
|
6237
6224
|
#
|
|
6238
6225
|
# @return [Expr]
|
|
@@ -6263,12 +6250,85 @@ module Polars
|
|
|
6263
6250
|
interpolation: "nearest",
|
|
6264
6251
|
window_size: 2,
|
|
6265
6252
|
weights: nil,
|
|
6266
|
-
|
|
6253
|
+
min_samples: nil,
|
|
6267
6254
|
center: false
|
|
6268
6255
|
)
|
|
6269
6256
|
wrap_expr(
|
|
6270
6257
|
_rbexpr.rolling_quantile(
|
|
6271
|
-
quantile, interpolation, window_size, weights,
|
|
6258
|
+
quantile, interpolation, window_size, weights, min_samples, center
|
|
6259
|
+
)
|
|
6260
|
+
)
|
|
6261
|
+
end
|
|
6262
|
+
|
|
6263
|
+
# Compute a rolling rank.
|
|
6264
|
+
#
|
|
6265
|
+
# @note
|
|
6266
|
+
# This functionality is considered **unstable**. It may be changed
|
|
6267
|
+
# at any point without it being considered a breaking change.
|
|
6268
|
+
#
|
|
6269
|
+
# A window of length `window_size` will traverse the array. The values
|
|
6270
|
+
# that fill this window will be ranked according to the `method`
|
|
6271
|
+
# parameter. The resulting values will be the rank of the value that is
|
|
6272
|
+
# at the end of the sliding window.
|
|
6273
|
+
#
|
|
6274
|
+
# @param window_size [Integer]
|
|
6275
|
+
# Integer size of the rolling window.
|
|
6276
|
+
# @param method ['average', 'min', 'max', 'dense', 'random']
|
|
6277
|
+
# The method used to assign ranks to tied elements.
|
|
6278
|
+
# The following methods are available (default is 'average'):
|
|
6279
|
+
#
|
|
6280
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
|
6281
|
+
# all the tied values is assigned to each value.
|
|
6282
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
|
6283
|
+
# the tied values is assigned to each value. (This is also referred to
|
|
6284
|
+
# as "competition" ranking.)
|
|
6285
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
|
6286
|
+
# the tied values is assigned to each value.
|
|
6287
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
|
6288
|
+
# assigned the rank immediately after those assigned to the tied
|
|
6289
|
+
# elements.
|
|
6290
|
+
# - 'random' : Choose a random rank for each value in a tie.
|
|
6291
|
+
# @param seed [Integer]
|
|
6292
|
+
# Random seed used when `method: 'random'`. If set to nil (default), a
|
|
6293
|
+
# random seed is generated for each rolling rank operation.
|
|
6294
|
+
# @param min_samples [Integer]
|
|
6295
|
+
# The number of values in the window that should be non-null before computing
|
|
6296
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
|
6297
|
+
# @param center [Boolean]
|
|
6298
|
+
# Set the labels at the center of the window.
|
|
6299
|
+
#
|
|
6300
|
+
# @return [Expr]
|
|
6301
|
+
#
|
|
6302
|
+
# @example
|
|
6303
|
+
# df = Polars::DataFrame.new({"a" => [1, 4, 4, 1, 9]})
|
|
6304
|
+
# df.select(Polars.col("a").rolling_rank(3, method: "average"))
|
|
6305
|
+
# # =>
|
|
6306
|
+
# # shape: (5, 1)
|
|
6307
|
+
# # ┌──────┐
|
|
6308
|
+
# # │ a │
|
|
6309
|
+
# # │ --- │
|
|
6310
|
+
# # │ f64 │
|
|
6311
|
+
# # ╞══════╡
|
|
6312
|
+
# # │ null │
|
|
6313
|
+
# # │ null │
|
|
6314
|
+
# # │ 2.5 │
|
|
6315
|
+
# # │ 1.0 │
|
|
6316
|
+
# # │ 3.0 │
|
|
6317
|
+
# # └──────┘
|
|
6318
|
+
def rolling_rank(
|
|
6319
|
+
window_size,
|
|
6320
|
+
method: "average",
|
|
6321
|
+
seed: nil,
|
|
6322
|
+
min_samples: nil,
|
|
6323
|
+
center: false
|
|
6324
|
+
)
|
|
6325
|
+
Utils.wrap_expr(
|
|
6326
|
+
_rbexpr.rolling_rank(
|
|
6327
|
+
window_size,
|
|
6328
|
+
method,
|
|
6329
|
+
seed,
|
|
6330
|
+
min_samples,
|
|
6331
|
+
center
|
|
6272
6332
|
)
|
|
6273
6333
|
)
|
|
6274
6334
|
end
|
|
@@ -6288,7 +6348,7 @@ module Polars
|
|
|
6288
6348
|
# @param weights [Object]
|
|
6289
6349
|
# An optional slice with the same length as the window that will be multiplied
|
|
6290
6350
|
# elementwise with the values in the window.
|
|
6291
|
-
# @param
|
|
6351
|
+
# @param min_samples [Integer]
|
|
6292
6352
|
# The number of values in the window that should be non-null before computing
|
|
6293
6353
|
# a result. If nil, it will be set equal to window size.
|
|
6294
6354
|
# @param center [Boolean]
|
|
@@ -6323,16 +6383,16 @@ module Polars
|
|
|
6323
6383
|
# def rolling_apply(
|
|
6324
6384
|
# window_size:,
|
|
6325
6385
|
# weights: nil,
|
|
6326
|
-
#
|
|
6386
|
+
# min_samples: nil,
|
|
6327
6387
|
# center: false,
|
|
6328
6388
|
# &function
|
|
6329
6389
|
# )
|
|
6330
|
-
# if
|
|
6331
|
-
#
|
|
6390
|
+
# if min_samples.nil?
|
|
6391
|
+
# min_samples = window_size
|
|
6332
6392
|
# end
|
|
6333
6393
|
# wrap_expr(
|
|
6334
6394
|
# _rbexpr.rolling_apply(
|
|
6335
|
-
# function, window_size, weights,
|
|
6395
|
+
# function, window_size, weights, min_samples, center
|
|
6336
6396
|
# )
|
|
6337
6397
|
# )
|
|
6338
6398
|
# end
|
|
@@ -6454,39 +6514,6 @@ module Polars
|
|
|
6454
6514
|
wrap_expr(_rbexpr.abs)
|
|
6455
6515
|
end
|
|
6456
6516
|
|
|
6457
|
-
# Get the index values that would sort this column.
|
|
6458
|
-
#
|
|
6459
|
-
# Alias for {#arg_sort}.
|
|
6460
|
-
#
|
|
6461
|
-
# @param reverse [Boolean]
|
|
6462
|
-
# Sort in reverse (descending) order.
|
|
6463
|
-
# @param nulls_last [Boolean]
|
|
6464
|
-
# Place null values last instead of first.
|
|
6465
|
-
#
|
|
6466
|
-
# @return [expr]
|
|
6467
|
-
#
|
|
6468
|
-
# @example
|
|
6469
|
-
# df = Polars::DataFrame.new(
|
|
6470
|
-
# {
|
|
6471
|
-
# "a" => [20, 10, 30]
|
|
6472
|
-
# }
|
|
6473
|
-
# )
|
|
6474
|
-
# df.select(Polars.col("a").argsort)
|
|
6475
|
-
# # =>
|
|
6476
|
-
# # shape: (3, 1)
|
|
6477
|
-
# # ┌─────┐
|
|
6478
|
-
# # │ a │
|
|
6479
|
-
# # │ --- │
|
|
6480
|
-
# # │ u32 │
|
|
6481
|
-
# # ╞═════╡
|
|
6482
|
-
# # │ 1 │
|
|
6483
|
-
# # │ 0 │
|
|
6484
|
-
# # │ 2 │
|
|
6485
|
-
# # └─────┘
|
|
6486
|
-
def argsort(reverse: false, nulls_last: false)
|
|
6487
|
-
arg_sort(reverse: reverse, nulls_last: nulls_last)
|
|
6488
|
-
end
|
|
6489
|
-
|
|
6490
6517
|
# Assign ranks to data, dealing with ties appropriately.
|
|
6491
6518
|
#
|
|
6492
6519
|
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
|
|
@@ -6507,7 +6534,7 @@ module Polars
|
|
|
6507
6534
|
# the order that the values occur in the Series.
|
|
6508
6535
|
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
|
6509
6536
|
# on the order that the values occur in the Series.
|
|
6510
|
-
# @param
|
|
6537
|
+
# @param descending [Boolean]
|
|
6511
6538
|
# Reverse the operation.
|
|
6512
6539
|
# @param seed [Integer]
|
|
6513
6540
|
# If `method: "random"`, use this as seed.
|
|
@@ -6547,8 +6574,8 @@ module Polars
|
|
|
6547
6574
|
# # │ 2 │
|
|
6548
6575
|
# # │ 5 │
|
|
6549
6576
|
# # └─────┘
|
|
6550
|
-
def rank(method: "average",
|
|
6551
|
-
wrap_expr(_rbexpr.rank(method,
|
|
6577
|
+
def rank(method: "average", descending: false, seed: nil)
|
|
6578
|
+
wrap_expr(_rbexpr.rank(method, descending, seed))
|
|
6552
6579
|
end
|
|
6553
6580
|
|
|
6554
6581
|
# Calculate the n-th discrete difference.
|
|
@@ -6601,7 +6628,7 @@ module Polars
|
|
|
6601
6628
|
# "a" => [10, 11, 12, nil, 12]
|
|
6602
6629
|
# }
|
|
6603
6630
|
# )
|
|
6604
|
-
# df.
|
|
6631
|
+
# df.with_columns(Polars.col("a").pct_change.alias("pct_change"))
|
|
6605
6632
|
# # =>
|
|
6606
6633
|
# # shape: (5, 2)
|
|
6607
6634
|
# # ┌──────┬────────────┐
|
|
@@ -6654,7 +6681,7 @@ module Polars
|
|
|
6654
6681
|
# Kurtosis is the fourth central moment divided by the square of the
|
|
6655
6682
|
# variance. If Fisher's definition is used, then 3.0 is subtracted from
|
|
6656
6683
|
# the result to give 0.0 for a normal distribution.
|
|
6657
|
-
# If bias is
|
|
6684
|
+
# If bias is false then the kurtosis is calculated using k statistics to
|
|
6658
6685
|
# eliminate bias coming from biased moment estimators.
|
|
6659
6686
|
#
|
|
6660
6687
|
# @param fisher [Boolean]
|
|
@@ -6695,7 +6722,7 @@ module Polars
|
|
|
6695
6722
|
#
|
|
6696
6723
|
# @example
|
|
6697
6724
|
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
|
6698
|
-
# df.
|
|
6725
|
+
# df.with_columns(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
|
|
6699
6726
|
# # =>
|
|
6700
6727
|
# # shape: (4, 2)
|
|
6701
6728
|
# # ┌──────┬─────────────┐
|
|
@@ -6718,68 +6745,6 @@ module Polars
|
|
|
6718
6745
|
wrap_expr(_rbexpr.clip(lower_bound, upper_bound))
|
|
6719
6746
|
end
|
|
6720
6747
|
|
|
6721
|
-
# Clip (limit) the values in an array to a `min` boundary.
|
|
6722
|
-
#
|
|
6723
|
-
# Only works for numerical types.
|
|
6724
|
-
#
|
|
6725
|
-
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
|
6726
|
-
# expression. See `when` for more information.
|
|
6727
|
-
#
|
|
6728
|
-
# @param lower_bound [Numeric]
|
|
6729
|
-
# Minimum value.
|
|
6730
|
-
#
|
|
6731
|
-
# @return [Expr]
|
|
6732
|
-
#
|
|
6733
|
-
# @example
|
|
6734
|
-
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
|
6735
|
-
# df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
|
|
6736
|
-
# # =>
|
|
6737
|
-
# # shape: (4, 2)
|
|
6738
|
-
# # ┌──────┬─────────────┐
|
|
6739
|
-
# # │ foo ┆ foo_clipped │
|
|
6740
|
-
# # │ --- ┆ --- │
|
|
6741
|
-
# # │ i64 ┆ i64 │
|
|
6742
|
-
# # ╞══════╪═════════════╡
|
|
6743
|
-
# # │ -50 ┆ 0 │
|
|
6744
|
-
# # │ 5 ┆ 5 │
|
|
6745
|
-
# # │ null ┆ null │
|
|
6746
|
-
# # │ 50 ┆ 50 │
|
|
6747
|
-
# # └──────┴─────────────┘
|
|
6748
|
-
def clip_min(lower_bound)
|
|
6749
|
-
clip(lower_bound, nil)
|
|
6750
|
-
end
|
|
6751
|
-
|
|
6752
|
-
# Clip (limit) the values in an array to a `max` boundary.
|
|
6753
|
-
#
|
|
6754
|
-
# Only works for numerical types.
|
|
6755
|
-
#
|
|
6756
|
-
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
|
6757
|
-
# expression. See `when` for more information.
|
|
6758
|
-
#
|
|
6759
|
-
# @param upper_bound [Numeric]
|
|
6760
|
-
# Maximum value.
|
|
6761
|
-
#
|
|
6762
|
-
# @return [Expr]
|
|
6763
|
-
#
|
|
6764
|
-
# @example
|
|
6765
|
-
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
|
6766
|
-
# df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
|
|
6767
|
-
# # =>
|
|
6768
|
-
# # shape: (4, 2)
|
|
6769
|
-
# # ┌──────┬─────────────┐
|
|
6770
|
-
# # │ foo ┆ foo_clipped │
|
|
6771
|
-
# # │ --- ┆ --- │
|
|
6772
|
-
# # │ i64 ┆ i64 │
|
|
6773
|
-
# # ╞══════╪═════════════╡
|
|
6774
|
-
# # │ -50 ┆ -50 │
|
|
6775
|
-
# # │ 5 ┆ 0 │
|
|
6776
|
-
# # │ null ┆ null │
|
|
6777
|
-
# # │ 50 ┆ 0 │
|
|
6778
|
-
# # └──────┴─────────────┘
|
|
6779
|
-
def clip_max(upper_bound)
|
|
6780
|
-
clip(nil, upper_bound)
|
|
6781
|
-
end
|
|
6782
|
-
|
|
6783
6748
|
# Calculate the lower bound.
|
|
6784
6749
|
#
|
|
6785
6750
|
# Returns a unit Series with the lowest value possible for the dtype of this
|
|
@@ -7168,7 +7133,7 @@ module Polars
|
|
|
7168
7133
|
|
|
7169
7134
|
# Reshape this Expr to a flat Series or a Series of Lists.
|
|
7170
7135
|
#
|
|
7171
|
-
# @param
|
|
7136
|
+
# @param dimensions [Array]
|
|
7172
7137
|
# Array of the dimension sizes. If a -1 is used in any of the dimensions, that
|
|
7173
7138
|
# dimension is inferred.
|
|
7174
7139
|
#
|
|
@@ -7208,8 +7173,8 @@ module Polars
|
|
|
7208
7173
|
# # │ 8 │
|
|
7209
7174
|
# # │ 9 │
|
|
7210
7175
|
# # └─────┘
|
|
7211
|
-
def reshape(
|
|
7212
|
-
wrap_expr(_rbexpr.reshape(
|
|
7176
|
+
def reshape(dimensions)
|
|
7177
|
+
wrap_expr(_rbexpr.reshape(dimensions))
|
|
7213
7178
|
end
|
|
7214
7179
|
|
|
7215
7180
|
# Shuffle the contents of this expr.
|
|
@@ -7243,7 +7208,7 @@ module Polars
|
|
|
7243
7208
|
|
|
7244
7209
|
# Sample from this expression.
|
|
7245
7210
|
#
|
|
7246
|
-
# @param
|
|
7211
|
+
# @param fraction [Float]
|
|
7247
7212
|
# Fraction of items to return. Cannot be used with `n`.
|
|
7248
7213
|
# @param with_replacement [Boolean]
|
|
7249
7214
|
# Allow values to be sampled more than once.
|
|
@@ -7253,13 +7218,13 @@ module Polars
|
|
|
7253
7218
|
# Seed for the random number generator. If set to nil (default), a random
|
|
7254
7219
|
# seed is used.
|
|
7255
7220
|
# @param n [Integer]
|
|
7256
|
-
# Number of items to return. Cannot be used with `
|
|
7221
|
+
# Number of items to return. Cannot be used with `fraction`.
|
|
7257
7222
|
#
|
|
7258
7223
|
# @return [Expr]
|
|
7259
7224
|
#
|
|
7260
7225
|
# @example
|
|
7261
7226
|
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
|
7262
|
-
# df.select(Polars.col("a").sample(
|
|
7227
|
+
# df.select(Polars.col("a").sample(fraction: 1.0, with_replacement: true, seed: 1))
|
|
7263
7228
|
# # =>
|
|
7264
7229
|
# # shape: (3, 1)
|
|
7265
7230
|
# # ┌─────┐
|
|
@@ -7272,27 +7237,33 @@ module Polars
|
|
|
7272
7237
|
# # │ 1 │
|
|
7273
7238
|
# # └─────┘
|
|
7274
7239
|
def sample(
|
|
7275
|
-
|
|
7276
|
-
with_replacement:
|
|
7240
|
+
fraction: nil,
|
|
7241
|
+
with_replacement: nil,
|
|
7277
7242
|
shuffle: false,
|
|
7278
7243
|
seed: nil,
|
|
7279
7244
|
n: nil
|
|
7280
7245
|
)
|
|
7281
|
-
|
|
7282
|
-
|
|
7246
|
+
# TODO update
|
|
7247
|
+
if with_replacement.nil?
|
|
7248
|
+
warn "The default `with_replacement` for `sample` method will change from `true` to `false` in a future version"
|
|
7249
|
+
with_replacement = true
|
|
7283
7250
|
end
|
|
7284
7251
|
|
|
7285
|
-
if !n.nil? &&
|
|
7252
|
+
if !n.nil? && !fraction.nil?
|
|
7253
|
+
raise ArgumentError, "cannot specify both `n` and `fraction`"
|
|
7254
|
+
end
|
|
7255
|
+
|
|
7256
|
+
if !n.nil? && fraction.nil?
|
|
7286
7257
|
n = Utils.parse_into_expression(n)
|
|
7287
7258
|
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
|
7288
7259
|
end
|
|
7289
7260
|
|
|
7290
|
-
if
|
|
7291
|
-
|
|
7261
|
+
if fraction.nil?
|
|
7262
|
+
fraction = 1.0
|
|
7292
7263
|
end
|
|
7293
|
-
|
|
7264
|
+
fraction = Utils.parse_into_expression(fraction)
|
|
7294
7265
|
wrap_expr(
|
|
7295
|
-
_rbexpr.sample_frac(
|
|
7266
|
+
_rbexpr.sample_frac(fraction, with_replacement, shuffle, seed)
|
|
7296
7267
|
)
|
|
7297
7268
|
end
|
|
7298
7269
|
|
|
@@ -7320,11 +7291,11 @@ module Polars
|
|
|
7320
7291
|
half_life: nil,
|
|
7321
7292
|
alpha: nil,
|
|
7322
7293
|
adjust: true,
|
|
7323
|
-
|
|
7324
|
-
ignore_nulls:
|
|
7294
|
+
min_samples: 1,
|
|
7295
|
+
ignore_nulls: false
|
|
7325
7296
|
)
|
|
7326
7297
|
alpha = _prepare_alpha(com, span, half_life, alpha)
|
|
7327
|
-
wrap_expr(_rbexpr.ewm_mean(alpha, adjust,
|
|
7298
|
+
wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_samples, ignore_nulls))
|
|
7328
7299
|
end
|
|
7329
7300
|
|
|
7330
7301
|
# Compute time-based exponentially weighted moving average.
|
|
@@ -7421,11 +7392,11 @@ module Polars
|
|
|
7421
7392
|
alpha: nil,
|
|
7422
7393
|
adjust: true,
|
|
7423
7394
|
bias: false,
|
|
7424
|
-
|
|
7425
|
-
ignore_nulls:
|
|
7395
|
+
min_samples: 1,
|
|
7396
|
+
ignore_nulls: false
|
|
7426
7397
|
)
|
|
7427
7398
|
alpha = _prepare_alpha(com, span, half_life, alpha)
|
|
7428
|
-
wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias,
|
|
7399
|
+
wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_samples, ignore_nulls))
|
|
7429
7400
|
end
|
|
7430
7401
|
|
|
7431
7402
|
# Exponentially-weighted moving variance.
|
|
@@ -7453,11 +7424,11 @@ module Polars
|
|
|
7453
7424
|
alpha: nil,
|
|
7454
7425
|
adjust: true,
|
|
7455
7426
|
bias: false,
|
|
7456
|
-
|
|
7457
|
-
ignore_nulls:
|
|
7427
|
+
min_samples: 1,
|
|
7428
|
+
ignore_nulls: false
|
|
7458
7429
|
)
|
|
7459
7430
|
alpha = _prepare_alpha(com, span, half_life, alpha)
|
|
7460
|
-
wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias,
|
|
7431
|
+
wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_samples, ignore_nulls))
|
|
7461
7432
|
end
|
|
7462
7433
|
|
|
7463
7434
|
# Extend the Series with given number of values.
|
|
@@ -7636,7 +7607,7 @@ module Polars
|
|
|
7636
7607
|
# Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
|
|
7637
7608
|
#
|
|
7638
7609
|
# @param base [Float]
|
|
7639
|
-
# Given base, defaults to `
|
|
7610
|
+
# Given base, defaults to `2`.
|
|
7640
7611
|
# @param normalize [Boolean]
|
|
7641
7612
|
# Normalize pk if it doesn't sum to 1.
|
|
7642
7613
|
#
|
|
@@ -7666,7 +7637,13 @@ module Polars
|
|
|
7666
7637
|
# # ╞═══════════╡
|
|
7667
7638
|
# # │ -6.754888 │
|
|
7668
7639
|
# # └───────────┘
|
|
7669
|
-
def entropy(base:
|
|
7640
|
+
def entropy(base: nil, normalize: true)
|
|
7641
|
+
# TODO update (including param docs)
|
|
7642
|
+
if base.nil?
|
|
7643
|
+
warn "The default `base` for `entropy` method will change from `2` to `Math::E` in a future version"
|
|
7644
|
+
base = 2
|
|
7645
|
+
end
|
|
7646
|
+
|
|
7670
7647
|
wrap_expr(_rbexpr.entropy(base, normalize))
|
|
7671
7648
|
end
|
|
7672
7649
|
|
|
@@ -7674,7 +7651,7 @@ module Polars
|
|
|
7674
7651
|
#
|
|
7675
7652
|
# @param expr [Expr]
|
|
7676
7653
|
# Expression to evaluate
|
|
7677
|
-
# @param
|
|
7654
|
+
# @param min_samples [Integer]
|
|
7678
7655
|
# Number of valid values there should be in the window before the expression
|
|
7679
7656
|
# is evaluated. valid values = `length - null_count`
|
|
7680
7657
|
#
|
|
@@ -7710,9 +7687,9 @@ module Polars
|
|
|
7710
7687
|
# # │ -15 │
|
|
7711
7688
|
# # │ -24 │
|
|
7712
7689
|
# # └────────┘
|
|
7713
|
-
def cumulative_eval(expr,
|
|
7690
|
+
def cumulative_eval(expr, min_samples: 1)
|
|
7714
7691
|
wrap_expr(
|
|
7715
|
-
_rbexpr.cumulative_eval(expr._rbexpr,
|
|
7692
|
+
_rbexpr.cumulative_eval(expr._rbexpr, min_samples)
|
|
7716
7693
|
)
|
|
7717
7694
|
end
|
|
7718
7695
|
|
|
@@ -7770,17 +7747,6 @@ module Polars
|
|
|
7770
7747
|
wrap_expr(_rbexpr.implode)
|
|
7771
7748
|
end
|
|
7772
7749
|
|
|
7773
|
-
# Shrink numeric columns to the minimal required datatype.
|
|
7774
|
-
#
|
|
7775
|
-
# Shrink to the dtype needed to fit the extrema of this `Series`.
|
|
7776
|
-
# This can be used to reduce memory pressure.
|
|
7777
|
-
#
|
|
7778
|
-
# @return [Expr]
|
|
7779
|
-
def shrink_dtype
|
|
7780
|
-
warn "`Expr.shrink_dtype` is deprecated and is a no-op; use `Series.shrink_dtype` instead."
|
|
7781
|
-
self
|
|
7782
|
-
end
|
|
7783
|
-
|
|
7784
7750
|
# Bin values into buckets and count their occurrences.
|
|
7785
7751
|
#
|
|
7786
7752
|
# @note
|
|
@@ -7849,13 +7815,13 @@ module Polars
|
|
|
7849
7815
|
# Replace values by different values.
|
|
7850
7816
|
#
|
|
7851
7817
|
# @param old [Object]
|
|
7852
|
-
# Value or
|
|
7853
|
-
# Accepts expression input.
|
|
7818
|
+
# Value or array of values to replace.
|
|
7819
|
+
# Accepts expression input. Arrays are parsed as Series,
|
|
7854
7820
|
# other non-expression inputs are parsed as literals.
|
|
7855
7821
|
# Also accepts a mapping of values to their replacement.
|
|
7856
7822
|
# @param new [Object]
|
|
7857
|
-
# Value or
|
|
7858
|
-
# Accepts expression input.
|
|
7823
|
+
# Value or array of values to replace by.
|
|
7824
|
+
# Accepts expression input. Arrays are parsed as Series,
|
|
7859
7825
|
# other non-expression inputs are parsed as literals.
|
|
7860
7826
|
# Length must match the length of `old` or have length 1.
|
|
7861
7827
|
# @param default [Object]
|
|
@@ -7884,7 +7850,7 @@ module Polars
|
|
|
7884
7850
|
# # │ 3 ┆ 3 │
|
|
7885
7851
|
# # └─────┴──────────┘
|
|
7886
7852
|
#
|
|
7887
|
-
# @example Replace multiple values by passing
|
|
7853
|
+
# @example Replace multiple values by passing arrays to the `old` and `new` parameters.
|
|
7888
7854
|
# df.with_columns(replaced: Polars.col("a").replace([2, 3], [100, 200]))
|
|
7889
7855
|
# # =>
|
|
7890
7856
|
# # shape: (4, 2)
|
|
@@ -8014,14 +7980,14 @@ module Polars
|
|
|
8014
7980
|
# Replace all values by different values.
|
|
8015
7981
|
#
|
|
8016
7982
|
# @param old [Object]
|
|
8017
|
-
# Value or
|
|
8018
|
-
# Accepts expression input.
|
|
7983
|
+
# Value or array of values to replace.
|
|
7984
|
+
# Accepts expression input. Arrays are parsed as Series,
|
|
8019
7985
|
# other non-expression inputs are parsed as literals.
|
|
8020
7986
|
# Also accepts a mapping of values to their replacement as syntactic sugar for
|
|
8021
7987
|
# `replace_all(old: Series.new(mapping.keys), new: Series.new(mapping.values))`.
|
|
8022
7988
|
# @param new [Object]
|
|
8023
|
-
# Value or
|
|
8024
|
-
# Accepts expression input.
|
|
7989
|
+
# Value or array of values to replace by.
|
|
7990
|
+
# Accepts expression input. Arrays are parsed as Series,
|
|
8025
7991
|
# other non-expression inputs are parsed as literals.
|
|
8026
7992
|
# Length must match the length of `old` or have length 1.
|
|
8027
7993
|
# @param default [Object]
|
|
@@ -8037,7 +8003,7 @@ module Polars
|
|
|
8037
8003
|
# @note
|
|
8038
8004
|
# The global string cache must be enabled when replacing categorical values.
|
|
8039
8005
|
#
|
|
8040
|
-
# @example Replace values by passing
|
|
8006
|
+
# @example Replace values by passing arrays to the `old` and `new` parameters.
|
|
8041
8007
|
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
|
|
8042
8008
|
# df.with_columns(
|
|
8043
8009
|
# replaced: Polars.col("a").replace_strict([1, 2, 3], [100, 200, 300])
|
|
@@ -8419,19 +8385,6 @@ module Polars
|
|
|
8419
8385
|
alpha
|
|
8420
8386
|
end
|
|
8421
8387
|
|
|
8422
|
-
def _prepare_rolling_window_args(window_size, min_periods)
|
|
8423
|
-
if window_size.is_a?(Integer)
|
|
8424
|
-
if min_periods.nil?
|
|
8425
|
-
min_periods = window_size
|
|
8426
|
-
end
|
|
8427
|
-
window_size = "#{window_size}i"
|
|
8428
|
-
end
|
|
8429
|
-
if min_periods.nil?
|
|
8430
|
-
min_periods = 1
|
|
8431
|
-
end
|
|
8432
|
-
[window_size, min_periods]
|
|
8433
|
-
end
|
|
8434
|
-
|
|
8435
8388
|
def _prepare_rolling_by_window_args(window_size)
|
|
8436
8389
|
window_size
|
|
8437
8390
|
end
|