polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/series.rb
CHANGED
|
@@ -16,9 +16,6 @@ module Polars
|
|
|
16
16
|
# Throw error on numeric overflow.
|
|
17
17
|
# @param nan_to_null [Boolean]
|
|
18
18
|
# Not used.
|
|
19
|
-
# @param dtype_if_empty [Symbol, nil]
|
|
20
|
-
# If no dtype is specified and values contains `nil` or an empty array,
|
|
21
|
-
# set the Polars dtype of the Series data. If not specified, Float32 is used.
|
|
22
19
|
#
|
|
23
20
|
# @example Constructing a Series by specifying name and values positionally:
|
|
24
21
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
@@ -28,53 +25,56 @@ module Polars
|
|
|
28
25
|
# # => Polars::Int64
|
|
29
26
|
#
|
|
30
27
|
# @example Constructing a Series with a specific dtype:
|
|
31
|
-
# s2 = Polars::Series.new("a", [1, 2, 3], dtype:
|
|
28
|
+
# s2 = Polars::Series.new("a", [1, 2, 3], dtype: Polars::Float32)
|
|
32
29
|
#
|
|
33
30
|
# @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
|
|
34
31
|
# s3 = Polars::Series.new([1, 2, 3])
|
|
35
|
-
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false
|
|
32
|
+
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false)
|
|
33
|
+
# If 'Unknown' treat as nil to trigger type inference
|
|
34
|
+
if dtype == Unknown
|
|
35
|
+
dtype = nil
|
|
36
|
+
elsif !dtype.nil? && !Utils.is_polars_dtype(dtype)
|
|
37
|
+
dtype = Utils.parse_into_dtype(dtype)
|
|
38
|
+
end
|
|
39
|
+
|
|
36
40
|
# Handle case where values are passed as the first argument
|
|
37
|
-
|
|
41
|
+
original_name = nil
|
|
42
|
+
if name.nil?
|
|
43
|
+
name = ""
|
|
44
|
+
elsif name.is_a?(::String)
|
|
45
|
+
original_name = name
|
|
46
|
+
else
|
|
38
47
|
if values.nil?
|
|
39
48
|
values = name
|
|
40
|
-
name =
|
|
49
|
+
name = ""
|
|
41
50
|
else
|
|
42
|
-
raise
|
|
51
|
+
raise TypeError, "Series name must be a string"
|
|
43
52
|
end
|
|
44
53
|
end
|
|
45
54
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
self._s = sequence_to_rbseries(name, [], dtype: dtype
|
|
55
|
-
elsif values.is_a?(Series)
|
|
56
|
-
self._s = series_to_rbseries(name, values)
|
|
57
|
-
elsif values.is_a?(Range)
|
|
58
|
-
self._s =
|
|
59
|
-
Polars.arange(
|
|
60
|
-
values.first,
|
|
61
|
-
values.last + (values.exclude_end? ? 0 : 1),
|
|
62
|
-
step: 1,
|
|
63
|
-
eager: true,
|
|
64
|
-
dtype: dtype
|
|
65
|
-
)
|
|
66
|
-
.rename(name, in_place: true)
|
|
67
|
-
._s
|
|
68
|
-
elsif values.is_a?(::Array)
|
|
69
|
-
self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
|
|
55
|
+
if values.is_a?(::Array) || values.is_a?(Range)
|
|
56
|
+
self._s = Utils.sequence_to_rbseries(
|
|
57
|
+
name,
|
|
58
|
+
values,
|
|
59
|
+
dtype: dtype,
|
|
60
|
+
strict: strict
|
|
61
|
+
)
|
|
62
|
+
elsif values.nil?
|
|
63
|
+
self._s = Utils.sequence_to_rbseries(name, [], dtype: dtype)
|
|
70
64
|
elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
|
|
71
|
-
self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
|
65
|
+
self._s = Utils.numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
|
72
66
|
|
|
73
67
|
if !dtype.nil?
|
|
74
|
-
self._s =
|
|
68
|
+
self._s = cast(dtype, strict: strict)._s
|
|
75
69
|
end
|
|
70
|
+
elsif values.is_a?(Series)
|
|
71
|
+
self._s = Utils.series_to_rbseries(original_name, values, dtype: dtype, strict: strict)
|
|
72
|
+
elsif values.is_a?(DataFrame)
|
|
73
|
+
self._s = Utils.dataframe_to_rbseries(
|
|
74
|
+
original_name, values, dtype: dtype, strict: strict
|
|
75
|
+
)
|
|
76
76
|
else
|
|
77
|
-
raise
|
|
77
|
+
raise TypeError, "Series constructor called with unsupported type; got #{values.class.name}"
|
|
78
78
|
end
|
|
79
79
|
end
|
|
80
80
|
|
|
@@ -358,7 +358,7 @@ module Polars
|
|
|
358
358
|
#
|
|
359
359
|
# @return [Series]
|
|
360
360
|
def *(other)
|
|
361
|
-
if
|
|
361
|
+
if dtype.temporal?
|
|
362
362
|
raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
|
|
363
363
|
elsif other.is_a?(DataFrame)
|
|
364
364
|
other * self
|
|
@@ -371,11 +371,11 @@ module Polars
|
|
|
371
371
|
#
|
|
372
372
|
# @return [Series]
|
|
373
373
|
def /(other)
|
|
374
|
-
if
|
|
374
|
+
if dtype.temporal?
|
|
375
375
|
raise ArgumentError, "first cast to integer before dividing datelike dtypes"
|
|
376
376
|
end
|
|
377
377
|
|
|
378
|
-
if
|
|
378
|
+
if dtype.float?
|
|
379
379
|
return _arithmetic(other, :div)
|
|
380
380
|
end
|
|
381
381
|
|
|
@@ -386,7 +386,7 @@ module Polars
|
|
|
386
386
|
#
|
|
387
387
|
# @return [Series]
|
|
388
388
|
def %(other)
|
|
389
|
-
if
|
|
389
|
+
if dtype.temporal?
|
|
390
390
|
raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes"
|
|
391
391
|
end
|
|
392
392
|
_arithmetic(other, :rem)
|
|
@@ -396,7 +396,7 @@ module Polars
|
|
|
396
396
|
#
|
|
397
397
|
# @return [Series]
|
|
398
398
|
def **(power)
|
|
399
|
-
if
|
|
399
|
+
if dtype.temporal?
|
|
400
400
|
raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
|
|
401
401
|
end
|
|
402
402
|
to_frame.select(Polars.col(name).pow(power)).to_series
|
|
@@ -435,7 +435,7 @@ module Polars
|
|
|
435
435
|
# @return [Object]
|
|
436
436
|
def [](item)
|
|
437
437
|
if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
|
|
438
|
-
return Utils.wrap_s(_s.
|
|
438
|
+
return Utils.wrap_s(_s.gather_with_series(_pos_idxs(item)._s))
|
|
439
439
|
end
|
|
440
440
|
|
|
441
441
|
if item.is_a?(Series) && item.bool?
|
|
@@ -455,7 +455,7 @@ module Polars
|
|
|
455
455
|
end
|
|
456
456
|
|
|
457
457
|
if Utils.is_int_sequence(item)
|
|
458
|
-
return Utils.wrap_s(_s.
|
|
458
|
+
return Utils.wrap_s(_s.gather_with_series(_pos_idxs(Series.new("", item))._s))
|
|
459
459
|
end
|
|
460
460
|
|
|
461
461
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
|
@@ -466,7 +466,7 @@ module Polars
|
|
|
466
466
|
# @return [Object]
|
|
467
467
|
def []=(key, value)
|
|
468
468
|
if value.is_a?(::Array)
|
|
469
|
-
if
|
|
469
|
+
if dtype.numeric? || dtype.temporal?
|
|
470
470
|
scatter(key, value)
|
|
471
471
|
return
|
|
472
472
|
end
|
|
@@ -484,7 +484,7 @@ module Polars
|
|
|
484
484
|
raise Todo
|
|
485
485
|
end
|
|
486
486
|
elsif key.is_a?(::Array)
|
|
487
|
-
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
|
|
487
|
+
s = Utils.wrap_s(Utils.sequence_to_rbseries("", key, dtype: UInt32))
|
|
488
488
|
self[s] = value
|
|
489
489
|
elsif key.is_a?(Range)
|
|
490
490
|
s = Series.new("", key, dtype: UInt32)
|
|
@@ -548,7 +548,7 @@ module Polars
|
|
|
548
548
|
# @return [Numeric]
|
|
549
549
|
#
|
|
550
550
|
# @example
|
|
551
|
-
# s = Polars::Series.new("values", 1..1_000_000, dtype:
|
|
551
|
+
# s = Polars::Series.new("values", 1..1_000_000, dtype: Polars::UInt32)
|
|
552
552
|
# s.estimated_size
|
|
553
553
|
# # => 4000000
|
|
554
554
|
# s.estimated_size("mb")
|
|
@@ -613,7 +613,7 @@ module Polars
|
|
|
613
613
|
# # => false
|
|
614
614
|
def any?(ignore_nulls: true, &block)
|
|
615
615
|
if block_given?
|
|
616
|
-
|
|
616
|
+
map_elements(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
|
|
617
617
|
else
|
|
618
618
|
_s.any(ignore_nulls)
|
|
619
619
|
end
|
|
@@ -637,7 +637,7 @@ module Polars
|
|
|
637
637
|
# # => true
|
|
638
638
|
def all?(ignore_nulls: true, &block)
|
|
639
639
|
if block_given?
|
|
640
|
-
|
|
640
|
+
map_elements(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
|
|
641
641
|
else
|
|
642
642
|
_s.all(ignore_nulls)
|
|
643
643
|
end
|
|
@@ -661,7 +661,7 @@ module Polars
|
|
|
661
661
|
# # => true
|
|
662
662
|
def none?(&block)
|
|
663
663
|
if block_given?
|
|
664
|
-
|
|
664
|
+
map_elements(return_dtype: Boolean, &block).none?
|
|
665
665
|
else
|
|
666
666
|
to_frame.select(Polars.col(name).is_not.all).to_series[0]
|
|
667
667
|
end
|
|
@@ -827,81 +827,60 @@ module Polars
|
|
|
827
827
|
# Series with mixed datatypes will return summary statistics for the datatype of
|
|
828
828
|
# the first value.
|
|
829
829
|
#
|
|
830
|
+
# @param percentiles [Array]
|
|
831
|
+
# One or more percentiles to include in the summary statistics (if the
|
|
832
|
+
# Series has a numeric dtype). All values must be in the range `[0, 1]`.
|
|
833
|
+
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
|
|
834
|
+
# Interpolation method used when calculating percentiles.
|
|
835
|
+
#
|
|
830
836
|
# @return [DataFrame]
|
|
831
837
|
#
|
|
832
838
|
# @example
|
|
833
|
-
#
|
|
834
|
-
#
|
|
839
|
+
# s = Polars::Series.new([1, 2, 3, 4, 5])
|
|
840
|
+
# s.describe
|
|
835
841
|
# # =>
|
|
836
|
-
# # shape: (
|
|
842
|
+
# # shape: (9, 2)
|
|
837
843
|
# # ┌────────────┬──────────┐
|
|
838
844
|
# # │ statistic ┆ value │
|
|
839
845
|
# # │ --- ┆ --- │
|
|
840
846
|
# # │ str ┆ f64 │
|
|
841
847
|
# # ╞════════════╪══════════╡
|
|
842
|
-
# # │
|
|
843
|
-
# # │ max ┆ 5.0 │
|
|
848
|
+
# # │ count ┆ 5.0 │
|
|
844
849
|
# # │ null_count ┆ 0.0 │
|
|
845
850
|
# # │ mean ┆ 3.0 │
|
|
846
851
|
# # │ std ┆ 1.581139 │
|
|
847
|
-
# # │
|
|
852
|
+
# # │ min ┆ 1.0 │
|
|
853
|
+
# # │ 25% ┆ 2.0 │
|
|
854
|
+
# # │ 50% ┆ 3.0 │
|
|
855
|
+
# # │ 75% ┆ 4.0 │
|
|
856
|
+
# # │ max ┆ 5.0 │
|
|
848
857
|
# # └────────────┴──────────┘
|
|
849
858
|
#
|
|
850
|
-
# @example
|
|
851
|
-
#
|
|
852
|
-
#
|
|
859
|
+
# @example Non-numeric data types may not have all statistics available.
|
|
860
|
+
# s = Polars::Series.new(["aa", "aa", nil, "bb", "cc"])
|
|
861
|
+
# s.describe
|
|
853
862
|
# # =>
|
|
854
|
-
# # shape: (
|
|
863
|
+
# # shape: (4, 2)
|
|
855
864
|
# # ┌────────────┬───────┐
|
|
856
865
|
# # │ statistic ┆ value │
|
|
857
866
|
# # │ --- ┆ --- │
|
|
858
|
-
# # │ str ┆
|
|
867
|
+
# # │ str ┆ str │
|
|
859
868
|
# # ╞════════════╪═══════╡
|
|
860
|
-
# # │
|
|
869
|
+
# # │ count ┆ 4 │
|
|
861
870
|
# # │ null_count ┆ 1 │
|
|
862
|
-
# # │
|
|
871
|
+
# # │ min ┆ aa │
|
|
872
|
+
# # │ max ┆ cc │
|
|
863
873
|
# # └────────────┴───────┘
|
|
864
|
-
def describe
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
"max" => s.max,
|
|
872
|
-
"null_count" => s.null_count,
|
|
873
|
-
"mean" => s.mean,
|
|
874
|
-
"std" => s.std,
|
|
875
|
-
"count" => s.len
|
|
876
|
-
}
|
|
877
|
-
elsif is_boolean
|
|
878
|
-
stats = {
|
|
879
|
-
"sum" => sum,
|
|
880
|
-
"null_count" => null_count,
|
|
881
|
-
"count" => len
|
|
882
|
-
}
|
|
883
|
-
elsif is_utf8
|
|
884
|
-
stats = {
|
|
885
|
-
"unique" => unique.length,
|
|
886
|
-
"null_count" => null_count,
|
|
887
|
-
"count" => len
|
|
888
|
-
}
|
|
889
|
-
elsif is_datelike
|
|
890
|
-
# we coerce all to string, because a polars column
|
|
891
|
-
# only has a single dtype and dates: datetime and count: int don't match
|
|
892
|
-
stats = {
|
|
893
|
-
"min" => dt.min.to_s,
|
|
894
|
-
"max" => dt.max.to_s,
|
|
895
|
-
"null_count" => null_count.to_s,
|
|
896
|
-
"count" => len.to_s
|
|
897
|
-
}
|
|
898
|
-
else
|
|
899
|
-
raise TypeError, "This type is not supported"
|
|
900
|
-
end
|
|
901
|
-
|
|
902
|
-
Polars::DataFrame.new(
|
|
903
|
-
{"statistic" => stats.keys, "value" => stats.values}
|
|
874
|
+
def describe(
|
|
875
|
+
percentiles: [0.25, 0.5, 0.75],
|
|
876
|
+
interpolation: "nearest"
|
|
877
|
+
)
|
|
878
|
+
stats = to_frame.describe(
|
|
879
|
+
percentiles: percentiles,
|
|
880
|
+
interpolation: interpolation
|
|
904
881
|
)
|
|
882
|
+
stats.columns = ["statistic", "value"]
|
|
883
|
+
stats.filter(F.col("value").is_not_null)
|
|
905
884
|
end
|
|
906
885
|
|
|
907
886
|
# Reduce this Series to the sum value.
|
|
@@ -909,8 +888,8 @@ module Polars
|
|
|
909
888
|
# @return [Numeric]
|
|
910
889
|
#
|
|
911
890
|
# @note
|
|
912
|
-
# Dtypes
|
|
913
|
-
#
|
|
891
|
+
# Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
|
|
892
|
+
# Int64 before summing to prevent overflow issues.
|
|
914
893
|
#
|
|
915
894
|
# @example
|
|
916
895
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
@@ -1053,7 +1032,7 @@ module Polars
|
|
|
1053
1032
|
# s.std
|
|
1054
1033
|
# # => 1.0
|
|
1055
1034
|
def std(ddof: 1)
|
|
1056
|
-
if !
|
|
1035
|
+
if !dtype.numeric?
|
|
1057
1036
|
nil
|
|
1058
1037
|
else
|
|
1059
1038
|
to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0]
|
|
@@ -1073,7 +1052,7 @@ module Polars
|
|
|
1073
1052
|
# s.var
|
|
1074
1053
|
# # => 1.0
|
|
1075
1054
|
def var(ddof: 1)
|
|
1076
|
-
if !
|
|
1055
|
+
if !dtype.numeric?
|
|
1077
1056
|
nil
|
|
1078
1057
|
else
|
|
1079
1058
|
to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0]
|
|
@@ -1490,7 +1469,13 @@ module Polars
|
|
|
1490
1469
|
# b = Polars::Series.new([0.65, 0.10, 0.25])
|
|
1491
1470
|
# b.entropy(normalize: true)
|
|
1492
1471
|
# # => 0.8568409950394724
|
|
1493
|
-
def entropy(base: Math::E, normalize:
|
|
1472
|
+
def entropy(base: Math::E, normalize: nil)
|
|
1473
|
+
# TODO update
|
|
1474
|
+
if normalize.nil?
|
|
1475
|
+
warn "The default `normalize` for `entropy` method will change from `false` to `true` in a future version"
|
|
1476
|
+
normalize = false
|
|
1477
|
+
end
|
|
1478
|
+
|
|
1494
1479
|
Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
|
|
1495
1480
|
end
|
|
1496
1481
|
|
|
@@ -1498,7 +1483,7 @@ module Polars
|
|
|
1498
1483
|
#
|
|
1499
1484
|
# @param expr [Expr]
|
|
1500
1485
|
# Expression to evaluate
|
|
1501
|
-
# @param
|
|
1486
|
+
# @param min_samples [Integer]
|
|
1502
1487
|
# Number of valid values there should be in the window before the expression
|
|
1503
1488
|
# is evaluated. valid values = `length - null_count`
|
|
1504
1489
|
#
|
|
@@ -1525,7 +1510,7 @@ module Polars
|
|
|
1525
1510
|
# # -15
|
|
1526
1511
|
# # -24
|
|
1527
1512
|
# # ]
|
|
1528
|
-
def cumulative_eval(expr,
|
|
1513
|
+
def cumulative_eval(expr, min_samples: 1)
|
|
1529
1514
|
super
|
|
1530
1515
|
end
|
|
1531
1516
|
|
|
@@ -1537,8 +1522,16 @@ module Polars
|
|
|
1537
1522
|
# @return [Series]
|
|
1538
1523
|
#
|
|
1539
1524
|
# @example
|
|
1540
|
-
# s = Polars::Series.new("
|
|
1541
|
-
# s.alias("
|
|
1525
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
|
1526
|
+
# s.alias("b")
|
|
1527
|
+
# # =>
|
|
1528
|
+
# # shape: (3,)
|
|
1529
|
+
# # Series: 'b' [i64]
|
|
1530
|
+
# # [
|
|
1531
|
+
# # 1
|
|
1532
|
+
# # 2
|
|
1533
|
+
# # 3
|
|
1534
|
+
# # ]
|
|
1542
1535
|
def alias(name)
|
|
1543
1536
|
s = dup
|
|
1544
1537
|
s._s.rename(name)
|
|
@@ -1549,21 +1542,22 @@ module Polars
|
|
|
1549
1542
|
#
|
|
1550
1543
|
# @param name [String]
|
|
1551
1544
|
# New name.
|
|
1552
|
-
# @param in_place [Boolean]
|
|
1553
|
-
# Modify the Series in-place.
|
|
1554
1545
|
#
|
|
1555
1546
|
# @return [Series]
|
|
1556
1547
|
#
|
|
1557
1548
|
# @example
|
|
1558
1549
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
1559
1550
|
# s.rename("b")
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1551
|
+
# # =>
|
|
1552
|
+
# # shape: (3,)
|
|
1553
|
+
# # Series: 'b' [i64]
|
|
1554
|
+
# # [
|
|
1555
|
+
# # 1
|
|
1556
|
+
# # 2
|
|
1557
|
+
# # 3
|
|
1558
|
+
# # ]
|
|
1559
|
+
def rename(name)
|
|
1560
|
+
self.alias(name)
|
|
1567
1561
|
end
|
|
1568
1562
|
|
|
1569
1563
|
# Get the length of each individual chunk.
|
|
@@ -1575,7 +1569,7 @@ module Polars
|
|
|
1575
1569
|
# s2 = Polars::Series.new("b", [4, 5, 6])
|
|
1576
1570
|
#
|
|
1577
1571
|
# @example Concatenate Series with rechunk: true
|
|
1578
|
-
# Polars.concat([s, s2]).chunk_lengths
|
|
1572
|
+
# Polars.concat([s, s2], rechunk: true).chunk_lengths
|
|
1579
1573
|
# # => [6]
|
|
1580
1574
|
#
|
|
1581
1575
|
# @example Concatenate Series with rechunk: false
|
|
@@ -1594,7 +1588,7 @@ module Polars
|
|
|
1594
1588
|
# s2 = Polars::Series.new("b", [4, 5, 6])
|
|
1595
1589
|
#
|
|
1596
1590
|
# @example Concatenate Series with rechunk: true
|
|
1597
|
-
# Polars.concat([s, s2]).n_chunks
|
|
1591
|
+
# Polars.concat([s, s2], rechunk: true).n_chunks
|
|
1598
1592
|
# # => 1
|
|
1599
1593
|
#
|
|
1600
1594
|
# @example Concatenate Series with rechunk: false
|
|
@@ -1612,8 +1606,8 @@ module Polars
|
|
|
1612
1606
|
# @return [Series]
|
|
1613
1607
|
#
|
|
1614
1608
|
# @note
|
|
1615
|
-
# Dtypes
|
|
1616
|
-
#
|
|
1609
|
+
# Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
|
|
1610
|
+
# Int64 before summing to prevent overflow issues.
|
|
1617
1611
|
#
|
|
1618
1612
|
# @example
|
|
1619
1613
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
@@ -1629,7 +1623,6 @@ module Polars
|
|
|
1629
1623
|
def cum_sum(reverse: false)
|
|
1630
1624
|
super
|
|
1631
1625
|
end
|
|
1632
|
-
alias_method :cumsum, :cum_sum
|
|
1633
1626
|
|
|
1634
1627
|
# Return the cumulative count of the non-null values in the column.
|
|
1635
1628
|
#
|
|
@@ -1675,7 +1668,6 @@ module Polars
|
|
|
1675
1668
|
def cum_min(reverse: false)
|
|
1676
1669
|
super
|
|
1677
1670
|
end
|
|
1678
|
-
alias_method :cummin, :cum_min
|
|
1679
1671
|
|
|
1680
1672
|
# Get an array with the cumulative max computed at every element.
|
|
1681
1673
|
#
|
|
@@ -1698,7 +1690,6 @@ module Polars
|
|
|
1698
1690
|
def cum_max(reverse: false)
|
|
1699
1691
|
super
|
|
1700
1692
|
end
|
|
1701
|
-
alias_method :cummax, :cum_max
|
|
1702
1693
|
|
|
1703
1694
|
# Get an array with the cumulative product computed at every element.
|
|
1704
1695
|
#
|
|
@@ -1708,8 +1699,8 @@ module Polars
|
|
|
1708
1699
|
# @return [Series]
|
|
1709
1700
|
#
|
|
1710
1701
|
# @note
|
|
1711
|
-
# Dtypes
|
|
1712
|
-
#
|
|
1702
|
+
# Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
|
|
1703
|
+
# Int64 before multiplying to prevent overflow issues.
|
|
1713
1704
|
#
|
|
1714
1705
|
# @example
|
|
1715
1706
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
@@ -1725,7 +1716,6 @@ module Polars
|
|
|
1725
1716
|
def cum_prod(reverse: false)
|
|
1726
1717
|
super
|
|
1727
1718
|
end
|
|
1728
|
-
alias_method :cumprod, :cum_prod
|
|
1729
1719
|
|
|
1730
1720
|
# Get a slice of this Series.
|
|
1731
1721
|
#
|
|
@@ -1755,29 +1745,6 @@ module Polars
|
|
|
1755
1745
|
#
|
|
1756
1746
|
# @param other [Series]
|
|
1757
1747
|
# Series to append.
|
|
1758
|
-
# @param append_chunks [Boolean]
|
|
1759
|
-
# If set to `true` the append operation will add the chunks from `other` to
|
|
1760
|
-
# self. This is super cheap.
|
|
1761
|
-
#
|
|
1762
|
-
# If set to `false` the append operation will do the same as
|
|
1763
|
-
# {DataFrame#extend} which extends the memory backed by this Series with
|
|
1764
|
-
# the values from `other`.
|
|
1765
|
-
#
|
|
1766
|
-
# Different from `append_chunks`, `extend` appends the data from `other` to
|
|
1767
|
-
# the underlying memory locations and thus may cause a reallocation (which is
|
|
1768
|
-
# expensive).
|
|
1769
|
-
#
|
|
1770
|
-
# If this does not cause a reallocation, the resulting data structure will not
|
|
1771
|
-
# have any extra chunks and thus will yield faster queries.
|
|
1772
|
-
#
|
|
1773
|
-
# Prefer `extend` over `append_chunks` when you want to do a query after a
|
|
1774
|
-
# single append. For instance during online operations where you add `n` rows
|
|
1775
|
-
# and rerun a query.
|
|
1776
|
-
#
|
|
1777
|
-
# Prefer `append_chunks` over `extend` when you want to append many times
|
|
1778
|
-
# before doing a query. For instance, when you read in multiple files and when
|
|
1779
|
-
# to store them in a single Series. In the latter case, finish the sequence
|
|
1780
|
-
# of `append_chunks` operations with a `rechunk`.
|
|
1781
1748
|
#
|
|
1782
1749
|
# @return [Series]
|
|
1783
1750
|
#
|
|
@@ -1796,20 +1763,60 @@ module Polars
|
|
|
1796
1763
|
# # 5
|
|
1797
1764
|
# # 6
|
|
1798
1765
|
# # ]
|
|
1799
|
-
def append(other
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1766
|
+
def append(other)
|
|
1767
|
+
Utils.require_same_type(self, other)
|
|
1768
|
+
_s.append(other._s)
|
|
1769
|
+
self
|
|
1770
|
+
end
|
|
1771
|
+
|
|
1772
|
+
# Extend the memory backed by this Series with the values from another.
|
|
1773
|
+
#
|
|
1774
|
+
# Different from `append`, which adds the chunks from `other` to the chunks of
|
|
1775
|
+
# this series, `extend` appends the data from `other` to the underlying memory
|
|
1776
|
+
# locations and thus may cause a reallocation (which is expensive).
|
|
1777
|
+
#
|
|
1778
|
+
# If this does `not` cause a reallocation, the resulting data structure will not
|
|
1779
|
+
# have any extra chunks and thus will yield faster queries.
|
|
1780
|
+
#
|
|
1781
|
+
# Prefer `extend` over `append` when you want to do a query after a single
|
|
1782
|
+
# append. For instance, during online operations where you add `n` rows
|
|
1783
|
+
# and rerun a query.
|
|
1784
|
+
#
|
|
1785
|
+
# Prefer `append` over `extend` when you want to append many times
|
|
1786
|
+
# before doing a query. For instance, when you read in multiple files and want
|
|
1787
|
+
# to store them in a single `Series`. In the latter case, finish the sequence
|
|
1788
|
+
# of `append` operations with a `rechunk`.
|
|
1789
|
+
#
|
|
1790
|
+
# @param other [Series]
|
|
1791
|
+
# Series to extend the series with.
|
|
1792
|
+
#
|
|
1793
|
+
# @return [Series]
|
|
1794
|
+
#
|
|
1795
|
+
# @note
|
|
1796
|
+
# This method modifies the series in-place. The series is returned for
|
|
1797
|
+
# convenience only.
|
|
1798
|
+
#
|
|
1799
|
+
# @example
|
|
1800
|
+
# a = Polars::Series.new("a", [1, 2, 3])
|
|
1801
|
+
# b = Polars::Series.new("b", [4, 5])
|
|
1802
|
+
# a.extend(b)
|
|
1803
|
+
# # =>
|
|
1804
|
+
# # shape: (5,)
|
|
1805
|
+
# # Series: 'a' [i64]
|
|
1806
|
+
# # [
|
|
1807
|
+
# # 1
|
|
1808
|
+
# # 2
|
|
1809
|
+
# # 3
|
|
1810
|
+
# # 4
|
|
1811
|
+
# # 5
|
|
1812
|
+
# # ]
|
|
1813
|
+
#
|
|
1814
|
+
# @example The resulting series will consist of a single chunk.
|
|
1815
|
+
# a.n_chunks
|
|
1816
|
+
# # => 1
|
|
1817
|
+
def extend(other)
|
|
1818
|
+
Utils.require_same_type(self, other)
|
|
1819
|
+
_s.extend(other._s)
|
|
1813
1820
|
self
|
|
1814
1821
|
end
|
|
1815
1822
|
|
|
@@ -1856,7 +1863,10 @@ module Polars
|
|
|
1856
1863
|
# # 2
|
|
1857
1864
|
# # ]
|
|
1858
1865
|
def head(n = 10)
|
|
1859
|
-
|
|
1866
|
+
if n < 0
|
|
1867
|
+
n = [0, len + n].max
|
|
1868
|
+
end
|
|
1869
|
+
self.class._from_rbseries(_s.head(n))
|
|
1860
1870
|
end
|
|
1861
1871
|
|
|
1862
1872
|
# Get the last `n` rows.
|
|
@@ -1877,7 +1887,10 @@ module Polars
|
|
|
1877
1887
|
# # 3
|
|
1878
1888
|
# # ]
|
|
1879
1889
|
def tail(n = 10)
|
|
1880
|
-
|
|
1890
|
+
if n < 0
|
|
1891
|
+
n = [0, len + n].max
|
|
1892
|
+
end
|
|
1893
|
+
self.class._from_rbseries(_s.tail(n))
|
|
1881
1894
|
end
|
|
1882
1895
|
|
|
1883
1896
|
# Get the first `n` rows.
|
|
@@ -1900,7 +1913,7 @@ module Polars
|
|
|
1900
1913
|
# # 2
|
|
1901
1914
|
# # ]
|
|
1902
1915
|
def limit(n = 10)
|
|
1903
|
-
|
|
1916
|
+
head(n)
|
|
1904
1917
|
end
|
|
1905
1918
|
|
|
1906
1919
|
# Take every nth value in the Series and return as new Series.
|
|
@@ -1935,11 +1948,10 @@ module Polars
|
|
|
1935
1948
|
def gather_every(n, offset = 0)
|
|
1936
1949
|
super
|
|
1937
1950
|
end
|
|
1938
|
-
alias_method :take_every, :gather_every
|
|
1939
1951
|
|
|
1940
1952
|
# Sort this Series.
|
|
1941
1953
|
#
|
|
1942
|
-
# @param
|
|
1954
|
+
# @param descending [Boolean]
|
|
1943
1955
|
# Reverse sort.
|
|
1944
1956
|
# @param nulls_last [Boolean]
|
|
1945
1957
|
# Place null values last instead of first.
|
|
@@ -1962,7 +1974,7 @@ module Polars
|
|
|
1962
1974
|
# # 3
|
|
1963
1975
|
# # 4
|
|
1964
1976
|
# # ]
|
|
1965
|
-
# s.sort(
|
|
1977
|
+
# s.sort(descending: true)
|
|
1966
1978
|
# # =>
|
|
1967
1979
|
# # shape: (4,)
|
|
1968
1980
|
# # Series: 'a' [i64]
|
|
@@ -1972,12 +1984,12 @@ module Polars
|
|
|
1972
1984
|
# # 2
|
|
1973
1985
|
# # 1
|
|
1974
1986
|
# # ]
|
|
1975
|
-
def sort(
|
|
1987
|
+
def sort(descending: false, nulls_last: false, multithreaded: true, in_place: false)
|
|
1976
1988
|
if in_place
|
|
1977
|
-
self._s = _s.sort(
|
|
1989
|
+
self._s = _s.sort(descending, nulls_last, multithreaded)
|
|
1978
1990
|
self
|
|
1979
1991
|
else
|
|
1980
|
-
Utils.wrap_s(_s.sort(
|
|
1992
|
+
Utils.wrap_s(_s.sort(descending, nulls_last, multithreaded))
|
|
1981
1993
|
end
|
|
1982
1994
|
end
|
|
1983
1995
|
|
|
@@ -2017,7 +2029,7 @@ module Polars
|
|
|
2017
2029
|
# Number of elements to return.
|
|
2018
2030
|
# @param reverse [Object]
|
|
2019
2031
|
# Consider the `k` smallest elements of the `by` column (instead of the `k`
|
|
2020
|
-
# largest). This can be specified per column by passing
|
|
2032
|
+
# largest). This can be specified per column by passing an array of
|
|
2021
2033
|
# booleans.
|
|
2022
2034
|
#
|
|
2023
2035
|
# @return [Series]
|
|
@@ -2077,7 +2089,7 @@ module Polars
|
|
|
2077
2089
|
# Number of elements to return.
|
|
2078
2090
|
# @param reverse [Object]
|
|
2079
2091
|
# Consider the `k` largest elements of the `by` column (instead of the `k`
|
|
2080
|
-
# smallest). This can be specified per column by passing
|
|
2092
|
+
# smallest). This can be specified per column by passing an array of
|
|
2081
2093
|
# booleans.
|
|
2082
2094
|
#
|
|
2083
2095
|
# @return [Series]
|
|
@@ -2103,7 +2115,7 @@ module Polars
|
|
|
2103
2115
|
|
|
2104
2116
|
# Get the index values that would sort this Series.
|
|
2105
2117
|
#
|
|
2106
|
-
# @param
|
|
2118
|
+
# @param descending [Boolean]
|
|
2107
2119
|
# Sort in reverse (descending) order.
|
|
2108
2120
|
# @param nulls_last [Boolean]
|
|
2109
2121
|
# Place null values last instead of first.
|
|
@@ -2123,10 +2135,9 @@ module Polars
|
|
|
2123
2135
|
# # 2
|
|
2124
2136
|
# # 0
|
|
2125
2137
|
# # ]
|
|
2126
|
-
def arg_sort(
|
|
2138
|
+
def arg_sort(descending: false, nulls_last: false)
|
|
2127
2139
|
super
|
|
2128
2140
|
end
|
|
2129
|
-
alias_method :argsort, :arg_sort
|
|
2130
2141
|
|
|
2131
2142
|
# Get unique index as Series.
|
|
2132
2143
|
#
|
|
@@ -2281,7 +2292,6 @@ module Polars
|
|
|
2281
2292
|
def gather(indices)
|
|
2282
2293
|
super
|
|
2283
2294
|
end
|
|
2284
|
-
alias_method :take, :gather
|
|
2285
2295
|
|
|
2286
2296
|
# Count the null values in this Series.
|
|
2287
2297
|
#
|
|
@@ -2313,7 +2323,6 @@ module Polars
|
|
|
2313
2323
|
def has_nulls
|
|
2314
2324
|
_s.has_nulls
|
|
2315
2325
|
end
|
|
2316
|
-
alias_method :has_validity, :has_nulls
|
|
2317
2326
|
|
|
2318
2327
|
# Check if the Series is empty.
|
|
2319
2328
|
#
|
|
@@ -2613,8 +2622,6 @@ module Polars
|
|
|
2613
2622
|
def is_first_distinct
|
|
2614
2623
|
super
|
|
2615
2624
|
end
|
|
2616
|
-
alias_method :is_first, :is_first_distinct
|
|
2617
|
-
|
|
2618
2625
|
|
|
2619
2626
|
# Return a boolean mask indicating the last occurrence of each distinct value.
|
|
2620
2627
|
#
|
|
@@ -2685,7 +2692,7 @@ module Polars
|
|
|
2685
2692
|
#
|
|
2686
2693
|
# @param other [Series]
|
|
2687
2694
|
# Series to compare with.
|
|
2688
|
-
# @param
|
|
2695
|
+
# @param check_dtypes [Boolean]
|
|
2689
2696
|
# Require data types to match.
|
|
2690
2697
|
# @param check_names [Boolean]
|
|
2691
2698
|
# Require names to match.
|
|
@@ -2701,10 +2708,9 @@ module Polars
|
|
|
2701
2708
|
# # => true
|
|
2702
2709
|
# s.equals(s2)
|
|
2703
2710
|
# # => false
|
|
2704
|
-
def equals(other,
|
|
2705
|
-
_s.equals(other._s,
|
|
2711
|
+
def equals(other, check_dtypes: false, check_names: false, null_equal: true)
|
|
2712
|
+
_s.equals(other._s, check_dtypes, check_names, null_equal)
|
|
2706
2713
|
end
|
|
2707
|
-
alias_method :series_equal, :equals
|
|
2708
2714
|
|
|
2709
2715
|
# Return the number of elements in the Series.
|
|
2710
2716
|
#
|
|
@@ -2734,16 +2740,19 @@ module Polars
|
|
|
2734
2740
|
|
|
2735
2741
|
# Cast between data types.
|
|
2736
2742
|
#
|
|
2737
|
-
# @param dtype [
|
|
2743
|
+
# @param dtype [Object]
|
|
2738
2744
|
# DataType to cast to
|
|
2739
2745
|
# @param strict [Boolean]
|
|
2740
2746
|
# Throw an error if a cast could not be done for instance due to an overflow
|
|
2747
|
+
# @param wrap_numerical [Boolean]
|
|
2748
|
+
# If true numeric casts wrap overflowing values instead of
|
|
2749
|
+
# marking the cast as invalid.
|
|
2741
2750
|
#
|
|
2742
2751
|
# @return [Series]
|
|
2743
2752
|
#
|
|
2744
2753
|
# @example
|
|
2745
2754
|
# s = Polars::Series.new("a", [true, false, true])
|
|
2746
|
-
# s.cast(
|
|
2755
|
+
# s.cast(Polars::UInt32)
|
|
2747
2756
|
# # =>
|
|
2748
2757
|
# # shape: (3,)
|
|
2749
2758
|
# # Series: 'a' [u32]
|
|
@@ -2752,24 +2761,18 @@ module Polars
|
|
|
2752
2761
|
# # 0
|
|
2753
2762
|
# # 1
|
|
2754
2763
|
# # ]
|
|
2755
|
-
def cast(dtype, strict: true)
|
|
2756
|
-
|
|
2764
|
+
def cast(dtype, strict: true, wrap_numerical: false)
|
|
2765
|
+
dtype = Utils.parse_into_dtype(dtype)
|
|
2766
|
+
self.class._from_rbseries(_s.cast(dtype, strict, wrap_numerical))
|
|
2757
2767
|
end
|
|
2758
2768
|
|
|
2759
2769
|
# Cast to physical representation of the logical dtype.
|
|
2760
2770
|
#
|
|
2761
|
-
# - `:date` -> `:i32`
|
|
2762
|
-
# - `:datetime` -> `:i64`
|
|
2763
|
-
# - `:time` -> `:i64`
|
|
2764
|
-
# - `:duration` -> `:i64`
|
|
2765
|
-
# - `:cat` -> `:u32`
|
|
2766
|
-
# - other data types will be left unchanged.
|
|
2767
|
-
#
|
|
2768
2771
|
# @return [Series]
|
|
2769
2772
|
#
|
|
2770
2773
|
# @example
|
|
2771
2774
|
# s = Polars::Series.new("values", ["a", nil, "x", "a"])
|
|
2772
|
-
# s.cast(
|
|
2775
|
+
# s.cast(Polars::Categorical).to_physical
|
|
2773
2776
|
# # =>
|
|
2774
2777
|
# # shape: (4,)
|
|
2775
2778
|
# # Series: 'values' [u32]
|
|
@@ -2840,7 +2843,7 @@ module Polars
|
|
|
2840
2843
|
# @return [Series]
|
|
2841
2844
|
#
|
|
2842
2845
|
# @example
|
|
2843
|
-
# s = Polars::Series.new("a", [1, 2, 3], dtype:
|
|
2846
|
+
# s = Polars::Series.new("a", [1, 2, 3], dtype: Polars::Int8)
|
|
2844
2847
|
# s.reverse
|
|
2845
2848
|
# # =>
|
|
2846
2849
|
# # shape: (3,)
|
|
@@ -2869,7 +2872,7 @@ module Polars
|
|
|
2869
2872
|
#
|
|
2870
2873
|
# @note
|
|
2871
2874
|
# If the value of the `lower_bound` is greater than that of the `upper_bound`
|
|
2872
|
-
# then the result will be
|
|
2875
|
+
# then the result will be false, as no value can satisfy the condition.
|
|
2873
2876
|
#
|
|
2874
2877
|
# @example
|
|
2875
2878
|
# s = Polars::Series.new("num", [1, 2, 3, 4, 5])
|
|
@@ -2962,7 +2965,7 @@ module Polars
|
|
|
2962
2965
|
def is_close(
|
|
2963
2966
|
other,
|
|
2964
2967
|
abs_tol: 0.0,
|
|
2965
|
-
rel_tol:
|
|
2968
|
+
rel_tol: 1.0e-09,
|
|
2966
2969
|
nans_equal: false
|
|
2967
2970
|
)
|
|
2968
2971
|
F.select(
|
|
@@ -2972,75 +2975,6 @@ module Polars
|
|
|
2972
2975
|
).to_series
|
|
2973
2976
|
end
|
|
2974
2977
|
|
|
2975
|
-
# Check if this Series datatype is numeric.
|
|
2976
|
-
#
|
|
2977
|
-
# @return [Boolean]
|
|
2978
|
-
#
|
|
2979
|
-
# @example
|
|
2980
|
-
# s = Polars::Series.new("a", [1, 2, 3])
|
|
2981
|
-
# s.is_numeric
|
|
2982
|
-
# # => true
|
|
2983
|
-
def is_numeric
|
|
2984
|
-
[Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
|
|
2985
|
-
end
|
|
2986
|
-
alias_method :numeric?, :is_numeric
|
|
2987
|
-
|
|
2988
|
-
# Check if this Series datatype is datelike.
|
|
2989
|
-
#
|
|
2990
|
-
# @return [Boolean]
|
|
2991
|
-
#
|
|
2992
|
-
# @example
|
|
2993
|
-
# s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
|
|
2994
|
-
# s.is_datelike
|
|
2995
|
-
# # => true
|
|
2996
|
-
def is_datelike
|
|
2997
|
-
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
|
2998
|
-
end
|
|
2999
|
-
alias_method :datelike?, :is_datelike
|
|
3000
|
-
alias_method :is_temporal, :is_datelike
|
|
3001
|
-
alias_method :temporal?, :is_datelike
|
|
3002
|
-
|
|
3003
|
-
# Check if this Series has floating point numbers.
|
|
3004
|
-
#
|
|
3005
|
-
# @return [Boolean]
|
|
3006
|
-
#
|
|
3007
|
-
# @example
|
|
3008
|
-
# s = Polars::Series.new("a", [1.0, 2.0, 3.0])
|
|
3009
|
-
# s.is_float
|
|
3010
|
-
# # => true
|
|
3011
|
-
def is_float
|
|
3012
|
-
[Float32, Float64].include?(dtype)
|
|
3013
|
-
end
|
|
3014
|
-
alias_method :float?, :is_float
|
|
3015
|
-
|
|
3016
|
-
# Check if this Series is a Boolean.
|
|
3017
|
-
#
|
|
3018
|
-
# @return [Boolean]
|
|
3019
|
-
#
|
|
3020
|
-
# @example
|
|
3021
|
-
# s = Polars::Series.new("a", [true, false, true])
|
|
3022
|
-
# s.is_boolean
|
|
3023
|
-
# # => true
|
|
3024
|
-
def is_boolean
|
|
3025
|
-
dtype == Boolean
|
|
3026
|
-
end
|
|
3027
|
-
alias_method :boolean?, :is_boolean
|
|
3028
|
-
alias_method :is_bool, :is_boolean
|
|
3029
|
-
alias_method :bool?, :is_boolean
|
|
3030
|
-
|
|
3031
|
-
# Check if this Series datatype is a Utf8.
|
|
3032
|
-
#
|
|
3033
|
-
# @return [Boolean]
|
|
3034
|
-
#
|
|
3035
|
-
# @example
|
|
3036
|
-
# s = Polars::Series.new("x", ["a", "b", "c"])
|
|
3037
|
-
# s.is_utf8
|
|
3038
|
-
# # => true
|
|
3039
|
-
def is_utf8
|
|
3040
|
-
dtype == String
|
|
3041
|
-
end
|
|
3042
|
-
alias_method :utf8?, :is_utf8
|
|
3043
|
-
|
|
3044
2978
|
# def view
|
|
3045
2979
|
# end
|
|
3046
2980
|
|
|
@@ -3055,7 +2989,7 @@ module Polars
|
|
|
3055
2989
|
# # Numo::Int64#shape=[3]
|
|
3056
2990
|
# # [1, 2, 3]
|
|
3057
2991
|
def to_numo
|
|
3058
|
-
if
|
|
2992
|
+
if dtype.temporal?
|
|
3059
2993
|
Numo::RObject.cast(to_a)
|
|
3060
2994
|
else
|
|
3061
2995
|
_s.to_numo
|
|
@@ -3093,16 +3027,16 @@ module Polars
|
|
|
3093
3027
|
|
|
3094
3028
|
# Set values at the index locations.
|
|
3095
3029
|
#
|
|
3096
|
-
# @param
|
|
3030
|
+
# @param indices [Object]
|
|
3097
3031
|
# Integers representing the index locations.
|
|
3098
|
-
# @param
|
|
3032
|
+
# @param values [Object]
|
|
3099
3033
|
# Replacement values.
|
|
3100
3034
|
#
|
|
3101
3035
|
# @return [Series]
|
|
3102
3036
|
#
|
|
3103
3037
|
# @example
|
|
3104
3038
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
3105
|
-
# s.
|
|
3039
|
+
# s.scatter(1, 10)
|
|
3106
3040
|
# # =>
|
|
3107
3041
|
# # shape: (3,)
|
|
3108
3042
|
# # Series: 'a' [i64]
|
|
@@ -3111,29 +3045,28 @@ module Polars
|
|
|
3111
3045
|
# # 10
|
|
3112
3046
|
# # 3
|
|
3113
3047
|
# # ]
|
|
3114
|
-
def scatter(
|
|
3115
|
-
if
|
|
3116
|
-
|
|
3048
|
+
def scatter(indices, values)
|
|
3049
|
+
if indices.is_a?(Integer)
|
|
3050
|
+
indices = [indices]
|
|
3117
3051
|
end
|
|
3118
|
-
if
|
|
3052
|
+
if indices.length == 0
|
|
3119
3053
|
return self
|
|
3120
3054
|
end
|
|
3121
3055
|
|
|
3122
|
-
|
|
3123
|
-
if
|
|
3124
|
-
|
|
3056
|
+
indices = Series.new("", indices)
|
|
3057
|
+
if values.is_a?(Integer) || values.is_a?(Float) || Utils.bool?(values) || values.is_a?(::String) || values.nil?
|
|
3058
|
+
values = Series.new("", [values])
|
|
3125
3059
|
|
|
3126
3060
|
# if we need to set more than a single value, we extend it
|
|
3127
|
-
if
|
|
3128
|
-
|
|
3061
|
+
if indices.length > 0
|
|
3062
|
+
values = values.extend_constant(values[0], indices.length - 1)
|
|
3129
3063
|
end
|
|
3130
|
-
elsif !
|
|
3131
|
-
|
|
3064
|
+
elsif !values.is_a?(Series)
|
|
3065
|
+
values = Series.new("", values)
|
|
3132
3066
|
end
|
|
3133
|
-
_s.scatter(
|
|
3067
|
+
_s.scatter(indices._s, values._s)
|
|
3134
3068
|
self
|
|
3135
3069
|
end
|
|
3136
|
-
alias_method :set_at_idx, :scatter
|
|
3137
3070
|
|
|
3138
3071
|
# Get the index of the first occurrence of a value, or `nil` if it's not found.
|
|
3139
3072
|
#
|
|
@@ -3197,13 +3130,12 @@ module Polars
|
|
|
3197
3130
|
s = len > 0 ? self.class.new(name, [], dtype: dtype) : clone
|
|
3198
3131
|
n > 0 ? s.extend_constant(nil, n) : s
|
|
3199
3132
|
end
|
|
3200
|
-
alias_method :cleared, :clear
|
|
3201
3133
|
|
|
3202
3134
|
# clone handled by initialize_copy
|
|
3203
3135
|
|
|
3204
3136
|
# Fill floating point NaN value with a fill value.
|
|
3205
3137
|
#
|
|
3206
|
-
# @param
|
|
3138
|
+
# @param value [Object]
|
|
3207
3139
|
# Value used to fill nan values.
|
|
3208
3140
|
#
|
|
3209
3141
|
# @return [Series]
|
|
@@ -3220,7 +3152,7 @@ module Polars
|
|
|
3220
3152
|
# # 3.0
|
|
3221
3153
|
# # 0.0
|
|
3222
3154
|
# # ]
|
|
3223
|
-
def fill_nan(
|
|
3155
|
+
def fill_nan(value)
|
|
3224
3156
|
super
|
|
3225
3157
|
end
|
|
3226
3158
|
|
|
@@ -3344,8 +3276,12 @@ module Polars
|
|
|
3344
3276
|
|
|
3345
3277
|
# Round underlying floating point data by `decimals` digits.
|
|
3346
3278
|
#
|
|
3279
|
+
# The default rounding mode is "half to even" (also known as "bankers' rounding").
|
|
3280
|
+
#
|
|
3347
3281
|
# @param decimals [Integer]
|
|
3348
|
-
#
|
|
3282
|
+
# Number of decimals to round by.
|
|
3283
|
+
# @param mode ['half_to_even', 'half_away_from_zero']
|
|
3284
|
+
# Rounding mode.
|
|
3349
3285
|
#
|
|
3350
3286
|
# @return [Series]
|
|
3351
3287
|
#
|
|
@@ -3360,7 +3296,7 @@ module Polars
|
|
|
3360
3296
|
# # 2.57
|
|
3361
3297
|
# # 3.9
|
|
3362
3298
|
# # ]
|
|
3363
|
-
def round(decimals = 0)
|
|
3299
|
+
def round(decimals = 0, mode: "half_to_even")
|
|
3364
3300
|
super
|
|
3365
3301
|
end
|
|
3366
3302
|
|
|
@@ -3543,7 +3479,6 @@ module Polars
|
|
|
3543
3479
|
def arcsin
|
|
3544
3480
|
super
|
|
3545
3481
|
end
|
|
3546
|
-
alias_method :asin, :arcsin
|
|
3547
3482
|
|
|
3548
3483
|
# Compute the element-wise value for the inverse cosine.
|
|
3549
3484
|
#
|
|
@@ -3563,7 +3498,6 @@ module Polars
|
|
|
3563
3498
|
def arccos
|
|
3564
3499
|
super
|
|
3565
3500
|
end
|
|
3566
|
-
alias_method :acos, :arccos
|
|
3567
3501
|
|
|
3568
3502
|
# Compute the element-wise value for the inverse tangent.
|
|
3569
3503
|
#
|
|
@@ -3583,7 +3517,6 @@ module Polars
|
|
|
3583
3517
|
def arctan
|
|
3584
3518
|
super
|
|
3585
3519
|
end
|
|
3586
|
-
alias_method :atan, :arctan
|
|
3587
3520
|
|
|
3588
3521
|
# Compute the element-wise value for the inverse hyperbolic sine.
|
|
3589
3522
|
#
|
|
@@ -3603,7 +3536,6 @@ module Polars
|
|
|
3603
3536
|
def arcsinh
|
|
3604
3537
|
super
|
|
3605
3538
|
end
|
|
3606
|
-
alias_method :asinh, :arcsinh
|
|
3607
3539
|
|
|
3608
3540
|
# Compute the element-wise value for the inverse hyperbolic cosine.
|
|
3609
3541
|
#
|
|
@@ -3624,7 +3556,6 @@ module Polars
|
|
|
3624
3556
|
def arccosh
|
|
3625
3557
|
super
|
|
3626
3558
|
end
|
|
3627
|
-
alias_method :acosh, :arccosh
|
|
3628
3559
|
|
|
3629
3560
|
# Compute the element-wise value for the inverse hyperbolic tangent.
|
|
3630
3561
|
#
|
|
@@ -3648,7 +3579,6 @@ module Polars
|
|
|
3648
3579
|
def arctanh
|
|
3649
3580
|
super
|
|
3650
3581
|
end
|
|
3651
|
-
alias_method :atanh, :arctanh
|
|
3652
3582
|
|
|
3653
3583
|
# Compute the element-wise value for the hyperbolic sine.
|
|
3654
3584
|
#
|
|
@@ -3734,21 +3664,23 @@ module Polars
|
|
|
3734
3664
|
# # 12
|
|
3735
3665
|
# # 13
|
|
3736
3666
|
# # ]
|
|
3737
|
-
def map_elements(return_dtype: nil, skip_nulls: true, &
|
|
3667
|
+
def map_elements(return_dtype: nil, skip_nulls: true, &function)
|
|
3738
3668
|
if return_dtype.nil?
|
|
3739
3669
|
pl_return_dtype = nil
|
|
3740
3670
|
else
|
|
3741
|
-
pl_return_dtype = Utils.
|
|
3671
|
+
pl_return_dtype = Utils.parse_into_dtype(return_dtype)
|
|
3742
3672
|
end
|
|
3743
|
-
Utils.wrap_s(_s.map_elements(
|
|
3673
|
+
Utils.wrap_s(_s.map_elements(function, pl_return_dtype, skip_nulls))
|
|
3744
3674
|
end
|
|
3745
3675
|
alias_method :map, :map_elements
|
|
3746
|
-
alias_method :apply, :map_elements
|
|
3747
3676
|
|
|
3748
3677
|
# Shift the values by a given period.
|
|
3749
3678
|
#
|
|
3750
|
-
# @param
|
|
3679
|
+
# @param n [Integer]
|
|
3751
3680
|
# Number of places to shift (may be negative).
|
|
3681
|
+
# @param fill_value [Object]
|
|
3682
|
+
# Fill the resulting null values with this value. Accepts scalar expression
|
|
3683
|
+
# input. Non-expression inputs are parsed as literals.
|
|
3752
3684
|
#
|
|
3753
3685
|
# @return [Series]
|
|
3754
3686
|
#
|
|
@@ -3774,19 +3706,7 @@ module Polars
|
|
|
3774
3706
|
# # 3
|
|
3775
3707
|
# # null
|
|
3776
3708
|
# # ]
|
|
3777
|
-
def shift(
|
|
3778
|
-
super
|
|
3779
|
-
end
|
|
3780
|
-
|
|
3781
|
-
# Shift the values by a given period and fill the resulting null values.
|
|
3782
|
-
#
|
|
3783
|
-
# @param periods [Integer]
|
|
3784
|
-
# Number of places to shift (may be negative).
|
|
3785
|
-
# @param fill_value [Object]
|
|
3786
|
-
# Fill nil values with the result of this expression.
|
|
3787
|
-
#
|
|
3788
|
-
# @return [Series]
|
|
3789
|
-
def shift_and_fill(periods, fill_value)
|
|
3709
|
+
def shift(n = 1, fill_value: nil)
|
|
3790
3710
|
super
|
|
3791
3711
|
end
|
|
3792
3712
|
|
|
@@ -3834,6 +3754,90 @@ module Polars
|
|
|
3834
3754
|
Utils.wrap_s(_s.zip_with(mask._s, other._s))
|
|
3835
3755
|
end
|
|
3836
3756
|
|
|
3757
|
+
# Compute a rolling min based on another series.
|
|
3758
|
+
#
|
|
3759
|
+
# @note
|
|
3760
|
+
# This functionality is considered **unstable**. It may be changed
|
|
3761
|
+
# at any point without it being considered a breaking change.
|
|
3762
|
+
#
|
|
3763
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
3764
|
+
# (the default) means the windows will be:
|
|
3765
|
+
#
|
|
3766
|
+
# - (t_0 - window_size, t_0]
|
|
3767
|
+
# - (t_1 - window_size, t_1]
|
|
3768
|
+
# - ...
|
|
3769
|
+
# - (t_n - window_size, t_n]
|
|
3770
|
+
#
|
|
3771
|
+
# @param by [Object]
|
|
3772
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
3773
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
3774
|
+
# in `window_size`).
|
|
3775
|
+
# @param window_size [String]
|
|
3776
|
+
# The length of the window. Can be a dynamic temporal
|
|
3777
|
+
# size indicated by a timedelta or the following string language:
|
|
3778
|
+
#
|
|
3779
|
+
# - 1ns (1 nanosecond)
|
|
3780
|
+
# - 1us (1 microsecond)
|
|
3781
|
+
# - 1ms (1 millisecond)
|
|
3782
|
+
# - 1s (1 second)
|
|
3783
|
+
# - 1m (1 minute)
|
|
3784
|
+
# - 1h (1 hour)
|
|
3785
|
+
# - 1d (1 calendar day)
|
|
3786
|
+
# - 1w (1 calendar week)
|
|
3787
|
+
# - 1mo (1 calendar month)
|
|
3788
|
+
# - 1q (1 calendar quarter)
|
|
3789
|
+
# - 1y (1 calendar year)
|
|
3790
|
+
# - 1i (1 index count)
|
|
3791
|
+
#
|
|
3792
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
3793
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
3794
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
3795
|
+
# "calendar year".
|
|
3796
|
+
# @param min_samples [Integer]
|
|
3797
|
+
# The number of values in the window that should be non-null before computing
|
|
3798
|
+
# a result.
|
|
3799
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
3800
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
3801
|
+
# defaults to `'right'`.
|
|
3802
|
+
#
|
|
3803
|
+
# @return [Series]
|
|
3804
|
+
#
|
|
3805
|
+
# @note
|
|
3806
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
3807
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
3808
|
+
# computation.
|
|
3809
|
+
#
|
|
3810
|
+
# @example
|
|
3811
|
+
# start = DateTime.new(2001, 1, 1)
|
|
3812
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
3813
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
3814
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
3815
|
+
# s.rolling_min_by(d, "3h")
|
|
3816
|
+
# # =>
|
|
3817
|
+
# # shape: (25,)
|
|
3818
|
+
# # Series: 'index' [i64]
|
|
3819
|
+
# # [
|
|
3820
|
+
# # 0
|
|
3821
|
+
# # 0
|
|
3822
|
+
# # 0
|
|
3823
|
+
# # 1
|
|
3824
|
+
# # 2
|
|
3825
|
+
# # …
|
|
3826
|
+
# # 18
|
|
3827
|
+
# # 19
|
|
3828
|
+
# # 20
|
|
3829
|
+
# # 21
|
|
3830
|
+
# # 22
|
|
3831
|
+
# # ]
|
|
3832
|
+
def rolling_min_by(
|
|
3833
|
+
by,
|
|
3834
|
+
window_size,
|
|
3835
|
+
min_samples: 1,
|
|
3836
|
+
closed: "right"
|
|
3837
|
+
)
|
|
3838
|
+
super
|
|
3839
|
+
end
|
|
3840
|
+
|
|
3837
3841
|
# Apply a rolling min (moving min) over the values in this array.
|
|
3838
3842
|
#
|
|
3839
3843
|
# A window of length `window_size` will traverse the array. The values that fill
|
|
@@ -3845,7 +3849,7 @@ module Polars
|
|
|
3845
3849
|
# @param weights [Array]
|
|
3846
3850
|
# An optional slice with the same length as the window that will be multiplied
|
|
3847
3851
|
# elementwise with the values in the window.
|
|
3848
|
-
# @param
|
|
3852
|
+
# @param min_samples [Integer]
|
|
3849
3853
|
# The number of values in the window that should be non-null before computing
|
|
3850
3854
|
# a result. If nil, it will be set equal to window size.
|
|
3851
3855
|
# @param center [Boolean]
|
|
@@ -3869,12 +3873,96 @@ module Polars
|
|
|
3869
3873
|
def rolling_min(
|
|
3870
3874
|
window_size,
|
|
3871
3875
|
weights: nil,
|
|
3872
|
-
|
|
3876
|
+
min_samples: nil,
|
|
3873
3877
|
center: false
|
|
3874
3878
|
)
|
|
3875
3879
|
super
|
|
3876
3880
|
end
|
|
3877
3881
|
|
|
3882
|
+
# Compute a rolling max based on another series.
|
|
3883
|
+
#
|
|
3884
|
+
# @note
|
|
3885
|
+
# This functionality is considered **unstable**. It may be changed
|
|
3886
|
+
# at any point without it being considered a breaking change.
|
|
3887
|
+
#
|
|
3888
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
3889
|
+
# (the default) means the windows will be:
|
|
3890
|
+
#
|
|
3891
|
+
# - (t_0 - window_size, t_0]
|
|
3892
|
+
# - (t_1 - window_size, t_1]
|
|
3893
|
+
# - ...
|
|
3894
|
+
# - (t_n - window_size, t_n]
|
|
3895
|
+
#
|
|
3896
|
+
# @param by [Object]
|
|
3897
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
3898
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
3899
|
+
# in `window_size`).
|
|
3900
|
+
# @param window_size [String]
|
|
3901
|
+
# The length of the window. Can be a dynamic temporal
|
|
3902
|
+
# size indicated by a timedelta or the following string language:
|
|
3903
|
+
#
|
|
3904
|
+
# - 1ns (1 nanosecond)
|
|
3905
|
+
# - 1us (1 microsecond)
|
|
3906
|
+
# - 1ms (1 millisecond)
|
|
3907
|
+
# - 1s (1 second)
|
|
3908
|
+
# - 1m (1 minute)
|
|
3909
|
+
# - 1h (1 hour)
|
|
3910
|
+
# - 1d (1 calendar day)
|
|
3911
|
+
# - 1w (1 calendar week)
|
|
3912
|
+
# - 1mo (1 calendar month)
|
|
3913
|
+
# - 1q (1 calendar quarter)
|
|
3914
|
+
# - 1y (1 calendar year)
|
|
3915
|
+
# - 1i (1 index count)
|
|
3916
|
+
#
|
|
3917
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
3918
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
3919
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
3920
|
+
# "calendar year".
|
|
3921
|
+
# @param min_samples [Integer]
|
|
3922
|
+
# The number of values in the window that should be non-null before computing
|
|
3923
|
+
# a result.
|
|
3924
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
3925
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
3926
|
+
# defaults to `'right'`.
|
|
3927
|
+
#
|
|
3928
|
+
# @return [Series]
|
|
3929
|
+
#
|
|
3930
|
+
# @note
|
|
3931
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
3932
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
3933
|
+
# computation.
|
|
3934
|
+
#
|
|
3935
|
+
# @example
|
|
3936
|
+
# start = DateTime.new(2001, 1, 1)
|
|
3937
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
3938
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
3939
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
3940
|
+
# s.rolling_max_by(d, "3h")
|
|
3941
|
+
# # =>
|
|
3942
|
+
# # shape: (25,)
|
|
3943
|
+
# # Series: 'index' [i64]
|
|
3944
|
+
# # [
|
|
3945
|
+
# # 0
|
|
3946
|
+
# # 1
|
|
3947
|
+
# # 2
|
|
3948
|
+
# # 3
|
|
3949
|
+
# # 4
|
|
3950
|
+
# # …
|
|
3951
|
+
# # 20
|
|
3952
|
+
# # 21
|
|
3953
|
+
# # 22
|
|
3954
|
+
# # 23
|
|
3955
|
+
# # 24
|
|
3956
|
+
# # ]
|
|
3957
|
+
def rolling_max_by(
|
|
3958
|
+
by,
|
|
3959
|
+
window_size,
|
|
3960
|
+
min_samples: 1,
|
|
3961
|
+
closed: "right"
|
|
3962
|
+
)
|
|
3963
|
+
super
|
|
3964
|
+
end
|
|
3965
|
+
|
|
3878
3966
|
# Apply a rolling max (moving max) over the values in this array.
|
|
3879
3967
|
#
|
|
3880
3968
|
# A window of length `window_size` will traverse the array. The values that fill
|
|
@@ -3886,7 +3974,7 @@ module Polars
|
|
|
3886
3974
|
# @param weights [Array]
|
|
3887
3975
|
# An optional slice with the same length as the window that will be multiplied
|
|
3888
3976
|
# elementwise with the values in the window.
|
|
3889
|
-
# @param
|
|
3977
|
+
# @param min_samples [Integer]
|
|
3890
3978
|
# The number of values in the window that should be non-null before computing
|
|
3891
3979
|
# a result. If nil, it will be set equal to window size.
|
|
3892
3980
|
# @param center [Boolean]
|
|
@@ -3910,35 +3998,119 @@ module Polars
|
|
|
3910
3998
|
def rolling_max(
|
|
3911
3999
|
window_size,
|
|
3912
4000
|
weights: nil,
|
|
3913
|
-
|
|
4001
|
+
min_samples: nil,
|
|
3914
4002
|
center: false
|
|
3915
4003
|
)
|
|
3916
4004
|
super
|
|
3917
4005
|
end
|
|
3918
4006
|
|
|
3919
|
-
#
|
|
4007
|
+
# Compute a rolling mean based on another series.
|
|
3920
4008
|
#
|
|
3921
|
-
#
|
|
3922
|
-
#
|
|
3923
|
-
#
|
|
4009
|
+
# @note
|
|
4010
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4011
|
+
# at any point without it being considered a breaking change.
|
|
3924
4012
|
#
|
|
3925
|
-
#
|
|
3926
|
-
#
|
|
3927
|
-
# @param weights [Array]
|
|
3928
|
-
# An optional slice with the same length as the window that will be multiplied
|
|
3929
|
-
# elementwise with the values in the window.
|
|
3930
|
-
# @param min_periods [Integer]
|
|
3931
|
-
# The number of values in the window that should be non-null before computing
|
|
3932
|
-
# a result. If nil, it will be set equal to window size.
|
|
3933
|
-
# @param center [Boolean]
|
|
3934
|
-
# Set the labels at the center of the window
|
|
4013
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4014
|
+
# (the default) means the windows will be:
|
|
3935
4015
|
#
|
|
3936
|
-
#
|
|
4016
|
+
# - (t_0 - window_size, t_0]
|
|
4017
|
+
# - (t_1 - window_size, t_1]
|
|
4018
|
+
# - ...
|
|
4019
|
+
# - (t_n - window_size, t_n]
|
|
3937
4020
|
#
|
|
3938
|
-
# @
|
|
3939
|
-
#
|
|
3940
|
-
#
|
|
3941
|
-
#
|
|
4021
|
+
# @param by [Object]
|
|
4022
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4023
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4024
|
+
# in `window_size`).
|
|
4025
|
+
# @param window_size [String]
|
|
4026
|
+
# The length of the window. Can be a dynamic temporal
|
|
4027
|
+
# size indicated by a timedelta or the following string language:
|
|
4028
|
+
#
|
|
4029
|
+
# - 1ns (1 nanosecond)
|
|
4030
|
+
# - 1us (1 microsecond)
|
|
4031
|
+
# - 1ms (1 millisecond)
|
|
4032
|
+
# - 1s (1 second)
|
|
4033
|
+
# - 1m (1 minute)
|
|
4034
|
+
# - 1h (1 hour)
|
|
4035
|
+
# - 1d (1 calendar day)
|
|
4036
|
+
# - 1w (1 calendar week)
|
|
4037
|
+
# - 1mo (1 calendar month)
|
|
4038
|
+
# - 1q (1 calendar quarter)
|
|
4039
|
+
# - 1y (1 calendar year)
|
|
4040
|
+
# - 1i (1 index count)
|
|
4041
|
+
#
|
|
4042
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4043
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4044
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4045
|
+
# "calendar year".
|
|
4046
|
+
# @param min_samples [Integer]
|
|
4047
|
+
# The number of values in the window that should be non-null before computing
|
|
4048
|
+
# a result.
|
|
4049
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4050
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4051
|
+
# defaults to `'right'`.
|
|
4052
|
+
#
|
|
4053
|
+
# @return [Series]
|
|
4054
|
+
#
|
|
4055
|
+
# @note
|
|
4056
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
4057
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
4058
|
+
# computation.
|
|
4059
|
+
#
|
|
4060
|
+
# @example
|
|
4061
|
+
# start = DateTime.new(2001, 1, 1)
|
|
4062
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
4063
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
4064
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
4065
|
+
# s.rolling_mean_by(d, "3h")
|
|
4066
|
+
# # =>
|
|
4067
|
+
# # shape: (25,)
|
|
4068
|
+
# # Series: 'index' [f64]
|
|
4069
|
+
# # [
|
|
4070
|
+
# # 0.0
|
|
4071
|
+
# # 0.5
|
|
4072
|
+
# # 1.0
|
|
4073
|
+
# # 2.0
|
|
4074
|
+
# # 3.0
|
|
4075
|
+
# # …
|
|
4076
|
+
# # 19.0
|
|
4077
|
+
# # 20.0
|
|
4078
|
+
# # 21.0
|
|
4079
|
+
# # 22.0
|
|
4080
|
+
# # 23.0
|
|
4081
|
+
# # ]
|
|
4082
|
+
def rolling_mean_by(
|
|
4083
|
+
by,
|
|
4084
|
+
window_size,
|
|
4085
|
+
min_samples: 1,
|
|
4086
|
+
closed: "right"
|
|
4087
|
+
)
|
|
4088
|
+
super
|
|
4089
|
+
end
|
|
4090
|
+
|
|
4091
|
+
# Apply a rolling mean (moving mean) over the values in this array.
|
|
4092
|
+
#
|
|
4093
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
|
4094
|
+
# this window will (optionally) be multiplied with the weights given by the
|
|
4095
|
+
# `weights` vector. The resulting values will be aggregated to their mean.
|
|
4096
|
+
#
|
|
4097
|
+
# @param window_size [Integer]
|
|
4098
|
+
# The length of the window.
|
|
4099
|
+
# @param weights [Array]
|
|
4100
|
+
# An optional slice with the same length as the window that will be multiplied
|
|
4101
|
+
# elementwise with the values in the window.
|
|
4102
|
+
# @param min_samples [Integer]
|
|
4103
|
+
# The number of values in the window that should be non-null before computing
|
|
4104
|
+
# a result. If nil, it will be set equal to window size.
|
|
4105
|
+
# @param center [Boolean]
|
|
4106
|
+
# Set the labels at the center of the window
|
|
4107
|
+
#
|
|
4108
|
+
# @return [Series]
|
|
4109
|
+
#
|
|
4110
|
+
# @example
|
|
4111
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
|
4112
|
+
# s.rolling_mean(2)
|
|
4113
|
+
# # =>
|
|
3942
4114
|
# # shape: (5,)
|
|
3943
4115
|
# # Series: 'a' [f64]
|
|
3944
4116
|
# # [
|
|
@@ -3951,12 +4123,96 @@ module Polars
|
|
|
3951
4123
|
def rolling_mean(
|
|
3952
4124
|
window_size,
|
|
3953
4125
|
weights: nil,
|
|
3954
|
-
|
|
4126
|
+
min_samples: nil,
|
|
3955
4127
|
center: false
|
|
3956
4128
|
)
|
|
3957
4129
|
super
|
|
3958
4130
|
end
|
|
3959
4131
|
|
|
4132
|
+
# Compute a rolling sum based on another series.
|
|
4133
|
+
#
|
|
4134
|
+
# @note
|
|
4135
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4136
|
+
# at any point without it being considered a breaking change.
|
|
4137
|
+
#
|
|
4138
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4139
|
+
# (the default) means the windows will be:
|
|
4140
|
+
#
|
|
4141
|
+
# - (t_0 - window_size, t_0]
|
|
4142
|
+
# - (t_1 - window_size, t_1]
|
|
4143
|
+
# - ...
|
|
4144
|
+
# - (t_n - window_size, t_n]
|
|
4145
|
+
#
|
|
4146
|
+
# @param by [Object]
|
|
4147
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4148
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4149
|
+
# in `window_size`).
|
|
4150
|
+
# @param window_size [String]
|
|
4151
|
+
# The length of the window. Can be a dynamic temporal
|
|
4152
|
+
# size indicated by a timedelta or the following string language:
|
|
4153
|
+
#
|
|
4154
|
+
# - 1ns (1 nanosecond)
|
|
4155
|
+
# - 1us (1 microsecond)
|
|
4156
|
+
# - 1ms (1 millisecond)
|
|
4157
|
+
# - 1s (1 second)
|
|
4158
|
+
# - 1m (1 minute)
|
|
4159
|
+
# - 1h (1 hour)
|
|
4160
|
+
# - 1d (1 calendar day)
|
|
4161
|
+
# - 1w (1 calendar week)
|
|
4162
|
+
# - 1mo (1 calendar month)
|
|
4163
|
+
# - 1q (1 calendar quarter)
|
|
4164
|
+
# - 1y (1 calendar year)
|
|
4165
|
+
# - 1i (1 index count)
|
|
4166
|
+
#
|
|
4167
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4168
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4169
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4170
|
+
# "calendar year".
|
|
4171
|
+
# @param min_samples [Integer]
|
|
4172
|
+
# The number of values in the window that should be non-null before computing
|
|
4173
|
+
# a result.
|
|
4174
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4175
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4176
|
+
# defaults to `'right'`.
|
|
4177
|
+
#
|
|
4178
|
+
# @return [Series]
|
|
4179
|
+
#
|
|
4180
|
+
# @note
|
|
4181
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
4182
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
4183
|
+
# computation.
|
|
4184
|
+
#
|
|
4185
|
+
# @example
|
|
4186
|
+
# start = DateTime.new(2001, 1, 1)
|
|
4187
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
4188
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
4189
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
4190
|
+
# s.rolling_sum_by(d, "3h")
|
|
4191
|
+
# # =>
|
|
4192
|
+
# # shape: (25,)
|
|
4193
|
+
# # Series: 'index' [i64]
|
|
4194
|
+
# # [
|
|
4195
|
+
# # 0
|
|
4196
|
+
# # 1
|
|
4197
|
+
# # 3
|
|
4198
|
+
# # 6
|
|
4199
|
+
# # 9
|
|
4200
|
+
# # …
|
|
4201
|
+
# # 57
|
|
4202
|
+
# # 60
|
|
4203
|
+
# # 63
|
|
4204
|
+
# # 66
|
|
4205
|
+
# # 69
|
|
4206
|
+
# # ]
|
|
4207
|
+
def rolling_sum_by(
|
|
4208
|
+
by,
|
|
4209
|
+
window_size,
|
|
4210
|
+
min_samples: 1,
|
|
4211
|
+
closed: "right"
|
|
4212
|
+
)
|
|
4213
|
+
super
|
|
4214
|
+
end
|
|
4215
|
+
|
|
3960
4216
|
# Apply a rolling sum (moving sum) over the values in this array.
|
|
3961
4217
|
#
|
|
3962
4218
|
# A window of length `window_size` will traverse the array. The values that fill
|
|
@@ -3968,7 +4224,7 @@ module Polars
|
|
|
3968
4224
|
# @param weights [Array]
|
|
3969
4225
|
# An optional slice with the same length as the window that will be multiplied
|
|
3970
4226
|
# elementwise with the values in the window.
|
|
3971
|
-
# @param
|
|
4227
|
+
# @param min_samples [Integer]
|
|
3972
4228
|
# The number of values in the window that should be non-null before computing
|
|
3973
4229
|
# a result. If nil, it will be set equal to window size.
|
|
3974
4230
|
# @param center [Boolean]
|
|
@@ -3992,12 +4248,99 @@ module Polars
|
|
|
3992
4248
|
def rolling_sum(
|
|
3993
4249
|
window_size,
|
|
3994
4250
|
weights: nil,
|
|
3995
|
-
|
|
4251
|
+
min_samples: nil,
|
|
3996
4252
|
center: false
|
|
3997
4253
|
)
|
|
3998
4254
|
super
|
|
3999
4255
|
end
|
|
4000
4256
|
|
|
4257
|
+
# Compute a rolling standard deviation based on another series.
|
|
4258
|
+
#
|
|
4259
|
+
# @note
|
|
4260
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4261
|
+
# at any point without it being considered a breaking change.
|
|
4262
|
+
#
|
|
4263
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4264
|
+
# (the default) means the windows will be:
|
|
4265
|
+
#
|
|
4266
|
+
# - (t_0 - window_size, t_0]
|
|
4267
|
+
# - (t_1 - window_size, t_1]
|
|
4268
|
+
# - ...
|
|
4269
|
+
# - (t_n - window_size, t_n]
|
|
4270
|
+
#
|
|
4271
|
+
# @param by [Object]
|
|
4272
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4273
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4274
|
+
# in `window_size`).
|
|
4275
|
+
# @param window_size [String]
|
|
4276
|
+
# The length of the window. Can be a dynamic temporal
|
|
4277
|
+
# size indicated by a timedelta or the following string language:
|
|
4278
|
+
#
|
|
4279
|
+
# - 1ns (1 nanosecond)
|
|
4280
|
+
# - 1us (1 microsecond)
|
|
4281
|
+
# - 1ms (1 millisecond)
|
|
4282
|
+
# - 1s (1 second)
|
|
4283
|
+
# - 1m (1 minute)
|
|
4284
|
+
# - 1h (1 hour)
|
|
4285
|
+
# - 1d (1 calendar day)
|
|
4286
|
+
# - 1w (1 calendar week)
|
|
4287
|
+
# - 1mo (1 calendar month)
|
|
4288
|
+
# - 1q (1 calendar quarter)
|
|
4289
|
+
# - 1y (1 calendar year)
|
|
4290
|
+
# - 1i (1 index count)
|
|
4291
|
+
#
|
|
4292
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4293
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4294
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4295
|
+
# "calendar year".
|
|
4296
|
+
# @param min_samples [Integer]
|
|
4297
|
+
# The number of values in the window that should be non-null before computing
|
|
4298
|
+
# a result.
|
|
4299
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4300
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4301
|
+
# defaults to `'right'`.
|
|
4302
|
+
# @param ddof [Integer]
|
|
4303
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
|
4304
|
+
#
|
|
4305
|
+
# @return [Series]
|
|
4306
|
+
#
|
|
4307
|
+
# @note
|
|
4308
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
4309
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
4310
|
+
# computation.
|
|
4311
|
+
#
|
|
4312
|
+
# @example
|
|
4313
|
+
# start = DateTime.new(2001, 1, 1)
|
|
4314
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
4315
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
4316
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
4317
|
+
# s.rolling_std_by(d, "3h")
|
|
4318
|
+
# # =>
|
|
4319
|
+
# # shape: (25,)
|
|
4320
|
+
# # Series: 'index' [f64]
|
|
4321
|
+
# # [
|
|
4322
|
+
# # null
|
|
4323
|
+
# # 0.707107
|
|
4324
|
+
# # 1.0
|
|
4325
|
+
# # 1.0
|
|
4326
|
+
# # 1.0
|
|
4327
|
+
# # …
|
|
4328
|
+
# # 1.0
|
|
4329
|
+
# # 1.0
|
|
4330
|
+
# # 1.0
|
|
4331
|
+
# # 1.0
|
|
4332
|
+
# # 1.0
|
|
4333
|
+
# # ]
|
|
4334
|
+
def rolling_std_by(
|
|
4335
|
+
by,
|
|
4336
|
+
window_size,
|
|
4337
|
+
min_samples: 1,
|
|
4338
|
+
closed: "right",
|
|
4339
|
+
ddof: 1
|
|
4340
|
+
)
|
|
4341
|
+
super
|
|
4342
|
+
end
|
|
4343
|
+
|
|
4001
4344
|
# Compute a rolling std dev.
|
|
4002
4345
|
#
|
|
4003
4346
|
# A window of length `window_size` will traverse the array. The values that fill
|
|
@@ -4009,7 +4352,7 @@ module Polars
|
|
|
4009
4352
|
# @param weights [Array]
|
|
4010
4353
|
# An optional slice with the same length as the window that will be multiplied
|
|
4011
4354
|
# elementwise with the values in the window.
|
|
4012
|
-
# @param
|
|
4355
|
+
# @param min_samples [Integer]
|
|
4013
4356
|
# The number of values in the window that should be non-null before computing
|
|
4014
4357
|
# a result. If nil, it will be set equal to window size.
|
|
4015
4358
|
# @param center [Boolean]
|
|
@@ -4036,13 +4379,100 @@ module Polars
|
|
|
4036
4379
|
def rolling_std(
|
|
4037
4380
|
window_size,
|
|
4038
4381
|
weights: nil,
|
|
4039
|
-
|
|
4382
|
+
min_samples: nil,
|
|
4040
4383
|
center: false,
|
|
4041
4384
|
ddof: 1
|
|
4042
4385
|
)
|
|
4043
4386
|
super
|
|
4044
4387
|
end
|
|
4045
4388
|
|
|
4389
|
+
# Compute a rolling variance based on another series.
|
|
4390
|
+
#
|
|
4391
|
+
# @note
|
|
4392
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4393
|
+
# at any point without it being considered a breaking change.
|
|
4394
|
+
#
|
|
4395
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4396
|
+
# (the default) means the windows will be:
|
|
4397
|
+
#
|
|
4398
|
+
# - (t_0 - window_size, t_0]
|
|
4399
|
+
# - (t_1 - window_size, t_1]
|
|
4400
|
+
# - ...
|
|
4401
|
+
# - (t_n - window_size, t_n]
|
|
4402
|
+
#
|
|
4403
|
+
# @param by [Object]
|
|
4404
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4405
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4406
|
+
# in `window_size`).
|
|
4407
|
+
# @param window_size [String]
|
|
4408
|
+
# The length of the window. Can be a dynamic temporal
|
|
4409
|
+
# size indicated by a timedelta or the following string language:
|
|
4410
|
+
#
|
|
4411
|
+
# - 1ns (1 nanosecond)
|
|
4412
|
+
# - 1us (1 microsecond)
|
|
4413
|
+
# - 1ms (1 millisecond)
|
|
4414
|
+
# - 1s (1 second)
|
|
4415
|
+
# - 1m (1 minute)
|
|
4416
|
+
# - 1h (1 hour)
|
|
4417
|
+
# - 1d (1 calendar day)
|
|
4418
|
+
# - 1w (1 calendar week)
|
|
4419
|
+
# - 1mo (1 calendar month)
|
|
4420
|
+
# - 1q (1 calendar quarter)
|
|
4421
|
+
# - 1y (1 calendar year)
|
|
4422
|
+
# - 1i (1 index count)
|
|
4423
|
+
#
|
|
4424
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4425
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4426
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4427
|
+
# "calendar year".
|
|
4428
|
+
# @param min_samples [Integer]
|
|
4429
|
+
# The number of values in the window that should be non-null before computing
|
|
4430
|
+
# a result.
|
|
4431
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4432
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4433
|
+
# defaults to `'right'`.
|
|
4434
|
+
# @param ddof [Integer]
|
|
4435
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
|
4436
|
+
#
|
|
4437
|
+
# @return [Series]
|
|
4438
|
+
#
|
|
4439
|
+
# @note
|
|
4440
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
4441
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
4442
|
+
# computation.
|
|
4443
|
+
#
|
|
4444
|
+
# @example
|
|
4445
|
+
# start = DateTime.new(2001, 1, 1)
|
|
4446
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
4447
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
4448
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
4449
|
+
# s.rolling_var_by(d, "3h")
|
|
4450
|
+
# # =>
|
|
4451
|
+
# # shape: (25,)
|
|
4452
|
+
# # Series: 'index' [f64]
|
|
4453
|
+
# # [
|
|
4454
|
+
# # null
|
|
4455
|
+
# # 0.5
|
|
4456
|
+
# # 1.0
|
|
4457
|
+
# # 1.0
|
|
4458
|
+
# # 1.0
|
|
4459
|
+
# # …
|
|
4460
|
+
# # 1.0
|
|
4461
|
+
# # 1.0
|
|
4462
|
+
# # 1.0
|
|
4463
|
+
# # 1.0
|
|
4464
|
+
# # 1.0
|
|
4465
|
+
# # ]
|
|
4466
|
+
def rolling_var_by(
|
|
4467
|
+
by,
|
|
4468
|
+
window_size,
|
|
4469
|
+
min_samples: 1,
|
|
4470
|
+
closed: "right",
|
|
4471
|
+
ddof: 1
|
|
4472
|
+
)
|
|
4473
|
+
super
|
|
4474
|
+
end
|
|
4475
|
+
|
|
4046
4476
|
# Compute a rolling variance.
|
|
4047
4477
|
#
|
|
4048
4478
|
# A window of length `window_size` will traverse the array. The values that fill
|
|
@@ -4054,7 +4484,7 @@ module Polars
|
|
|
4054
4484
|
# @param weights [Array]
|
|
4055
4485
|
# An optional slice with the same length as the window that will be multiplied
|
|
4056
4486
|
# elementwise with the values in the window.
|
|
4057
|
-
# @param
|
|
4487
|
+
# @param min_samples [Integer]
|
|
4058
4488
|
# The number of values in the window that should be non-null before computing
|
|
4059
4489
|
# a result. If nil, it will be set equal to window size.
|
|
4060
4490
|
# @param center [Boolean]
|
|
@@ -4081,7 +4511,7 @@ module Polars
|
|
|
4081
4511
|
def rolling_var(
|
|
4082
4512
|
window_size,
|
|
4083
4513
|
weights: nil,
|
|
4084
|
-
|
|
4514
|
+
min_samples: nil,
|
|
4085
4515
|
center: false,
|
|
4086
4516
|
ddof: 1
|
|
4087
4517
|
)
|
|
@@ -4091,6 +4521,90 @@ module Polars
|
|
|
4091
4521
|
# def rolling_apply
|
|
4092
4522
|
# end
|
|
4093
4523
|
|
|
4524
|
+
# Compute a rolling median based on another series.
|
|
4525
|
+
#
|
|
4526
|
+
# @note
|
|
4527
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4528
|
+
# at any point without it being considered a breaking change.
|
|
4529
|
+
#
|
|
4530
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4531
|
+
# (the default) means the windows will be:
|
|
4532
|
+
#
|
|
4533
|
+
# - (t_0 - window_size, t_0]
|
|
4534
|
+
# - (t_1 - window_size, t_1]
|
|
4535
|
+
# - ...
|
|
4536
|
+
# - (t_n - window_size, t_n]
|
|
4537
|
+
#
|
|
4538
|
+
# @param by [Object]
|
|
4539
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4540
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4541
|
+
# in `window_size`).
|
|
4542
|
+
# @param window_size [String]
|
|
4543
|
+
# The length of the window. Can be a dynamic temporal
|
|
4544
|
+
# size indicated by a timedelta or the following string language:
|
|
4545
|
+
#
|
|
4546
|
+
# - 1ns (1 nanosecond)
|
|
4547
|
+
# - 1us (1 microsecond)
|
|
4548
|
+
# - 1ms (1 millisecond)
|
|
4549
|
+
# - 1s (1 second)
|
|
4550
|
+
# - 1m (1 minute)
|
|
4551
|
+
# - 1h (1 hour)
|
|
4552
|
+
# - 1d (1 calendar day)
|
|
4553
|
+
# - 1w (1 calendar week)
|
|
4554
|
+
# - 1mo (1 calendar month)
|
|
4555
|
+
# - 1q (1 calendar quarter)
|
|
4556
|
+
# - 1y (1 calendar year)
|
|
4557
|
+
# - 1i (1 index count)
|
|
4558
|
+
#
|
|
4559
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4560
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4561
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4562
|
+
# "calendar year".
|
|
4563
|
+
# @param min_samples [Integer]
|
|
4564
|
+
# The number of values in the window that should be non-null before computing
|
|
4565
|
+
# a result.
|
|
4566
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4567
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4568
|
+
# defaults to `'right'`.
|
|
4569
|
+
#
|
|
4570
|
+
# @return [Series]
|
|
4571
|
+
#
|
|
4572
|
+
# @note
|
|
4573
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
4574
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
4575
|
+
# computation.
|
|
4576
|
+
#
|
|
4577
|
+
# @example
|
|
4578
|
+
# start = DateTime.new(2001, 1, 1)
|
|
4579
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
4580
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
4581
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
4582
|
+
# s.rolling_median_by(d, "3h")
|
|
4583
|
+
# # =>
|
|
4584
|
+
# # shape: (25,)
|
|
4585
|
+
# # Series: 'index' [f64]
|
|
4586
|
+
# # [
|
|
4587
|
+
# # 0.0
|
|
4588
|
+
# # 0.5
|
|
4589
|
+
# # 1.0
|
|
4590
|
+
# # 2.0
|
|
4591
|
+
# # 3.0
|
|
4592
|
+
# # …
|
|
4593
|
+
# # 19.0
|
|
4594
|
+
# # 20.0
|
|
4595
|
+
# # 21.0
|
|
4596
|
+
# # 22.0
|
|
4597
|
+
# # 23.0
|
|
4598
|
+
# # ]
|
|
4599
|
+
def rolling_median_by(
|
|
4600
|
+
by,
|
|
4601
|
+
window_size,
|
|
4602
|
+
min_samples: 1,
|
|
4603
|
+
closed: "right"
|
|
4604
|
+
)
|
|
4605
|
+
super
|
|
4606
|
+
end
|
|
4607
|
+
|
|
4094
4608
|
# Compute a rolling median.
|
|
4095
4609
|
#
|
|
4096
4610
|
# @param window_size [Integer]
|
|
@@ -4098,7 +4612,7 @@ module Polars
|
|
|
4098
4612
|
# @param weights [Array]
|
|
4099
4613
|
# An optional slice with the same length as the window that will be multiplied
|
|
4100
4614
|
# elementwise with the values in the window.
|
|
4101
|
-
# @param
|
|
4615
|
+
# @param min_samples [Integer]
|
|
4102
4616
|
# The number of values in the window that should be non-null before computing
|
|
4103
4617
|
# a result. If nil, it will be set equal to window size.
|
|
4104
4618
|
# @param center [Boolean]
|
|
@@ -4123,12 +4637,102 @@ module Polars
|
|
|
4123
4637
|
def rolling_median(
|
|
4124
4638
|
window_size,
|
|
4125
4639
|
weights: nil,
|
|
4126
|
-
|
|
4640
|
+
min_samples: nil,
|
|
4127
4641
|
center: false
|
|
4128
4642
|
)
|
|
4129
4643
|
super
|
|
4130
4644
|
end
|
|
4131
4645
|
|
|
4646
|
+
# Compute a rolling quantile based on another series.
|
|
4647
|
+
#
|
|
4648
|
+
# @note
|
|
4649
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4650
|
+
# at any point without it being considered a breaking change.
|
|
4651
|
+
#
|
|
4652
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4653
|
+
# (the default) means the windows will be:
|
|
4654
|
+
#
|
|
4655
|
+
# - (t_0 - window_size, t_0]
|
|
4656
|
+
# - (t_1 - window_size, t_1]
|
|
4657
|
+
# - ...
|
|
4658
|
+
# - (t_n - window_size, t_n]
|
|
4659
|
+
#
|
|
4660
|
+
# @param by [Object]
|
|
4661
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4662
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4663
|
+
# in `window_size`).
|
|
4664
|
+
# @param window_size [String]
|
|
4665
|
+
# The length of the window. Can be a dynamic
|
|
4666
|
+
# temporal size indicated by a timedelta or the following string language:
|
|
4667
|
+
#
|
|
4668
|
+
# - 1ns (1 nanosecond)
|
|
4669
|
+
# - 1us (1 microsecond)
|
|
4670
|
+
# - 1ms (1 millisecond)
|
|
4671
|
+
# - 1s (1 second)
|
|
4672
|
+
# - 1m (1 minute)
|
|
4673
|
+
# - 1h (1 hour)
|
|
4674
|
+
# - 1d (1 calendar day)
|
|
4675
|
+
# - 1w (1 calendar week)
|
|
4676
|
+
# - 1mo (1 calendar month)
|
|
4677
|
+
# - 1q (1 calendar quarter)
|
|
4678
|
+
# - 1y (1 calendar year)
|
|
4679
|
+
# - 1i (1 index count)
|
|
4680
|
+
#
|
|
4681
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4682
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4683
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4684
|
+
# "calendar year".
|
|
4685
|
+
# @param quantile [Float]
|
|
4686
|
+
# Quantile between 0.0 and 1.0.
|
|
4687
|
+
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
|
|
4688
|
+
# Interpolation method.
|
|
4689
|
+
# @param min_samples [Integer]
|
|
4690
|
+
# The number of values in the window that should be non-null before computing
|
|
4691
|
+
# a result.
|
|
4692
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4693
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4694
|
+
# defaults to `'right'`.
|
|
4695
|
+
#
|
|
4696
|
+
# @return [Series]
|
|
4697
|
+
#
|
|
4698
|
+
# @note
|
|
4699
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
|
4700
|
+
# window, consider using `rolling` - this method can cache the window size
|
|
4701
|
+
# computation.
|
|
4702
|
+
#
|
|
4703
|
+
# @example
|
|
4704
|
+
# start = DateTime.new(2001, 1, 1)
|
|
4705
|
+
# stop = DateTime.new(2001, 1, 2)
|
|
4706
|
+
# s = Polars::Series.new("index", 25.times.to_a)
|
|
4707
|
+
# d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
|
|
4708
|
+
# s.rolling_quantile_by(d, "3h", quantile: 0.5)
|
|
4709
|
+
# # =>
|
|
4710
|
+
# # shape: (25,)
|
|
4711
|
+
# # Series: 'index' [f64]
|
|
4712
|
+
# # [
|
|
4713
|
+
# # 0.0
|
|
4714
|
+
# # 1.0
|
|
4715
|
+
# # 1.0
|
|
4716
|
+
# # 2.0
|
|
4717
|
+
# # 3.0
|
|
4718
|
+
# # …
|
|
4719
|
+
# # 19.0
|
|
4720
|
+
# # 20.0
|
|
4721
|
+
# # 21.0
|
|
4722
|
+
# # 22.0
|
|
4723
|
+
# # 23.0
|
|
4724
|
+
# # ]
|
|
4725
|
+
def rolling_quantile_by(
|
|
4726
|
+
by,
|
|
4727
|
+
window_size,
|
|
4728
|
+
quantile:,
|
|
4729
|
+
interpolation: "nearest",
|
|
4730
|
+
min_samples: 1,
|
|
4731
|
+
closed: "right"
|
|
4732
|
+
)
|
|
4733
|
+
super
|
|
4734
|
+
end
|
|
4735
|
+
|
|
4132
4736
|
# Compute a rolling quantile.
|
|
4133
4737
|
#
|
|
4134
4738
|
# @param quantile [Float]
|
|
@@ -4140,7 +4744,7 @@ module Polars
|
|
|
4140
4744
|
# @param weights [Array]
|
|
4141
4745
|
# An optional slice with the same length as the window that will be multiplied
|
|
4142
4746
|
# elementwise with the values in the window.
|
|
4143
|
-
# @param
|
|
4747
|
+
# @param min_samples [Integer]
|
|
4144
4748
|
# The number of values in the window that should be non-null before computing
|
|
4145
4749
|
# a result. If nil, it will be set equal to window size.
|
|
4146
4750
|
# @param center [Boolean]
|
|
@@ -4181,7 +4785,144 @@ module Polars
|
|
|
4181
4785
|
interpolation: "nearest",
|
|
4182
4786
|
window_size: 2,
|
|
4183
4787
|
weights: nil,
|
|
4184
|
-
|
|
4788
|
+
min_samples: nil,
|
|
4789
|
+
center: false
|
|
4790
|
+
)
|
|
4791
|
+
super
|
|
4792
|
+
end
|
|
4793
|
+
|
|
4794
|
+
# Compute a rolling rank based on another series.
|
|
4795
|
+
#
|
|
4796
|
+
# @note
|
|
4797
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4798
|
+
# at any point without it being considered a breaking change.
|
|
4799
|
+
#
|
|
4800
|
+
# Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
|
|
4801
|
+
# (the default) means the windows will be:
|
|
4802
|
+
#
|
|
4803
|
+
# - (t_0 - window_size, t_0]
|
|
4804
|
+
# - (t_1 - window_size, t_1]
|
|
4805
|
+
# - ...
|
|
4806
|
+
# - (t_n - window_size, t_n]
|
|
4807
|
+
#
|
|
4808
|
+
# @param by [Object]
|
|
4809
|
+
# Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
|
|
4810
|
+
# or `Int32` data type (note that the integral ones require using `'i'`
|
|
4811
|
+
# in `window_size`).
|
|
4812
|
+
# @param window_size [String]
|
|
4813
|
+
# The length of the window. Can be a dynamic
|
|
4814
|
+
# temporal size indicated by a timedelta or the following string language:
|
|
4815
|
+
#
|
|
4816
|
+
# - 1ns (1 nanosecond)
|
|
4817
|
+
# - 1us (1 microsecond)
|
|
4818
|
+
# - 1ms (1 millisecond)
|
|
4819
|
+
# - 1s (1 second)
|
|
4820
|
+
# - 1m (1 minute)
|
|
4821
|
+
# - 1h (1 hour)
|
|
4822
|
+
# - 1d (1 calendar day)
|
|
4823
|
+
# - 1w (1 calendar week)
|
|
4824
|
+
# - 1mo (1 calendar month)
|
|
4825
|
+
# - 1q (1 calendar quarter)
|
|
4826
|
+
# - 1y (1 calendar year)
|
|
4827
|
+
# - 1i (1 index count)
|
|
4828
|
+
#
|
|
4829
|
+
# By "calendar day", we mean the corresponding time on the next day
|
|
4830
|
+
# (which may not be 24 hours, due to daylight savings). Similarly for
|
|
4831
|
+
# "calendar week", "calendar month", "calendar quarter", and
|
|
4832
|
+
# "calendar year".
|
|
4833
|
+
# @param method ['average', 'min', 'max', 'dense', 'random']
|
|
4834
|
+
# The method used to assign ranks to tied elements.
|
|
4835
|
+
# The following methods are available (default is 'average'):
|
|
4836
|
+
#
|
|
4837
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
|
4838
|
+
# all the tied values is assigned to each value.
|
|
4839
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
|
4840
|
+
# the tied values is assigned to each value. (This is also referred to
|
|
4841
|
+
# as "competition" ranking.)
|
|
4842
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
|
4843
|
+
# the tied values is assigned to each value.
|
|
4844
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
|
4845
|
+
# assigned the rank immediately after those assigned to the tied
|
|
4846
|
+
# elements.
|
|
4847
|
+
# - 'random' : Choose a random rank for each value in a tie.
|
|
4848
|
+
# @param seed [Integer]
|
|
4849
|
+
# Random seed used when `method: 'random'`. If set to nil (default), a
|
|
4850
|
+
# random seed is generated for each rolling rank operation.
|
|
4851
|
+
# @param min_samples [Integer]
|
|
4852
|
+
# The number of values in the window that should be non-null before computing
|
|
4853
|
+
# a result.
|
|
4854
|
+
# @param closed ['left', 'right', 'both', 'none']
|
|
4855
|
+
# Define which sides of the temporal interval are closed (inclusive),
|
|
4856
|
+
# defaults to `'right'`.
|
|
4857
|
+
#
|
|
4858
|
+
# @return [Series]
|
|
4859
|
+
def rolling_rank_by(
|
|
4860
|
+
by,
|
|
4861
|
+
window_size,
|
|
4862
|
+
method: "average",
|
|
4863
|
+
seed: nil,
|
|
4864
|
+
min_samples: 1,
|
|
4865
|
+
closed: "right"
|
|
4866
|
+
)
|
|
4867
|
+
super
|
|
4868
|
+
end
|
|
4869
|
+
|
|
4870
|
+
# Compute a rolling rank.
|
|
4871
|
+
#
|
|
4872
|
+
# @note
|
|
4873
|
+
# This functionality is considered **unstable**. It may be changed
|
|
4874
|
+
# at any point without it being considered a breaking change.
|
|
4875
|
+
#
|
|
4876
|
+
# A window of length `window_size` will traverse the array. The values
|
|
4877
|
+
# that fill this window will be ranked according to the `method`
|
|
4878
|
+
# parameter. The resulting values will be the rank of the value that is
|
|
4879
|
+
# at the end of the sliding window.
|
|
4880
|
+
#
|
|
4881
|
+
# @param window_size [Integer]
|
|
4882
|
+
# Integer size of the rolling window.
|
|
4883
|
+
# @param method ['average', 'min', 'max', 'dense', 'random']
|
|
4884
|
+
# The method used to assign ranks to tied elements.
|
|
4885
|
+
# The following methods are available (default is 'average'):
|
|
4886
|
+
#
|
|
4887
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
|
4888
|
+
# all the tied values is assigned to each value.
|
|
4889
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
|
4890
|
+
# the tied values is assigned to each value. (This is also referred to
|
|
4891
|
+
# as "competition" ranking.)
|
|
4892
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
|
4893
|
+
# the tied values is assigned to each value.
|
|
4894
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
|
4895
|
+
# assigned the rank immediately after those assigned to the tied
|
|
4896
|
+
# elements.
|
|
4897
|
+
# - 'random' : Choose a random rank for each value in a tie.
|
|
4898
|
+
# @param seed [Integer]
|
|
4899
|
+
# Random seed used when `method: 'random'`. If set to nil (default), a
|
|
4900
|
+
# random seed is generated for each rolling rank operation.
|
|
4901
|
+
# @param min_samples [Integer]
|
|
4902
|
+
# The number of values in the window that should be non-null before computing
|
|
4903
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
|
4904
|
+
# @param center [Boolean]
|
|
4905
|
+
# Set the labels at the center of the window.
|
|
4906
|
+
#
|
|
4907
|
+
# @return [Series]
|
|
4908
|
+
#
|
|
4909
|
+
# @example
|
|
4910
|
+
# Polars::Series.new([1, 4, 4, 1, 9]).rolling_rank(3, method: "average")
|
|
4911
|
+
# # =>
|
|
4912
|
+
# # shape: (5,)
|
|
4913
|
+
# # Series: '' [f64]
|
|
4914
|
+
# # [
|
|
4915
|
+
# # null
|
|
4916
|
+
# # null
|
|
4917
|
+
# # 2.5
|
|
4918
|
+
# # 1.0
|
|
4919
|
+
# # 3.0
|
|
4920
|
+
# # ]
|
|
4921
|
+
def rolling_rank(
|
|
4922
|
+
window_size,
|
|
4923
|
+
method: "average",
|
|
4924
|
+
seed: nil,
|
|
4925
|
+
min_samples: nil,
|
|
4185
4926
|
center: false
|
|
4186
4927
|
)
|
|
4187
4928
|
super
|
|
@@ -4264,9 +5005,9 @@ module Polars
|
|
|
4264
5005
|
# Sample from this Series.
|
|
4265
5006
|
#
|
|
4266
5007
|
# @param n [Integer]
|
|
4267
|
-
# Number of items to return. Cannot be used with `
|
|
4268
|
-
# `
|
|
4269
|
-
# @param
|
|
5008
|
+
# Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
|
|
5009
|
+
# `fraction` is nil.
|
|
5010
|
+
# @param fraction [Float]
|
|
4270
5011
|
# Fraction of items to return. Cannot be used with `n`.
|
|
4271
5012
|
# @param with_replacement [Boolean]
|
|
4272
5013
|
# Allow values to be sampled more than once.
|
|
@@ -4290,23 +5031,12 @@ module Polars
|
|
|
4290
5031
|
# # ]
|
|
4291
5032
|
def sample(
|
|
4292
5033
|
n: nil,
|
|
4293
|
-
|
|
5034
|
+
fraction: nil,
|
|
4294
5035
|
with_replacement: false,
|
|
4295
5036
|
shuffle: false,
|
|
4296
5037
|
seed: nil
|
|
4297
5038
|
)
|
|
4298
|
-
|
|
4299
|
-
raise ArgumentError, "cannot specify both `n` and `frac`"
|
|
4300
|
-
end
|
|
4301
|
-
|
|
4302
|
-
if n.nil? && !frac.nil?
|
|
4303
|
-
return Utils.wrap_s(_s.sample_frac(frac, with_replacement, shuffle, seed))
|
|
4304
|
-
end
|
|
4305
|
-
|
|
4306
|
-
if n.nil?
|
|
4307
|
-
n = 1
|
|
4308
|
-
end
|
|
4309
|
-
Utils.wrap_s(_s.sample_n(n, with_replacement, shuffle, seed))
|
|
5039
|
+
super
|
|
4310
5040
|
end
|
|
4311
5041
|
|
|
4312
5042
|
# Get a boolean mask of the local maximum peaks.
|
|
@@ -4382,7 +5112,7 @@ module Polars
|
|
|
4382
5112
|
|
|
4383
5113
|
# Hash the Series.
|
|
4384
5114
|
#
|
|
4385
|
-
# The hash value is of type
|
|
5115
|
+
# The hash value is of type `UInt64`.
|
|
4386
5116
|
#
|
|
4387
5117
|
# @param seed [Integer]
|
|
4388
5118
|
# Random seed parameter. Defaults to 0.
|
|
@@ -4397,7 +5127,7 @@ module Polars
|
|
|
4397
5127
|
#
|
|
4398
5128
|
# @example
|
|
4399
5129
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
4400
|
-
# s.
|
|
5130
|
+
# s.hash_(42)
|
|
4401
5131
|
# # =>
|
|
4402
5132
|
# # shape: (3,)
|
|
4403
5133
|
# # Series: 'a' [u64]
|
|
@@ -4406,7 +5136,7 @@ module Polars
|
|
|
4406
5136
|
# # 10386026231460783898
|
|
4407
5137
|
# # 17796317186427479491
|
|
4408
5138
|
# # ]
|
|
4409
|
-
def
|
|
5139
|
+
def hash_(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
|
4410
5140
|
super
|
|
4411
5141
|
end
|
|
4412
5142
|
|
|
@@ -4416,7 +5146,7 @@ module Polars
|
|
|
4416
5146
|
# you can safely use that cast operation.
|
|
4417
5147
|
#
|
|
4418
5148
|
# @param signed [Boolean]
|
|
4419
|
-
# If true, reinterpret as
|
|
5149
|
+
# If true, reinterpret as `Polars::Int64`. Otherwise, reinterpret as `Polars::UInt64`.
|
|
4420
5150
|
#
|
|
4421
5151
|
# @return [Series]
|
|
4422
5152
|
#
|
|
@@ -4519,7 +5249,7 @@ module Polars
|
|
|
4519
5249
|
# the order that the values occur in the Series.
|
|
4520
5250
|
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
|
4521
5251
|
# on the order that the values occur in the Series.
|
|
4522
|
-
# @param
|
|
5252
|
+
# @param descending [Boolean]
|
|
4523
5253
|
# Reverse the operation.
|
|
4524
5254
|
# @param seed [Integer]
|
|
4525
5255
|
# If `method: "random"`, use this as seed.
|
|
@@ -4553,7 +5283,7 @@ module Polars
|
|
|
4553
5283
|
# # 2
|
|
4554
5284
|
# # 5
|
|
4555
5285
|
# # ]
|
|
4556
|
-
def rank(method: "average",
|
|
5286
|
+
def rank(method: "average", descending: false, seed: nil)
|
|
4557
5287
|
super
|
|
4558
5288
|
end
|
|
4559
5289
|
|
|
@@ -4711,17 +5441,16 @@ module Polars
|
|
|
4711
5441
|
_s.kurtosis(fisher, bias)
|
|
4712
5442
|
end
|
|
4713
5443
|
|
|
4714
|
-
#
|
|
4715
|
-
#
|
|
4716
|
-
# Only works for numerical types.
|
|
5444
|
+
# Set values outside the given boundaries to the boundary value.
|
|
4717
5445
|
#
|
|
4718
|
-
#
|
|
4719
|
-
#
|
|
4720
|
-
#
|
|
4721
|
-
#
|
|
4722
|
-
#
|
|
4723
|
-
#
|
|
4724
|
-
#
|
|
5446
|
+
# @param lower_bound [Numeric]
|
|
5447
|
+
# Lower bound. Accepts expression input.
|
|
5448
|
+
# Non-expression inputs are parsed as literals.
|
|
5449
|
+
# If set to `nil` (default), no lower bound is applied.
|
|
5450
|
+
# @param upper_bound [Numeric]
|
|
5451
|
+
# Upper bound. Accepts expression input.
|
|
5452
|
+
# Non-expression inputs are parsed as literals.
|
|
5453
|
+
# If set to `nil` (default), no upper bound is applied.
|
|
4725
5454
|
#
|
|
4726
5455
|
# @return [Series]
|
|
4727
5456
|
#
|
|
@@ -4737,37 +5466,7 @@ module Polars
|
|
|
4737
5466
|
# # null
|
|
4738
5467
|
# # 10
|
|
4739
5468
|
# # ]
|
|
4740
|
-
def clip(
|
|
4741
|
-
super
|
|
4742
|
-
end
|
|
4743
|
-
|
|
4744
|
-
# Clip (limit) the values in an array to a `min` boundary.
|
|
4745
|
-
#
|
|
4746
|
-
# Only works for numerical types.
|
|
4747
|
-
#
|
|
4748
|
-
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
|
4749
|
-
# expression. See {#when} for more information.
|
|
4750
|
-
#
|
|
4751
|
-
# @param min_val [Numeric]
|
|
4752
|
-
# Minimum value.
|
|
4753
|
-
#
|
|
4754
|
-
# @return [Series]
|
|
4755
|
-
def clip_min(min_val)
|
|
4756
|
-
super
|
|
4757
|
-
end
|
|
4758
|
-
|
|
4759
|
-
# Clip (limit) the values in an array to a `max` boundary.
|
|
4760
|
-
#
|
|
4761
|
-
# Only works for numerical types.
|
|
4762
|
-
#
|
|
4763
|
-
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
|
4764
|
-
# expression. See {#when} for more information.
|
|
4765
|
-
#
|
|
4766
|
-
# @param max_val [Numeric]
|
|
4767
|
-
# Maximum value.
|
|
4768
|
-
#
|
|
4769
|
-
# @return [Series]
|
|
4770
|
-
def clip_max(max_val)
|
|
5469
|
+
def clip(lower_bound = nil, upper_bound = nil)
|
|
4771
5470
|
super
|
|
4772
5471
|
end
|
|
4773
5472
|
|
|
@@ -4828,10 +5527,10 @@ module Polars
|
|
|
4828
5527
|
# Replace values by different values.
|
|
4829
5528
|
#
|
|
4830
5529
|
# @param old [Object]
|
|
4831
|
-
# Value or
|
|
5530
|
+
# Value or array of values to replace.
|
|
4832
5531
|
# Also accepts a mapping of values to their replacement.
|
|
4833
5532
|
# @param new [Object]
|
|
4834
|
-
# Value or
|
|
5533
|
+
# Value or array of values to replace by.
|
|
4835
5534
|
# Length must match the length of `old` or have length 1.
|
|
4836
5535
|
# @param default [Object]
|
|
4837
5536
|
# Set values that were not replaced to this value.
|
|
@@ -4856,7 +5555,7 @@ module Polars
|
|
|
4856
5555
|
# # 3
|
|
4857
5556
|
# # ]
|
|
4858
5557
|
#
|
|
4859
|
-
# @example Replace multiple values by passing
|
|
5558
|
+
# @example Replace multiple values by passing arrays to the `old` and `new` parameters.
|
|
4860
5559
|
# s.replace([2, 3], [100, 200])
|
|
4861
5560
|
# # =>
|
|
4862
5561
|
# # shape: (4,)
|
|
@@ -4893,18 +5592,18 @@ module Polars
|
|
|
4893
5592
|
# # "2"
|
|
4894
5593
|
# # "3"
|
|
4895
5594
|
# # ]
|
|
4896
|
-
def replace(old, new =
|
|
5595
|
+
def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
|
|
4897
5596
|
super
|
|
4898
5597
|
end
|
|
4899
5598
|
|
|
4900
5599
|
# Replace all values by different values.
|
|
4901
5600
|
#
|
|
4902
5601
|
# @param old [Object]
|
|
4903
|
-
# Value or
|
|
5602
|
+
# Value or array of values to replace.
|
|
4904
5603
|
# Also accepts a mapping of values to their replacement as syntactic sugar for
|
|
4905
5604
|
# `replace_strict(old: Polars::Series.new(mapping.keys), new: Polars::Series.new(mapping.values))`.
|
|
4906
5605
|
# @param new [Object]
|
|
4907
|
-
# Value or
|
|
5606
|
+
# Value or array of values to replace by.
|
|
4908
5607
|
# Length must match the length of `old` or have length 1.
|
|
4909
5608
|
# @param default [Object]
|
|
4910
5609
|
# Set values that were not replaced to this value. If no default is specified,
|
|
@@ -4916,7 +5615,7 @@ module Polars
|
|
|
4916
5615
|
#
|
|
4917
5616
|
# @return [Series]
|
|
4918
5617
|
#
|
|
4919
|
-
# @example Replace values by passing
|
|
5618
|
+
# @example Replace values by passing arrays to the `old` and `new` parameters.
|
|
4920
5619
|
# s = Polars::Series.new([1, 2, 2, 3])
|
|
4921
5620
|
# s.replace_strict([1, 2, 3], [100, 200, 300])
|
|
4922
5621
|
# # =>
|
|
@@ -5004,8 +5703,8 @@ module Polars
|
|
|
5004
5703
|
# # ]
|
|
5005
5704
|
def replace_strict(
|
|
5006
5705
|
old,
|
|
5007
|
-
new =
|
|
5008
|
-
default:
|
|
5706
|
+
new = NO_DEFAULT,
|
|
5707
|
+
default: NO_DEFAULT,
|
|
5009
5708
|
return_dtype: nil
|
|
5010
5709
|
)
|
|
5011
5710
|
super
|
|
@@ -5013,7 +5712,7 @@ module Polars
|
|
|
5013
5712
|
|
|
5014
5713
|
# Reshape this Series to a flat Series or a Series of Lists.
|
|
5015
5714
|
#
|
|
5016
|
-
# @param
|
|
5715
|
+
# @param dimensions [Array]
|
|
5017
5716
|
# Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
|
|
5018
5717
|
# dimension is inferred.
|
|
5019
5718
|
#
|
|
@@ -5047,8 +5746,8 @@ module Polars
|
|
|
5047
5746
|
# # 8
|
|
5048
5747
|
# # 9
|
|
5049
5748
|
# # ]
|
|
5050
|
-
def reshape(
|
|
5051
|
-
|
|
5749
|
+
def reshape(dimensions)
|
|
5750
|
+
self.class._from_rbseries(_s.reshape(dimensions))
|
|
5052
5751
|
end
|
|
5053
5752
|
|
|
5054
5753
|
# Shuffle the contents of this Series.
|
|
@@ -5094,8 +5793,8 @@ module Polars
|
|
|
5094
5793
|
half_life: nil,
|
|
5095
5794
|
alpha: nil,
|
|
5096
5795
|
adjust: true,
|
|
5097
|
-
|
|
5098
|
-
ignore_nulls:
|
|
5796
|
+
min_samples: 1,
|
|
5797
|
+
ignore_nulls: false
|
|
5099
5798
|
)
|
|
5100
5799
|
super
|
|
5101
5800
|
end
|
|
@@ -5184,8 +5883,8 @@ module Polars
|
|
|
5184
5883
|
alpha: nil,
|
|
5185
5884
|
adjust: true,
|
|
5186
5885
|
bias: false,
|
|
5187
|
-
|
|
5188
|
-
ignore_nulls:
|
|
5886
|
+
min_samples: 1,
|
|
5887
|
+
ignore_nulls: false
|
|
5189
5888
|
)
|
|
5190
5889
|
super
|
|
5191
5890
|
end
|
|
@@ -5212,8 +5911,8 @@ module Polars
|
|
|
5212
5911
|
alpha: nil,
|
|
5213
5912
|
adjust: true,
|
|
5214
5913
|
bias: false,
|
|
5215
|
-
|
|
5216
|
-
ignore_nulls:
|
|
5914
|
+
min_samples: 1,
|
|
5915
|
+
ignore_nulls: false
|
|
5217
5916
|
)
|
|
5218
5917
|
super
|
|
5219
5918
|
end
|
|
@@ -5249,7 +5948,7 @@ module Polars
|
|
|
5249
5948
|
#
|
|
5250
5949
|
# Enables downstream code to user fast paths for sorted arrays.
|
|
5251
5950
|
#
|
|
5252
|
-
# @param
|
|
5951
|
+
# @param descending [Boolean]
|
|
5253
5952
|
# If the Series order is reversed, e.g. descending.
|
|
5254
5953
|
#
|
|
5255
5954
|
# @return [Series]
|
|
@@ -5262,8 +5961,8 @@ module Polars
|
|
|
5262
5961
|
# s = Polars::Series.new("a", [1, 2, 3])
|
|
5263
5962
|
# s.set_sorted.max
|
|
5264
5963
|
# # => 3
|
|
5265
|
-
def set_sorted(
|
|
5266
|
-
Utils.wrap_s(_s.set_sorted(
|
|
5964
|
+
def set_sorted(descending: false)
|
|
5965
|
+
Utils.wrap_s(_s.set_sorted(descending))
|
|
5267
5966
|
end
|
|
5268
5967
|
|
|
5269
5968
|
# Create a new Series filled with values from the given index.
|
|
@@ -5493,6 +6192,21 @@ module Polars
|
|
|
5493
6192
|
StructNameSpace.new(self)
|
|
5494
6193
|
end
|
|
5495
6194
|
|
|
6195
|
+
# Create a plot namespace.
|
|
6196
|
+
#
|
|
6197
|
+
# @note
|
|
6198
|
+
# This functionality is currently considered **unstable**. It may be
|
|
6199
|
+
# changed at any point without it being considered a breaking change.
|
|
6200
|
+
#
|
|
6201
|
+
# @return [SeriesPlot]
|
|
6202
|
+
#
|
|
6203
|
+
# @example Histogram:
|
|
6204
|
+
# s = Polars::Series.new([1, 4, 4, 6, 2, 4, 3, 5, 5, 7, 1])
|
|
6205
|
+
# s.plot.hist
|
|
6206
|
+
def plot
|
|
6207
|
+
SeriesPlot.new(self)
|
|
6208
|
+
end
|
|
6209
|
+
|
|
5496
6210
|
# Repeat the elements in this Series as specified in the given expression.
|
|
5497
6211
|
#
|
|
5498
6212
|
# The repeated elements are expanded into a List.
|
|
@@ -5590,12 +6304,12 @@ module Polars
|
|
|
5590
6304
|
ts = Utils.datetime_to_int(other, time_unit)
|
|
5591
6305
|
f = ffi_func("#{op}_<>", Int64, _s)
|
|
5592
6306
|
fail if f.nil?
|
|
5593
|
-
return Utils.wrap_s(f.
|
|
6307
|
+
return Utils.wrap_s(f.(ts))
|
|
5594
6308
|
elsif other.is_a?(::Date) && dtype == Date
|
|
5595
6309
|
d = Utils.date_to_int(other)
|
|
5596
6310
|
f = ffi_func("#{op}_<>", Int32, _s)
|
|
5597
6311
|
fail if f.nil?
|
|
5598
|
-
return Utils.wrap_s(f.
|
|
6312
|
+
return Utils.wrap_s(f.(d))
|
|
5599
6313
|
end
|
|
5600
6314
|
|
|
5601
6315
|
if other.is_a?(Series)
|
|
@@ -5606,7 +6320,7 @@ module Polars
|
|
|
5606
6320
|
if f.nil?
|
|
5607
6321
|
raise NotImplementedError
|
|
5608
6322
|
end
|
|
5609
|
-
Utils.wrap_s(f.
|
|
6323
|
+
Utils.wrap_s(f.(other))
|
|
5610
6324
|
end
|
|
5611
6325
|
|
|
5612
6326
|
def ffi_func(name, dtype, _s)
|
|
@@ -5621,8 +6335,8 @@ module Polars
|
|
|
5621
6335
|
return Utils.wrap_s(_s.send(op, other._s))
|
|
5622
6336
|
end
|
|
5623
6337
|
|
|
5624
|
-
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !
|
|
5625
|
-
_s2 = sequence_to_rbseries(name, [other])
|
|
6338
|
+
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !dtype.float?
|
|
6339
|
+
_s2 = Utils.sequence_to_rbseries(name, [other])
|
|
5626
6340
|
return Utils.wrap_s(_s.send(op, _s2))
|
|
5627
6341
|
end
|
|
5628
6342
|
|
|
@@ -5630,7 +6344,7 @@ module Polars
|
|
|
5630
6344
|
if f.nil?
|
|
5631
6345
|
raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
|
|
5632
6346
|
end
|
|
5633
|
-
Utils.wrap_s(f.
|
|
6347
|
+
Utils.wrap_s(f.(other))
|
|
5634
6348
|
end
|
|
5635
6349
|
|
|
5636
6350
|
DTYPE_TO_FFINAME = {
|
|
@@ -5656,291 +6370,5 @@ module Polars
|
|
|
5656
6370
|
Struct => "struct",
|
|
5657
6371
|
Binary => "binary"
|
|
5658
6372
|
}
|
|
5659
|
-
|
|
5660
|
-
def series_to_rbseries(name, values)
|
|
5661
|
-
# should not be in-place?
|
|
5662
|
-
values.rename(name, in_place: true)
|
|
5663
|
-
values._s
|
|
5664
|
-
end
|
|
5665
|
-
|
|
5666
|
-
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
|
|
5667
|
-
# not needed yet
|
|
5668
|
-
# if !values.contiguous?
|
|
5669
|
-
# end
|
|
5670
|
-
|
|
5671
|
-
if values.shape.length == 1
|
|
5672
|
-
values, dtype = numo_values_and_dtype(values)
|
|
5673
|
-
strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
|
|
5674
|
-
if dtype == Numo::RObject
|
|
5675
|
-
sequence_to_rbseries(name, values.to_a, strict: strict)
|
|
5676
|
-
else
|
|
5677
|
-
constructor = numo_type_to_constructor(dtype)
|
|
5678
|
-
# TODO improve performance
|
|
5679
|
-
constructor.call(name, values.to_a, strict)
|
|
5680
|
-
end
|
|
5681
|
-
elsif values.shape.sum == 0
|
|
5682
|
-
raise Todo
|
|
5683
|
-
else
|
|
5684
|
-
original_shape = values.shape
|
|
5685
|
-
values = values.reshape(original_shape.inject(&:*))
|
|
5686
|
-
rb_s = numo_to_rbseries(
|
|
5687
|
-
name,
|
|
5688
|
-
values,
|
|
5689
|
-
strict: strict,
|
|
5690
|
-
nan_to_null: nan_to_null
|
|
5691
|
-
)
|
|
5692
|
-
Utils.wrap_s(rb_s).reshape(original_shape)._s
|
|
5693
|
-
end
|
|
5694
|
-
end
|
|
5695
|
-
|
|
5696
|
-
def numo_values_and_dtype(values)
|
|
5697
|
-
[values, values.class]
|
|
5698
|
-
end
|
|
5699
|
-
|
|
5700
|
-
def numo_type_to_constructor(dtype)
|
|
5701
|
-
{
|
|
5702
|
-
Numo::Float32 => RbSeries.method(:new_opt_f32),
|
|
5703
|
-
Numo::Float64 => RbSeries.method(:new_opt_f64),
|
|
5704
|
-
Numo::Int8 => RbSeries.method(:new_opt_i8),
|
|
5705
|
-
Numo::Int16 => RbSeries.method(:new_opt_i16),
|
|
5706
|
-
Numo::Int32 => RbSeries.method(:new_opt_i32),
|
|
5707
|
-
Numo::Int64 => RbSeries.method(:new_opt_i64),
|
|
5708
|
-
Numo::UInt8 => RbSeries.method(:new_opt_u8),
|
|
5709
|
-
Numo::UInt16 => RbSeries.method(:new_opt_u16),
|
|
5710
|
-
Numo::UInt32 => RbSeries.method(:new_opt_u32),
|
|
5711
|
-
Numo::UInt64 => RbSeries.method(:new_opt_u64)
|
|
5712
|
-
}.fetch(dtype)
|
|
5713
|
-
rescue KeyError
|
|
5714
|
-
RbSeries.method(:new_object)
|
|
5715
|
-
end
|
|
5716
|
-
|
|
5717
|
-
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
|
|
5718
|
-
ruby_dtype = nil
|
|
5719
|
-
|
|
5720
|
-
if (values.nil? || values.empty?) && dtype.nil?
|
|
5721
|
-
dtype = dtype_if_empty || Float32
|
|
5722
|
-
elsif dtype == List
|
|
5723
|
-
ruby_dtype = ::Array
|
|
5724
|
-
end
|
|
5725
|
-
|
|
5726
|
-
rb_temporal_types = [::Date, ::DateTime, ::Time]
|
|
5727
|
-
rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)
|
|
5728
|
-
|
|
5729
|
-
value = _get_first_non_none(values)
|
|
5730
|
-
if !value.nil?
|
|
5731
|
-
if value.is_a?(Hash)
|
|
5732
|
-
return DataFrame.new(values).to_struct(name)._s
|
|
5733
|
-
end
|
|
5734
|
-
end
|
|
5735
|
-
|
|
5736
|
-
if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
|
5737
|
-
if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
|
|
5738
|
-
dtype = Array.new(nil, value.size)
|
|
5739
|
-
end
|
|
5740
|
-
|
|
5741
|
-
constructor = polars_type_to_constructor(dtype)
|
|
5742
|
-
rbseries =
|
|
5743
|
-
if dtype == Array
|
|
5744
|
-
constructor.call(name, values, strict)
|
|
5745
|
-
else
|
|
5746
|
-
construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
|
|
5747
|
-
end
|
|
5748
|
-
|
|
5749
|
-
base_type = dtype.is_a?(DataType) ? dtype.class : dtype
|
|
5750
|
-
if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type) || dtype.is_a?(Decimal)
|
|
5751
|
-
if rbseries.dtype != dtype
|
|
5752
|
-
rbseries = rbseries.cast(dtype, true)
|
|
5753
|
-
end
|
|
5754
|
-
end
|
|
5755
|
-
|
|
5756
|
-
# Uninstanced Decimal is a bit special and has various inference paths
|
|
5757
|
-
if dtype == Decimal
|
|
5758
|
-
if rbseries.dtype == String
|
|
5759
|
-
rbseries = rbseries.str_to_decimal_infer(0)
|
|
5760
|
-
elsif rbseries.dtype.float?
|
|
5761
|
-
# Go through string so we infer an appropriate scale.
|
|
5762
|
-
rbseries = rbseries.cast(
|
|
5763
|
-
String, strict: strict, wrap_numerical: false
|
|
5764
|
-
).str_to_decimal_infer(0)
|
|
5765
|
-
elsif rbseries.dtype.integer? || rbseries.dtype == Null
|
|
5766
|
-
rbseries = rbseries.cast(
|
|
5767
|
-
Decimal.new(nil, 0), strict: strict, wrap_numerical: false
|
|
5768
|
-
)
|
|
5769
|
-
elsif !rbseries.dtype.is_a?(Decimal)
|
|
5770
|
-
msg = "can't convert #{rbseries.dtype} to Decimal"
|
|
5771
|
-
raise TypeError, msg
|
|
5772
|
-
end
|
|
5773
|
-
end
|
|
5774
|
-
|
|
5775
|
-
rbseries
|
|
5776
|
-
elsif dtype == Struct
|
|
5777
|
-
struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
|
|
5778
|
-
empty = {}
|
|
5779
|
-
DataFrame.sequence_to_rbdf(
|
|
5780
|
-
values.map { |v| v.nil? ? empty : v },
|
|
5781
|
-
schema: struct_schema,
|
|
5782
|
-
orient: "row",
|
|
5783
|
-
).to_struct(name)
|
|
5784
|
-
else
|
|
5785
|
-
if ruby_dtype.nil?
|
|
5786
|
-
if value.nil?
|
|
5787
|
-
# generic default dtype
|
|
5788
|
-
ruby_dtype = Float
|
|
5789
|
-
else
|
|
5790
|
-
ruby_dtype = value.class
|
|
5791
|
-
end
|
|
5792
|
-
end
|
|
5793
|
-
|
|
5794
|
-
# temporal branch
|
|
5795
|
-
if rb_temporal_types.include?(ruby_dtype)
|
|
5796
|
-
if dtype.nil?
|
|
5797
|
-
dtype = Utils.rb_type_to_dtype(ruby_dtype)
|
|
5798
|
-
elsif rb_temporal_types.include?(dtype)
|
|
5799
|
-
dtype = Utils.rb_type_to_dtype(dtype)
|
|
5800
|
-
end
|
|
5801
|
-
# TODO
|
|
5802
|
-
time_unit = nil
|
|
5803
|
-
|
|
5804
|
-
rb_series = RbSeries.new_from_any_values(name, values, strict)
|
|
5805
|
-
if time_unit.nil?
|
|
5806
|
-
s = Utils.wrap_s(rb_series)
|
|
5807
|
-
else
|
|
5808
|
-
s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
|
|
5809
|
-
end
|
|
5810
|
-
s._s
|
|
5811
|
-
elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
|
|
5812
|
-
raise Todo
|
|
5813
|
-
elsif ruby_dtype == ::Array
|
|
5814
|
-
if dtype.is_a?(Object)
|
|
5815
|
-
return RbSeries.new_object(name, values, strict)
|
|
5816
|
-
end
|
|
5817
|
-
if dtype
|
|
5818
|
-
srs = sequence_from_anyvalue_or_object(name, values)
|
|
5819
|
-
if dtype != srs.dtype
|
|
5820
|
-
srs = srs.cast(dtype, strict: false)
|
|
5821
|
-
end
|
|
5822
|
-
return srs
|
|
5823
|
-
end
|
|
5824
|
-
sequence_from_anyvalue_or_object(name, values)
|
|
5825
|
-
elsif ruby_dtype == Series
|
|
5826
|
-
RbSeries.new_series_list(name, values.map(&:_s), strict)
|
|
5827
|
-
elsif ruby_dtype == RbSeries
|
|
5828
|
-
RbSeries.new_series_list(name, values, strict)
|
|
5829
|
-
else
|
|
5830
|
-
constructor =
|
|
5831
|
-
if value.is_a?(::String)
|
|
5832
|
-
if value.encoding == Encoding::UTF_8
|
|
5833
|
-
RbSeries.method(:new_str)
|
|
5834
|
-
else
|
|
5835
|
-
RbSeries.method(:new_binary)
|
|
5836
|
-
end
|
|
5837
|
-
elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
|
|
5838
|
-
# TODO improve performance
|
|
5839
|
-
RbSeries.method(:new_opt_f64)
|
|
5840
|
-
else
|
|
5841
|
-
rb_type_to_constructor(value.class)
|
|
5842
|
-
end
|
|
5843
|
-
|
|
5844
|
-
construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
|
|
5845
|
-
end
|
|
5846
|
-
end
|
|
5847
|
-
end
|
|
5848
|
-
|
|
5849
|
-
def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
|
|
5850
|
-
begin
|
|
5851
|
-
constructor.call(name, values, strict)
|
|
5852
|
-
rescue
|
|
5853
|
-
if dtype.nil?
|
|
5854
|
-
RbSeries.new_from_any_values(name, values, strict)
|
|
5855
|
-
else
|
|
5856
|
-
RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
|
|
5857
|
-
end
|
|
5858
|
-
end
|
|
5859
|
-
end
|
|
5860
|
-
|
|
5861
|
-
def sequence_from_anyvalue_or_object(name, values)
|
|
5862
|
-
RbSeries.new_from_any_values(name, values, true)
|
|
5863
|
-
rescue
|
|
5864
|
-
RbSeries.new_object(name, values, false)
|
|
5865
|
-
end
|
|
5866
|
-
|
|
5867
|
-
POLARS_TYPE_TO_CONSTRUCTOR = {
|
|
5868
|
-
Float32 => RbSeries.method(:new_opt_f32),
|
|
5869
|
-
Float64 => RbSeries.method(:new_opt_f64),
|
|
5870
|
-
Int8 => RbSeries.method(:new_opt_i8),
|
|
5871
|
-
Int16 => RbSeries.method(:new_opt_i16),
|
|
5872
|
-
Int32 => RbSeries.method(:new_opt_i32),
|
|
5873
|
-
Int64 => RbSeries.method(:new_opt_i64),
|
|
5874
|
-
Int128 => RbSeries.method(:new_opt_i128),
|
|
5875
|
-
UInt8 => RbSeries.method(:new_opt_u8),
|
|
5876
|
-
UInt16 => RbSeries.method(:new_opt_u16),
|
|
5877
|
-
UInt32 => RbSeries.method(:new_opt_u32),
|
|
5878
|
-
UInt64 => RbSeries.method(:new_opt_u64),
|
|
5879
|
-
UInt128 => RbSeries.method(:new_opt_u128),
|
|
5880
|
-
Decimal => RbSeries.method(:new_decimal),
|
|
5881
|
-
Date => RbSeries.method(:new_from_any_values),
|
|
5882
|
-
Datetime => RbSeries.method(:new_from_any_values),
|
|
5883
|
-
Duration => RbSeries.method(:new_from_any_values),
|
|
5884
|
-
Time => RbSeries.method(:new_from_any_values),
|
|
5885
|
-
Boolean => RbSeries.method(:new_opt_bool),
|
|
5886
|
-
Utf8 => RbSeries.method(:new_str),
|
|
5887
|
-
Object => RbSeries.method(:new_object),
|
|
5888
|
-
Categorical => RbSeries.method(:new_str),
|
|
5889
|
-
Enum => RbSeries.method(:new_str),
|
|
5890
|
-
Binary => RbSeries.method(:new_binary),
|
|
5891
|
-
Null => RbSeries.method(:new_null)
|
|
5892
|
-
}
|
|
5893
|
-
|
|
5894
|
-
SYM_TYPE_TO_CONSTRUCTOR = {
|
|
5895
|
-
f32: RbSeries.method(:new_opt_f32),
|
|
5896
|
-
f64: RbSeries.method(:new_opt_f64),
|
|
5897
|
-
i8: RbSeries.method(:new_opt_i8),
|
|
5898
|
-
i16: RbSeries.method(:new_opt_i16),
|
|
5899
|
-
i32: RbSeries.method(:new_opt_i32),
|
|
5900
|
-
i64: RbSeries.method(:new_opt_i64),
|
|
5901
|
-
i128: RbSeries.method(:new_opt_i128),
|
|
5902
|
-
u8: RbSeries.method(:new_opt_u8),
|
|
5903
|
-
u16: RbSeries.method(:new_opt_u16),
|
|
5904
|
-
u32: RbSeries.method(:new_opt_u32),
|
|
5905
|
-
u64: RbSeries.method(:new_opt_u64),
|
|
5906
|
-
u128: RbSeries.method(:new_opt_u128),
|
|
5907
|
-
bool: RbSeries.method(:new_opt_bool),
|
|
5908
|
-
str: RbSeries.method(:new_str)
|
|
5909
|
-
}
|
|
5910
|
-
|
|
5911
|
-
def polars_type_to_constructor(dtype)
|
|
5912
|
-
if dtype.is_a?(Array)
|
|
5913
|
-
lambda do |name, values, strict|
|
|
5914
|
-
RbSeries.new_array(dtype.width, dtype.inner, name, values, strict)
|
|
5915
|
-
end
|
|
5916
|
-
elsif dtype.is_a?(Class) && dtype < DataType
|
|
5917
|
-
POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
|
|
5918
|
-
elsif dtype.is_a?(DataType)
|
|
5919
|
-
POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.class)
|
|
5920
|
-
else
|
|
5921
|
-
SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
|
|
5922
|
-
end
|
|
5923
|
-
rescue KeyError
|
|
5924
|
-
raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
|
|
5925
|
-
end
|
|
5926
|
-
|
|
5927
|
-
RB_TYPE_TO_CONSTRUCTOR = {
|
|
5928
|
-
Float => RbSeries.method(:new_opt_f64),
|
|
5929
|
-
Integer => RbSeries.method(:new_opt_i64),
|
|
5930
|
-
TrueClass => RbSeries.method(:new_opt_bool),
|
|
5931
|
-
FalseClass => RbSeries.method(:new_opt_bool),
|
|
5932
|
-
BigDecimal => RbSeries.method(:new_decimal),
|
|
5933
|
-
NilClass => RbSeries.method(:new_null)
|
|
5934
|
-
}
|
|
5935
|
-
|
|
5936
|
-
def rb_type_to_constructor(dtype)
|
|
5937
|
-
RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
|
|
5938
|
-
rescue KeyError
|
|
5939
|
-
RbSeries.method(:new_object)
|
|
5940
|
-
end
|
|
5941
|
-
|
|
5942
|
-
def _get_first_non_none(values)
|
|
5943
|
-
values.find { |v| !v.nil? }
|
|
5944
|
-
end
|
|
5945
6373
|
end
|
|
5946
6374
|
end
|