polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
data/lib/polars/series.rb CHANGED
@@ -16,9 +16,6 @@ module Polars
16
16
  # Throw error on numeric overflow.
17
17
  # @param nan_to_null [Boolean]
18
18
  # Not used.
19
- # @param dtype_if_empty [Symbol, nil]
20
- # If no dtype is specified and values contains `nil` or an empty array,
21
- # set the Polars dtype of the Series data. If not specified, Float32 is used.
22
19
  #
23
20
  # @example Constructing a Series by specifying name and values positionally:
24
21
  # s = Polars::Series.new("a", [1, 2, 3])
@@ -28,53 +25,56 @@ module Polars
28
25
  # # => Polars::Int64
29
26
  #
30
27
  # @example Constructing a Series with a specific dtype:
31
- # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
28
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: Polars::Float32)
32
29
  #
33
30
  # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
34
31
  # s3 = Polars::Series.new([1, 2, 3])
35
- def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
32
+ def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false)
33
+ # If 'Unknown' treat as nil to trigger type inference
34
+ if dtype == Unknown
35
+ dtype = nil
36
+ elsif !dtype.nil? && !Utils.is_polars_dtype(dtype)
37
+ dtype = Utils.parse_into_dtype(dtype)
38
+ end
39
+
36
40
  # Handle case where values are passed as the first argument
37
- if !name.nil? && !name.is_a?(::String)
41
+ original_name = nil
42
+ if name.nil?
43
+ name = ""
44
+ elsif name.is_a?(::String)
45
+ original_name = name
46
+ else
38
47
  if values.nil?
39
48
  values = name
40
- name = nil
49
+ name = ""
41
50
  else
42
- raise ArgumentError, "Series name must be a string."
51
+ raise TypeError, "Series name must be a string"
43
52
  end
44
53
  end
45
54
 
46
- name = "" if name.nil?
47
-
48
- # TODO improve
49
- if values.is_a?(Range) && values.begin.is_a?(::String)
50
- values = values.to_a
51
- end
52
-
53
- if values.nil?
54
- self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
55
- elsif values.is_a?(Series)
56
- self._s = series_to_rbseries(name, values)
57
- elsif values.is_a?(Range)
58
- self._s =
59
- Polars.arange(
60
- values.first,
61
- values.last + (values.exclude_end? ? 0 : 1),
62
- step: 1,
63
- eager: true,
64
- dtype: dtype
65
- )
66
- .rename(name, in_place: true)
67
- ._s
68
- elsif values.is_a?(::Array)
69
- self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
55
+ if values.is_a?(::Array) || values.is_a?(Range)
56
+ self._s = Utils.sequence_to_rbseries(
57
+ name,
58
+ values,
59
+ dtype: dtype,
60
+ strict: strict
61
+ )
62
+ elsif values.nil?
63
+ self._s = Utils.sequence_to_rbseries(name, [], dtype: dtype)
70
64
  elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
71
- self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
65
+ self._s = Utils.numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
72
66
 
73
67
  if !dtype.nil?
74
- self._s = self.cast(dtype, strict: true)._s
68
+ self._s = cast(dtype, strict: strict)._s
75
69
  end
70
+ elsif values.is_a?(Series)
71
+ self._s = Utils.series_to_rbseries(original_name, values, dtype: dtype, strict: strict)
72
+ elsif values.is_a?(DataFrame)
73
+ self._s = Utils.dataframe_to_rbseries(
74
+ original_name, values, dtype: dtype, strict: strict
75
+ )
76
76
  else
77
- raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
77
+ raise TypeError, "Series constructor called with unsupported type; got #{values.class.name}"
78
78
  end
79
79
  end
80
80
 
@@ -358,7 +358,7 @@ module Polars
358
358
  #
359
359
  # @return [Series]
360
360
  def *(other)
361
- if is_temporal
361
+ if dtype.temporal?
362
362
  raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
363
363
  elsif other.is_a?(DataFrame)
364
364
  other * self
@@ -371,11 +371,11 @@ module Polars
371
371
  #
372
372
  # @return [Series]
373
373
  def /(other)
374
- if is_temporal
374
+ if dtype.temporal?
375
375
  raise ArgumentError, "first cast to integer before dividing datelike dtypes"
376
376
  end
377
377
 
378
- if is_float
378
+ if dtype.float?
379
379
  return _arithmetic(other, :div)
380
380
  end
381
381
 
@@ -386,7 +386,7 @@ module Polars
386
386
  #
387
387
  # @return [Series]
388
388
  def %(other)
389
- if is_datelike
389
+ if dtype.temporal?
390
390
  raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes"
391
391
  end
392
392
  _arithmetic(other, :rem)
@@ -396,7 +396,7 @@ module Polars
396
396
  #
397
397
  # @return [Series]
398
398
  def **(power)
399
- if is_datelike
399
+ if dtype.temporal?
400
400
  raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
401
401
  end
402
402
  to_frame.select(Polars.col(name).pow(power)).to_series
@@ -435,7 +435,7 @@ module Polars
435
435
  # @return [Object]
436
436
  def [](item)
437
437
  if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
438
- return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
438
+ return Utils.wrap_s(_s.gather_with_series(_pos_idxs(item)._s))
439
439
  end
440
440
 
441
441
  if item.is_a?(Series) && item.bool?
@@ -455,7 +455,7 @@ module Polars
455
455
  end
456
456
 
457
457
  if Utils.is_int_sequence(item)
458
- return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
458
+ return Utils.wrap_s(_s.gather_with_series(_pos_idxs(Series.new("", item))._s))
459
459
  end
460
460
 
461
461
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
@@ -466,7 +466,7 @@ module Polars
466
466
  # @return [Object]
467
467
  def []=(key, value)
468
468
  if value.is_a?(::Array)
469
- if is_numeric || is_datelike
469
+ if dtype.numeric? || dtype.temporal?
470
470
  scatter(key, value)
471
471
  return
472
472
  end
@@ -484,7 +484,7 @@ module Polars
484
484
  raise Todo
485
485
  end
486
486
  elsif key.is_a?(::Array)
487
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
487
+ s = Utils.wrap_s(Utils.sequence_to_rbseries("", key, dtype: UInt32))
488
488
  self[s] = value
489
489
  elsif key.is_a?(Range)
490
490
  s = Series.new("", key, dtype: UInt32)
@@ -548,7 +548,7 @@ module Polars
548
548
  # @return [Numeric]
549
549
  #
550
550
  # @example
551
- # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
551
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: Polars::UInt32)
552
552
  # s.estimated_size
553
553
  # # => 4000000
554
554
  # s.estimated_size("mb")
@@ -613,7 +613,7 @@ module Polars
613
613
  # # => false
614
614
  def any?(ignore_nulls: true, &block)
615
615
  if block_given?
616
- apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
616
+ map_elements(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
617
617
  else
618
618
  _s.any(ignore_nulls)
619
619
  end
@@ -637,7 +637,7 @@ module Polars
637
637
  # # => true
638
638
  def all?(ignore_nulls: true, &block)
639
639
  if block_given?
640
- apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
640
+ map_elements(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
641
641
  else
642
642
  _s.all(ignore_nulls)
643
643
  end
@@ -661,7 +661,7 @@ module Polars
661
661
  # # => true
662
662
  def none?(&block)
663
663
  if block_given?
664
- apply(return_dtype: Boolean, &block).none?
664
+ map_elements(return_dtype: Boolean, &block).none?
665
665
  else
666
666
  to_frame.select(Polars.col(name).is_not.all).to_series[0]
667
667
  end
@@ -827,81 +827,60 @@ module Polars
827
827
  # Series with mixed datatypes will return summary statistics for the datatype of
828
828
  # the first value.
829
829
  #
830
+ # @param percentiles [Array]
831
+ # One or more percentiles to include in the summary statistics (if the
832
+ # Series has a numeric dtype). All values must be in the range `[0, 1]`.
833
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
834
+ # Interpolation method used when calculating percentiles.
835
+ #
830
836
  # @return [DataFrame]
831
837
  #
832
838
  # @example
833
- # series_num = Polars::Series.new([1, 2, 3, 4, 5])
834
- # series_num.describe
839
+ # s = Polars::Series.new([1, 2, 3, 4, 5])
840
+ # s.describe
835
841
  # # =>
836
- # # shape: (6, 2)
842
+ # # shape: (9, 2)
837
843
  # # ┌────────────┬──────────┐
838
844
  # # │ statistic ┆ value │
839
845
  # # │ --- ┆ --- │
840
846
  # # │ str ┆ f64 │
841
847
  # # ╞════════════╪══════════╡
842
- # # │ min ┆ 1.0 │
843
- # # │ max ┆ 5.0 │
848
+ # # │ count ┆ 5.0 │
844
849
  # # │ null_count ┆ 0.0 │
845
850
  # # │ mean ┆ 3.0 │
846
851
  # # │ std ┆ 1.581139 │
847
- # # │ count ┆ 5.0 │
852
+ # # │ min ┆ 1.0 │
853
+ # # │ 25% ┆ 2.0 │
854
+ # # │ 50% ┆ 3.0 │
855
+ # # │ 75% ┆ 4.0 │
856
+ # # │ max ┆ 5.0 │
848
857
  # # └────────────┴──────────┘
849
858
  #
850
- # @example
851
- # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
852
- # series_str.describe
859
+ # @example Non-numeric data types may not have all statistics available.
860
+ # s = Polars::Series.new(["aa", "aa", nil, "bb", "cc"])
861
+ # s.describe
853
862
  # # =>
854
- # # shape: (3, 2)
863
+ # # shape: (4, 2)
855
864
  # # ┌────────────┬───────┐
856
865
  # # │ statistic ┆ value │
857
866
  # # │ --- ┆ --- │
858
- # # │ str ┆ i64 │
867
+ # # │ str ┆ str │
859
868
  # # ╞════════════╪═══════╡
860
- # # │ unique ┆ 4 │
869
+ # # │ count ┆ 4 │
861
870
  # # │ null_count ┆ 1 │
862
- # # │ count ┆ 5 │
871
+ # # │ min ┆ aa │
872
+ # # │ max ┆ cc │
863
873
  # # └────────────┴───────┘
864
- def describe
865
- if len == 0
866
- raise ArgumentError, "Series must contain at least one value"
867
- elsif is_numeric
868
- s = cast(:f64)
869
- stats = {
870
- "min" => s.min,
871
- "max" => s.max,
872
- "null_count" => s.null_count,
873
- "mean" => s.mean,
874
- "std" => s.std,
875
- "count" => s.len
876
- }
877
- elsif is_boolean
878
- stats = {
879
- "sum" => sum,
880
- "null_count" => null_count,
881
- "count" => len
882
- }
883
- elsif is_utf8
884
- stats = {
885
- "unique" => unique.length,
886
- "null_count" => null_count,
887
- "count" => len
888
- }
889
- elsif is_datelike
890
- # we coerce all to string, because a polars column
891
- # only has a single dtype and dates: datetime and count: int don't match
892
- stats = {
893
- "min" => dt.min.to_s,
894
- "max" => dt.max.to_s,
895
- "null_count" => null_count.to_s,
896
- "count" => len.to_s
897
- }
898
- else
899
- raise TypeError, "This type is not supported"
900
- end
901
-
902
- Polars::DataFrame.new(
903
- {"statistic" => stats.keys, "value" => stats.values}
874
+ def describe(
875
+ percentiles: [0.25, 0.5, 0.75],
876
+ interpolation: "nearest"
877
+ )
878
+ stats = to_frame.describe(
879
+ percentiles: percentiles,
880
+ interpolation: interpolation
904
881
  )
882
+ stats.columns = ["statistic", "value"]
883
+ stats.filter(F.col("value").is_not_null)
905
884
  end
906
885
 
907
886
  # Reduce this Series to the sum value.
@@ -909,8 +888,8 @@ module Polars
909
888
  # @return [Numeric]
910
889
  #
911
890
  # @note
912
- # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
913
- # `:i64` before summing to prevent overflow issues.
891
+ # Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
892
+ # Int64 before summing to prevent overflow issues.
914
893
  #
915
894
  # @example
916
895
  # s = Polars::Series.new("a", [1, 2, 3])
@@ -1053,7 +1032,7 @@ module Polars
1053
1032
  # s.std
1054
1033
  # # => 1.0
1055
1034
  def std(ddof: 1)
1056
- if !is_numeric
1035
+ if !dtype.numeric?
1057
1036
  nil
1058
1037
  else
1059
1038
  to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0]
@@ -1073,7 +1052,7 @@ module Polars
1073
1052
  # s.var
1074
1053
  # # => 1.0
1075
1054
  def var(ddof: 1)
1076
- if !is_numeric
1055
+ if !dtype.numeric?
1077
1056
  nil
1078
1057
  else
1079
1058
  to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0]
@@ -1490,7 +1469,13 @@ module Polars
1490
1469
  # b = Polars::Series.new([0.65, 0.10, 0.25])
1491
1470
  # b.entropy(normalize: true)
1492
1471
  # # => 0.8568409950394724
1493
- def entropy(base: Math::E, normalize: false)
1472
+ def entropy(base: Math::E, normalize: nil)
1473
+ # TODO update
1474
+ if normalize.nil?
1475
+ warn "The default `normalize` for `entropy` method will change from `false` to `true` in a future version"
1476
+ normalize = false
1477
+ end
1478
+
1494
1479
  Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
1495
1480
  end
1496
1481
 
@@ -1498,7 +1483,7 @@ module Polars
1498
1483
  #
1499
1484
  # @param expr [Expr]
1500
1485
  # Expression to evaluate
1501
- # @param min_periods [Integer]
1486
+ # @param min_samples [Integer]
1502
1487
  # Number of valid values there should be in the window before the expression
1503
1488
  # is evaluated. valid values = `length - null_count`
1504
1489
  #
@@ -1525,7 +1510,7 @@ module Polars
1525
1510
  # # -15
1526
1511
  # # -24
1527
1512
  # # ]
1528
- def cumulative_eval(expr, min_periods: 1)
1513
+ def cumulative_eval(expr, min_samples: 1)
1529
1514
  super
1530
1515
  end
1531
1516
 
@@ -1537,8 +1522,16 @@ module Polars
1537
1522
  # @return [Series]
1538
1523
  #
1539
1524
  # @example
1540
- # s = Polars::Series.new("x", [1, 2, 3])
1541
- # s.alias("y")
1525
+ # s = Polars::Series.new("a", [1, 2, 3])
1526
+ # s.alias("b")
1527
+ # # =>
1528
+ # # shape: (3,)
1529
+ # # Series: 'b' [i64]
1530
+ # # [
1531
+ # # 1
1532
+ # # 2
1533
+ # # 3
1534
+ # # ]
1542
1535
  def alias(name)
1543
1536
  s = dup
1544
1537
  s._s.rename(name)
@@ -1549,21 +1542,22 @@ module Polars
1549
1542
  #
1550
1543
  # @param name [String]
1551
1544
  # New name.
1552
- # @param in_place [Boolean]
1553
- # Modify the Series in-place.
1554
1545
  #
1555
1546
  # @return [Series]
1556
1547
  #
1557
1548
  # @example
1558
1549
  # s = Polars::Series.new("a", [1, 2, 3])
1559
1550
  # s.rename("b")
1560
- def rename(name, in_place: false)
1561
- if in_place
1562
- _s.rename(name)
1563
- self
1564
- else
1565
- self.alias(name)
1566
- end
1551
+ # # =>
1552
+ # # shape: (3,)
1553
+ # # Series: 'b' [i64]
1554
+ # # [
1555
+ # # 1
1556
+ # # 2
1557
+ # # 3
1558
+ # # ]
1559
+ def rename(name)
1560
+ self.alias(name)
1567
1561
  end
1568
1562
 
1569
1563
  # Get the length of each individual chunk.
@@ -1575,7 +1569,7 @@ module Polars
1575
1569
  # s2 = Polars::Series.new("b", [4, 5, 6])
1576
1570
  #
1577
1571
  # @example Concatenate Series with rechunk: true
1578
- # Polars.concat([s, s2]).chunk_lengths
1572
+ # Polars.concat([s, s2], rechunk: true).chunk_lengths
1579
1573
  # # => [6]
1580
1574
  #
1581
1575
  # @example Concatenate Series with rechunk: false
@@ -1594,7 +1588,7 @@ module Polars
1594
1588
  # s2 = Polars::Series.new("b", [4, 5, 6])
1595
1589
  #
1596
1590
  # @example Concatenate Series with rechunk: true
1597
- # Polars.concat([s, s2]).n_chunks
1591
+ # Polars.concat([s, s2], rechunk: true).n_chunks
1598
1592
  # # => 1
1599
1593
  #
1600
1594
  # @example Concatenate Series with rechunk: false
@@ -1612,8 +1606,8 @@ module Polars
1612
1606
  # @return [Series]
1613
1607
  #
1614
1608
  # @note
1615
- # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1616
- # `:i64` before summing to prevent overflow issues.
1609
+ # Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
1610
+ # Int64 before summing to prevent overflow issues.
1617
1611
  #
1618
1612
  # @example
1619
1613
  # s = Polars::Series.new("a", [1, 2, 3])
@@ -1629,7 +1623,6 @@ module Polars
1629
1623
  def cum_sum(reverse: false)
1630
1624
  super
1631
1625
  end
1632
- alias_method :cumsum, :cum_sum
1633
1626
 
1634
1627
  # Return the cumulative count of the non-null values in the column.
1635
1628
  #
@@ -1675,7 +1668,6 @@ module Polars
1675
1668
  def cum_min(reverse: false)
1676
1669
  super
1677
1670
  end
1678
- alias_method :cummin, :cum_min
1679
1671
 
1680
1672
  # Get an array with the cumulative max computed at every element.
1681
1673
  #
@@ -1698,7 +1690,6 @@ module Polars
1698
1690
  def cum_max(reverse: false)
1699
1691
  super
1700
1692
  end
1701
- alias_method :cummax, :cum_max
1702
1693
 
1703
1694
  # Get an array with the cumulative product computed at every element.
1704
1695
  #
@@ -1708,8 +1699,8 @@ module Polars
1708
1699
  # @return [Series]
1709
1700
  #
1710
1701
  # @note
1711
- # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1712
- # `:i64` before multiplying to prevent overflow issues.
1702
+ # Dtypes in \\\\{Int8, UInt8, Int16, UInt16} are cast to
1703
+ # Int64 before multiplying to prevent overflow issues.
1713
1704
  #
1714
1705
  # @example
1715
1706
  # s = Polars::Series.new("a", [1, 2, 3])
@@ -1725,7 +1716,6 @@ module Polars
1725
1716
  def cum_prod(reverse: false)
1726
1717
  super
1727
1718
  end
1728
- alias_method :cumprod, :cum_prod
1729
1719
 
1730
1720
  # Get a slice of this Series.
1731
1721
  #
@@ -1755,29 +1745,6 @@ module Polars
1755
1745
  #
1756
1746
  # @param other [Series]
1757
1747
  # Series to append.
1758
- # @param append_chunks [Boolean]
1759
- # If set to `true` the append operation will add the chunks from `other` to
1760
- # self. This is super cheap.
1761
- #
1762
- # If set to `false` the append operation will do the same as
1763
- # {DataFrame#extend} which extends the memory backed by this Series with
1764
- # the values from `other`.
1765
- #
1766
- # Different from `append_chunks`, `extend` appends the data from `other` to
1767
- # the underlying memory locations and thus may cause a reallocation (which is
1768
- # expensive).
1769
- #
1770
- # If this does not cause a reallocation, the resulting data structure will not
1771
- # have any extra chunks and thus will yield faster queries.
1772
- #
1773
- # Prefer `extend` over `append_chunks` when you want to do a query after a
1774
- # single append. For instance during online operations where you add `n` rows
1775
- # and rerun a query.
1776
- #
1777
- # Prefer `append_chunks` over `extend` when you want to append many times
1778
- # before doing a query. For instance, when you read in multiple files and when
1779
- # to store them in a single Series. In the latter case, finish the sequence
1780
- # of `append_chunks` operations with a `rechunk`.
1781
1748
  #
1782
1749
  # @return [Series]
1783
1750
  #
@@ -1796,20 +1763,60 @@ module Polars
1796
1763
  # # 5
1797
1764
  # # 6
1798
1765
  # # ]
1799
- def append(other, append_chunks: true)
1800
- begin
1801
- if append_chunks
1802
- _s.append(other._s)
1803
- else
1804
- _s.extend(other._s)
1805
- end
1806
- rescue => e
1807
- if e.message == "Already mutably borrowed"
1808
- append(other.clone, append_chunks)
1809
- else
1810
- raise e
1811
- end
1812
- end
1766
+ def append(other)
1767
+ Utils.require_same_type(self, other)
1768
+ _s.append(other._s)
1769
+ self
1770
+ end
1771
+
1772
+ # Extend the memory backed by this Series with the values from another.
1773
+ #
1774
+ # Different from `append`, which adds the chunks from `other` to the chunks of
1775
+ # this series, `extend` appends the data from `other` to the underlying memory
1776
+ # locations and thus may cause a reallocation (which is expensive).
1777
+ #
1778
+ # If this does `not` cause a reallocation, the resulting data structure will not
1779
+ # have any extra chunks and thus will yield faster queries.
1780
+ #
1781
+ # Prefer `extend` over `append` when you want to do a query after a single
1782
+ # append. For instance, during online operations where you add `n` rows
1783
+ # and rerun a query.
1784
+ #
1785
+ # Prefer `append` over `extend` when you want to append many times
1786
+ # before doing a query. For instance, when you read in multiple files and want
1787
+ # to store them in a single `Series`. In the latter case, finish the sequence
1788
+ # of `append` operations with a `rechunk`.
1789
+ #
1790
+ # @param other [Series]
1791
+ # Series to extend the series with.
1792
+ #
1793
+ # @return [Series]
1794
+ #
1795
+ # @note
1796
+ # This method modifies the series in-place. The series is returned for
1797
+ # convenience only.
1798
+ #
1799
+ # @example
1800
+ # a = Polars::Series.new("a", [1, 2, 3])
1801
+ # b = Polars::Series.new("b", [4, 5])
1802
+ # a.extend(b)
1803
+ # # =>
1804
+ # # shape: (5,)
1805
+ # # Series: 'a' [i64]
1806
+ # # [
1807
+ # # 1
1808
+ # # 2
1809
+ # # 3
1810
+ # # 4
1811
+ # # 5
1812
+ # # ]
1813
+ #
1814
+ # @example The resulting series will consist of a single chunk.
1815
+ # a.n_chunks
1816
+ # # => 1
1817
+ def extend(other)
1818
+ Utils.require_same_type(self, other)
1819
+ _s.extend(other._s)
1813
1820
  self
1814
1821
  end
1815
1822
 
@@ -1856,7 +1863,10 @@ module Polars
1856
1863
  # # 2
1857
1864
  # # ]
1858
1865
  def head(n = 10)
1859
- to_frame.select(F.col(name).head(n)).to_series
1866
+ if n < 0
1867
+ n = [0, len + n].max
1868
+ end
1869
+ self.class._from_rbseries(_s.head(n))
1860
1870
  end
1861
1871
 
1862
1872
  # Get the last `n` rows.
@@ -1877,7 +1887,10 @@ module Polars
1877
1887
  # # 3
1878
1888
  # # ]
1879
1889
  def tail(n = 10)
1880
- to_frame.select(F.col(name).tail(n)).to_series
1890
+ if n < 0
1891
+ n = [0, len + n].max
1892
+ end
1893
+ self.class._from_rbseries(_s.tail(n))
1881
1894
  end
1882
1895
 
1883
1896
  # Get the first `n` rows.
@@ -1900,7 +1913,7 @@ module Polars
1900
1913
  # # 2
1901
1914
  # # ]
1902
1915
  def limit(n = 10)
1903
- to_frame.select(F.col(name).limit(n)).to_series
1916
+ head(n)
1904
1917
  end
1905
1918
 
1906
1919
  # Take every nth value in the Series and return as new Series.
@@ -1935,11 +1948,10 @@ module Polars
1935
1948
  def gather_every(n, offset = 0)
1936
1949
  super
1937
1950
  end
1938
- alias_method :take_every, :gather_every
1939
1951
 
1940
1952
  # Sort this Series.
1941
1953
  #
1942
- # @param reverse [Boolean]
1954
+ # @param descending [Boolean]
1943
1955
  # Reverse sort.
1944
1956
  # @param nulls_last [Boolean]
1945
1957
  # Place null values last instead of first.
@@ -1962,7 +1974,7 @@ module Polars
1962
1974
  # # 3
1963
1975
  # # 4
1964
1976
  # # ]
1965
- # s.sort(reverse: true)
1977
+ # s.sort(descending: true)
1966
1978
  # # =>
1967
1979
  # # shape: (4,)
1968
1980
  # # Series: 'a' [i64]
@@ -1972,12 +1984,12 @@ module Polars
1972
1984
  # # 2
1973
1985
  # # 1
1974
1986
  # # ]
1975
- def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
1987
+ def sort(descending: false, nulls_last: false, multithreaded: true, in_place: false)
1976
1988
  if in_place
1977
- self._s = _s.sort(reverse, nulls_last, multithreaded)
1989
+ self._s = _s.sort(descending, nulls_last, multithreaded)
1978
1990
  self
1979
1991
  else
1980
- Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
1992
+ Utils.wrap_s(_s.sort(descending, nulls_last, multithreaded))
1981
1993
  end
1982
1994
  end
1983
1995
 
@@ -2017,7 +2029,7 @@ module Polars
2017
2029
  # Number of elements to return.
2018
2030
  # @param reverse [Object]
2019
2031
  # Consider the `k` smallest elements of the `by` column (instead of the `k`
2020
- # largest). This can be specified per column by passing a sequence of
2032
+ # largest). This can be specified per column by passing an array of
2021
2033
  # booleans.
2022
2034
  #
2023
2035
  # @return [Series]
@@ -2077,7 +2089,7 @@ module Polars
2077
2089
  # Number of elements to return.
2078
2090
  # @param reverse [Object]
2079
2091
  # Consider the `k` largest elements of the `by` column (instead of the `k`
2080
- # smallest). This can be specified per column by passing a sequence of
2092
+ # smallest). This can be specified per column by passing an array of
2081
2093
  # booleans.
2082
2094
  #
2083
2095
  # @return [Series]
@@ -2103,7 +2115,7 @@ module Polars
2103
2115
 
2104
2116
  # Get the index values that would sort this Series.
2105
2117
  #
2106
- # @param reverse [Boolean]
2118
+ # @param descending [Boolean]
2107
2119
  # Sort in reverse (descending) order.
2108
2120
  # @param nulls_last [Boolean]
2109
2121
  # Place null values last instead of first.
@@ -2123,10 +2135,9 @@ module Polars
2123
2135
  # # 2
2124
2136
  # # 0
2125
2137
  # # ]
2126
- def arg_sort(reverse: false, nulls_last: false)
2138
+ def arg_sort(descending: false, nulls_last: false)
2127
2139
  super
2128
2140
  end
2129
- alias_method :argsort, :arg_sort
2130
2141
 
2131
2142
  # Get unique index as Series.
2132
2143
  #
@@ -2281,7 +2292,6 @@ module Polars
2281
2292
  def gather(indices)
2282
2293
  super
2283
2294
  end
2284
- alias_method :take, :gather
2285
2295
 
2286
2296
  # Count the null values in this Series.
2287
2297
  #
@@ -2313,7 +2323,6 @@ module Polars
2313
2323
  def has_nulls
2314
2324
  _s.has_nulls
2315
2325
  end
2316
- alias_method :has_validity, :has_nulls
2317
2326
 
2318
2327
  # Check if the Series is empty.
2319
2328
  #
@@ -2613,8 +2622,6 @@ module Polars
2613
2622
  def is_first_distinct
2614
2623
  super
2615
2624
  end
2616
- alias_method :is_first, :is_first_distinct
2617
-
2618
2625
 
2619
2626
  # Return a boolean mask indicating the last occurrence of each distinct value.
2620
2627
  #
@@ -2685,7 +2692,7 @@ module Polars
2685
2692
  #
2686
2693
  # @param other [Series]
2687
2694
  # Series to compare with.
2688
- # @param strict [Boolean]
2695
+ # @param check_dtypes [Boolean]
2689
2696
  # Require data types to match.
2690
2697
  # @param check_names [Boolean]
2691
2698
  # Require names to match.
@@ -2701,10 +2708,9 @@ module Polars
2701
2708
  # # => true
2702
2709
  # s.equals(s2)
2703
2710
  # # => false
2704
- def equals(other, strict: false, check_names: false, null_equal: false)
2705
- _s.equals(other._s, strict, check_names, null_equal)
2711
+ def equals(other, check_dtypes: false, check_names: false, null_equal: true)
2712
+ _s.equals(other._s, check_dtypes, check_names, null_equal)
2706
2713
  end
2707
- alias_method :series_equal, :equals
2708
2714
 
2709
2715
  # Return the number of elements in the Series.
2710
2716
  #
@@ -2734,16 +2740,19 @@ module Polars
2734
2740
 
2735
2741
  # Cast between data types.
2736
2742
  #
2737
- # @param dtype [Symbol]
2743
+ # @param dtype [Object]
2738
2744
  # DataType to cast to
2739
2745
  # @param strict [Boolean]
2740
2746
  # Throw an error if a cast could not be done for instance due to an overflow
2747
+ # @param wrap_numerical [Boolean]
2748
+ # If true numeric casts wrap overflowing values instead of
2749
+ # marking the cast as invalid.
2741
2750
  #
2742
2751
  # @return [Series]
2743
2752
  #
2744
2753
  # @example
2745
2754
  # s = Polars::Series.new("a", [true, false, true])
2746
- # s.cast(:u32)
2755
+ # s.cast(Polars::UInt32)
2747
2756
  # # =>
2748
2757
  # # shape: (3,)
2749
2758
  # # Series: 'a' [u32]
@@ -2752,24 +2761,18 @@ module Polars
2752
2761
  # # 0
2753
2762
  # # 1
2754
2763
  # # ]
2755
- def cast(dtype, strict: true)
2756
- super
2764
+ def cast(dtype, strict: true, wrap_numerical: false)
2765
+ dtype = Utils.parse_into_dtype(dtype)
2766
+ self.class._from_rbseries(_s.cast(dtype, strict, wrap_numerical))
2757
2767
  end
2758
2768
 
2759
2769
  # Cast to physical representation of the logical dtype.
2760
2770
  #
2761
- # - `:date` -> `:i32`
2762
- # - `:datetime` -> `:i64`
2763
- # - `:time` -> `:i64`
2764
- # - `:duration` -> `:i64`
2765
- # - `:cat` -> `:u32`
2766
- # - other data types will be left unchanged.
2767
- #
2768
2771
  # @return [Series]
2769
2772
  #
2770
2773
  # @example
2771
2774
  # s = Polars::Series.new("values", ["a", nil, "x", "a"])
2772
- # s.cast(:cat).to_physical
2775
+ # s.cast(Polars::Categorical).to_physical
2773
2776
  # # =>
2774
2777
  # # shape: (4,)
2775
2778
  # # Series: 'values' [u32]
@@ -2840,7 +2843,7 @@ module Polars
2840
2843
  # @return [Series]
2841
2844
  #
2842
2845
  # @example
2843
- # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
2846
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: Polars::Int8)
2844
2847
  # s.reverse
2845
2848
  # # =>
2846
2849
  # # shape: (3,)
@@ -2869,7 +2872,7 @@ module Polars
2869
2872
  #
2870
2873
  # @note
2871
2874
  # If the value of the `lower_bound` is greater than that of the `upper_bound`
2872
- # then the result will be False, as no value can satisfy the condition.
2875
+ # then the result will be false, as no value can satisfy the condition.
2873
2876
  #
2874
2877
  # @example
2875
2878
  # s = Polars::Series.new("num", [1, 2, 3, 4, 5])
@@ -2962,7 +2965,7 @@ module Polars
2962
2965
  def is_close(
2963
2966
  other,
2964
2967
  abs_tol: 0.0,
2965
- rel_tol: 1e-09,
2968
+ rel_tol: 1.0e-09,
2966
2969
  nans_equal: false
2967
2970
  )
2968
2971
  F.select(
@@ -2972,75 +2975,6 @@ module Polars
2972
2975
  ).to_series
2973
2976
  end
2974
2977
 
2975
- # Check if this Series datatype is numeric.
2976
- #
2977
- # @return [Boolean]
2978
- #
2979
- # @example
2980
- # s = Polars::Series.new("a", [1, 2, 3])
2981
- # s.is_numeric
2982
- # # => true
2983
- def is_numeric
2984
- [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
2985
- end
2986
- alias_method :numeric?, :is_numeric
2987
-
2988
- # Check if this Series datatype is datelike.
2989
- #
2990
- # @return [Boolean]
2991
- #
2992
- # @example
2993
- # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
2994
- # s.is_datelike
2995
- # # => true
2996
- def is_datelike
2997
- [Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
2998
- end
2999
- alias_method :datelike?, :is_datelike
3000
- alias_method :is_temporal, :is_datelike
3001
- alias_method :temporal?, :is_datelike
3002
-
3003
- # Check if this Series has floating point numbers.
3004
- #
3005
- # @return [Boolean]
3006
- #
3007
- # @example
3008
- # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
3009
- # s.is_float
3010
- # # => true
3011
- def is_float
3012
- [Float32, Float64].include?(dtype)
3013
- end
3014
- alias_method :float?, :is_float
3015
-
3016
- # Check if this Series is a Boolean.
3017
- #
3018
- # @return [Boolean]
3019
- #
3020
- # @example
3021
- # s = Polars::Series.new("a", [true, false, true])
3022
- # s.is_boolean
3023
- # # => true
3024
- def is_boolean
3025
- dtype == Boolean
3026
- end
3027
- alias_method :boolean?, :is_boolean
3028
- alias_method :is_bool, :is_boolean
3029
- alias_method :bool?, :is_boolean
3030
-
3031
- # Check if this Series datatype is a Utf8.
3032
- #
3033
- # @return [Boolean]
3034
- #
3035
- # @example
3036
- # s = Polars::Series.new("x", ["a", "b", "c"])
3037
- # s.is_utf8
3038
- # # => true
3039
- def is_utf8
3040
- dtype == String
3041
- end
3042
- alias_method :utf8?, :is_utf8
3043
-
3044
2978
  # def view
3045
2979
  # end
3046
2980
 
@@ -3055,7 +2989,7 @@ module Polars
3055
2989
  # # Numo::Int64#shape=[3]
3056
2990
  # # [1, 2, 3]
3057
2991
  def to_numo
3058
- if is_datelike
2992
+ if dtype.temporal?
3059
2993
  Numo::RObject.cast(to_a)
3060
2994
  else
3061
2995
  _s.to_numo
@@ -3093,16 +3027,16 @@ module Polars
3093
3027
 
3094
3028
  # Set values at the index locations.
3095
3029
  #
3096
- # @param idx [Object]
3030
+ # @param indices [Object]
3097
3031
  # Integers representing the index locations.
3098
- # @param value [Object]
3032
+ # @param values [Object]
3099
3033
  # Replacement values.
3100
3034
  #
3101
3035
  # @return [Series]
3102
3036
  #
3103
3037
  # @example
3104
3038
  # s = Polars::Series.new("a", [1, 2, 3])
3105
- # s.set_at_idx(1, 10)
3039
+ # s.scatter(1, 10)
3106
3040
  # # =>
3107
3041
  # # shape: (3,)
3108
3042
  # # Series: 'a' [i64]
@@ -3111,29 +3045,28 @@ module Polars
3111
3045
  # # 10
3112
3046
  # # 3
3113
3047
  # # ]
3114
- def scatter(idx, value)
3115
- if idx.is_a?(Integer)
3116
- idx = [idx]
3048
+ def scatter(indices, values)
3049
+ if indices.is_a?(Integer)
3050
+ indices = [indices]
3117
3051
  end
3118
- if idx.length == 0
3052
+ if indices.length == 0
3119
3053
  return self
3120
3054
  end
3121
3055
 
3122
- idx = Series.new("", idx)
3123
- if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
3124
- value = Series.new("", [value])
3056
+ indices = Series.new("", indices)
3057
+ if values.is_a?(Integer) || values.is_a?(Float) || Utils.bool?(values) || values.is_a?(::String) || values.nil?
3058
+ values = Series.new("", [values])
3125
3059
 
3126
3060
  # if we need to set more than a single value, we extend it
3127
- if idx.length > 0
3128
- value = value.extend_constant(value[0], idx.length - 1)
3061
+ if indices.length > 0
3062
+ values = values.extend_constant(values[0], indices.length - 1)
3129
3063
  end
3130
- elsif !value.is_a?(Series)
3131
- value = Series.new("", value)
3064
+ elsif !values.is_a?(Series)
3065
+ values = Series.new("", values)
3132
3066
  end
3133
- _s.scatter(idx._s, value._s)
3067
+ _s.scatter(indices._s, values._s)
3134
3068
  self
3135
3069
  end
3136
- alias_method :set_at_idx, :scatter
3137
3070
 
3138
3071
  # Get the index of the first occurrence of a value, or `nil` if it's not found.
3139
3072
  #
@@ -3197,13 +3130,12 @@ module Polars
3197
3130
  s = len > 0 ? self.class.new(name, [], dtype: dtype) : clone
3198
3131
  n > 0 ? s.extend_constant(nil, n) : s
3199
3132
  end
3200
- alias_method :cleared, :clear
3201
3133
 
3202
3134
  # clone handled by initialize_copy
3203
3135
 
3204
3136
  # Fill floating point NaN value with a fill value.
3205
3137
  #
3206
- # @param fill_value [Object]
3138
+ # @param value [Object]
3207
3139
  # Value used to fill nan values.
3208
3140
  #
3209
3141
  # @return [Series]
@@ -3220,7 +3152,7 @@ module Polars
3220
3152
  # # 3.0
3221
3153
  # # 0.0
3222
3154
  # # ]
3223
- def fill_nan(fill_value)
3155
+ def fill_nan(value)
3224
3156
  super
3225
3157
  end
3226
3158
 
@@ -3344,8 +3276,12 @@ module Polars
3344
3276
 
3345
3277
  # Round underlying floating point data by `decimals` digits.
3346
3278
  #
3279
+ # The default rounding mode is "half to even" (also known as "bankers' rounding").
3280
+ #
3347
3281
  # @param decimals [Integer]
3348
- # number of decimals to round by.
3282
+ # Number of decimals to round by.
3283
+ # @param mode ['half_to_even', 'half_away_from_zero']
3284
+ # Rounding mode.
3349
3285
  #
3350
3286
  # @return [Series]
3351
3287
  #
@@ -3360,7 +3296,7 @@ module Polars
3360
3296
  # # 2.57
3361
3297
  # # 3.9
3362
3298
  # # ]
3363
- def round(decimals = 0)
3299
+ def round(decimals = 0, mode: "half_to_even")
3364
3300
  super
3365
3301
  end
3366
3302
 
@@ -3543,7 +3479,6 @@ module Polars
3543
3479
  def arcsin
3544
3480
  super
3545
3481
  end
3546
- alias_method :asin, :arcsin
3547
3482
 
3548
3483
  # Compute the element-wise value for the inverse cosine.
3549
3484
  #
@@ -3563,7 +3498,6 @@ module Polars
3563
3498
  def arccos
3564
3499
  super
3565
3500
  end
3566
- alias_method :acos, :arccos
3567
3501
 
3568
3502
  # Compute the element-wise value for the inverse tangent.
3569
3503
  #
@@ -3583,7 +3517,6 @@ module Polars
3583
3517
  def arctan
3584
3518
  super
3585
3519
  end
3586
- alias_method :atan, :arctan
3587
3520
 
3588
3521
  # Compute the element-wise value for the inverse hyperbolic sine.
3589
3522
  #
@@ -3603,7 +3536,6 @@ module Polars
3603
3536
  def arcsinh
3604
3537
  super
3605
3538
  end
3606
- alias_method :asinh, :arcsinh
3607
3539
 
3608
3540
  # Compute the element-wise value for the inverse hyperbolic cosine.
3609
3541
  #
@@ -3624,7 +3556,6 @@ module Polars
3624
3556
  def arccosh
3625
3557
  super
3626
3558
  end
3627
- alias_method :acosh, :arccosh
3628
3559
 
3629
3560
  # Compute the element-wise value for the inverse hyperbolic tangent.
3630
3561
  #
@@ -3648,7 +3579,6 @@ module Polars
3648
3579
  def arctanh
3649
3580
  super
3650
3581
  end
3651
- alias_method :atanh, :arctanh
3652
3582
 
3653
3583
  # Compute the element-wise value for the hyperbolic sine.
3654
3584
  #
@@ -3734,21 +3664,23 @@ module Polars
3734
3664
  # # 12
3735
3665
  # # 13
3736
3666
  # # ]
3737
- def map_elements(return_dtype: nil, skip_nulls: true, &func)
3667
+ def map_elements(return_dtype: nil, skip_nulls: true, &function)
3738
3668
  if return_dtype.nil?
3739
3669
  pl_return_dtype = nil
3740
3670
  else
3741
- pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
3671
+ pl_return_dtype = Utils.parse_into_dtype(return_dtype)
3742
3672
  end
3743
- Utils.wrap_s(_s.map_elements(func, pl_return_dtype, skip_nulls))
3673
+ Utils.wrap_s(_s.map_elements(function, pl_return_dtype, skip_nulls))
3744
3674
  end
3745
3675
  alias_method :map, :map_elements
3746
- alias_method :apply, :map_elements
3747
3676
 
3748
3677
  # Shift the values by a given period.
3749
3678
  #
3750
- # @param periods [Integer]
3679
+ # @param n [Integer]
3751
3680
  # Number of places to shift (may be negative).
3681
+ # @param fill_value [Object]
3682
+ # Fill the resulting null values with this value. Accepts scalar expression
3683
+ # input. Non-expression inputs are parsed as literals.
3752
3684
  #
3753
3685
  # @return [Series]
3754
3686
  #
@@ -3774,19 +3706,7 @@ module Polars
3774
3706
  # # 3
3775
3707
  # # null
3776
3708
  # # ]
3777
- def shift(periods = 1)
3778
- super
3779
- end
3780
-
3781
- # Shift the values by a given period and fill the resulting null values.
3782
- #
3783
- # @param periods [Integer]
3784
- # Number of places to shift (may be negative).
3785
- # @param fill_value [Object]
3786
- # Fill nil values with the result of this expression.
3787
- #
3788
- # @return [Series]
3789
- def shift_and_fill(periods, fill_value)
3709
+ def shift(n = 1, fill_value: nil)
3790
3710
  super
3791
3711
  end
3792
3712
 
@@ -3834,6 +3754,90 @@ module Polars
3834
3754
  Utils.wrap_s(_s.zip_with(mask._s, other._s))
3835
3755
  end
3836
3756
 
3757
+ # Compute a rolling min based on another series.
3758
+ #
3759
+ # @note
3760
+ # This functionality is considered **unstable**. It may be changed
3761
+ # at any point without it being considered a breaking change.
3762
+ #
3763
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
3764
+ # (the default) means the windows will be:
3765
+ #
3766
+ # - (t_0 - window_size, t_0]
3767
+ # - (t_1 - window_size, t_1]
3768
+ # - ...
3769
+ # - (t_n - window_size, t_n]
3770
+ #
3771
+ # @param by [Object]
3772
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
3773
+ # or `Int32` data type (note that the integral ones require using `'i'`
3774
+ # in `window_size`).
3775
+ # @param window_size [String]
3776
+ # The length of the window. Can be a dynamic temporal
3777
+ # size indicated by a timedelta or the following string language:
3778
+ #
3779
+ # - 1ns (1 nanosecond)
3780
+ # - 1us (1 microsecond)
3781
+ # - 1ms (1 millisecond)
3782
+ # - 1s (1 second)
3783
+ # - 1m (1 minute)
3784
+ # - 1h (1 hour)
3785
+ # - 1d (1 calendar day)
3786
+ # - 1w (1 calendar week)
3787
+ # - 1mo (1 calendar month)
3788
+ # - 1q (1 calendar quarter)
3789
+ # - 1y (1 calendar year)
3790
+ # - 1i (1 index count)
3791
+ #
3792
+ # By "calendar day", we mean the corresponding time on the next day
3793
+ # (which may not be 24 hours, due to daylight savings). Similarly for
3794
+ # "calendar week", "calendar month", "calendar quarter", and
3795
+ # "calendar year".
3796
+ # @param min_samples [Integer]
3797
+ # The number of values in the window that should be non-null before computing
3798
+ # a result.
3799
+ # @param closed ['left', 'right', 'both', 'none']
3800
+ # Define which sides of the temporal interval are closed (inclusive),
3801
+ # defaults to `'right'`.
3802
+ #
3803
+ # @return [Series]
3804
+ #
3805
+ # @note
3806
+ # If you want to compute multiple aggregation statistics over the same dynamic
3807
+ # window, consider using `rolling` - this method can cache the window size
3808
+ # computation.
3809
+ #
3810
+ # @example
3811
+ # start = DateTime.new(2001, 1, 1)
3812
+ # stop = DateTime.new(2001, 1, 2)
3813
+ # s = Polars::Series.new("index", 25.times.to_a)
3814
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
3815
+ # s.rolling_min_by(d, "3h")
3816
+ # # =>
3817
+ # # shape: (25,)
3818
+ # # Series: 'index' [i64]
3819
+ # # [
3820
+ # # 0
3821
+ # # 0
3822
+ # # 0
3823
+ # # 1
3824
+ # # 2
3825
+ # # …
3826
+ # # 18
3827
+ # # 19
3828
+ # # 20
3829
+ # # 21
3830
+ # # 22
3831
+ # # ]
3832
+ def rolling_min_by(
3833
+ by,
3834
+ window_size,
3835
+ min_samples: 1,
3836
+ closed: "right"
3837
+ )
3838
+ super
3839
+ end
3840
+
3837
3841
  # Apply a rolling min (moving min) over the values in this array.
3838
3842
  #
3839
3843
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3845,7 +3849,7 @@ module Polars
3845
3849
  # @param weights [Array]
3846
3850
  # An optional slice with the same length as the window that will be multiplied
3847
3851
  # elementwise with the values in the window.
3848
- # @param min_periods [Integer]
3852
+ # @param min_samples [Integer]
3849
3853
  # The number of values in the window that should be non-null before computing
3850
3854
  # a result. If nil, it will be set equal to window size.
3851
3855
  # @param center [Boolean]
@@ -3869,12 +3873,96 @@ module Polars
3869
3873
  def rolling_min(
3870
3874
  window_size,
3871
3875
  weights: nil,
3872
- min_periods: nil,
3876
+ min_samples: nil,
3873
3877
  center: false
3874
3878
  )
3875
3879
  super
3876
3880
  end
3877
3881
 
3882
+ # Compute a rolling max based on another series.
3883
+ #
3884
+ # @note
3885
+ # This functionality is considered **unstable**. It may be changed
3886
+ # at any point without it being considered a breaking change.
3887
+ #
3888
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
3889
+ # (the default) means the windows will be:
3890
+ #
3891
+ # - (t_0 - window_size, t_0]
3892
+ # - (t_1 - window_size, t_1]
3893
+ # - ...
3894
+ # - (t_n - window_size, t_n]
3895
+ #
3896
+ # @param by [Object]
3897
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
3898
+ # or `Int32` data type (note that the integral ones require using `'i'`
3899
+ # in `window_size`).
3900
+ # @param window_size [String]
3901
+ # The length of the window. Can be a dynamic temporal
3902
+ # size indicated by a timedelta or the following string language:
3903
+ #
3904
+ # - 1ns (1 nanosecond)
3905
+ # - 1us (1 microsecond)
3906
+ # - 1ms (1 millisecond)
3907
+ # - 1s (1 second)
3908
+ # - 1m (1 minute)
3909
+ # - 1h (1 hour)
3910
+ # - 1d (1 calendar day)
3911
+ # - 1w (1 calendar week)
3912
+ # - 1mo (1 calendar month)
3913
+ # - 1q (1 calendar quarter)
3914
+ # - 1y (1 calendar year)
3915
+ # - 1i (1 index count)
3916
+ #
3917
+ # By "calendar day", we mean the corresponding time on the next day
3918
+ # (which may not be 24 hours, due to daylight savings). Similarly for
3919
+ # "calendar week", "calendar month", "calendar quarter", and
3920
+ # "calendar year".
3921
+ # @param min_samples [Integer]
3922
+ # The number of values in the window that should be non-null before computing
3923
+ # a result.
3924
+ # @param closed ['left', 'right', 'both', 'none']
3925
+ # Define which sides of the temporal interval are closed (inclusive),
3926
+ # defaults to `'right'`.
3927
+ #
3928
+ # @return [Series]
3929
+ #
3930
+ # @note
3931
+ # If you want to compute multiple aggregation statistics over the same dynamic
3932
+ # window, consider using `rolling` - this method can cache the window size
3933
+ # computation.
3934
+ #
3935
+ # @example
3936
+ # start = DateTime.new(2001, 1, 1)
3937
+ # stop = DateTime.new(2001, 1, 2)
3938
+ # s = Polars::Series.new("index", 25.times.to_a)
3939
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
3940
+ # s.rolling_max_by(d, "3h")
3941
+ # # =>
3942
+ # # shape: (25,)
3943
+ # # Series: 'index' [i64]
3944
+ # # [
3945
+ # # 0
3946
+ # # 1
3947
+ # # 2
3948
+ # # 3
3949
+ # # 4
3950
+ # # …
3951
+ # # 20
3952
+ # # 21
3953
+ # # 22
3954
+ # # 23
3955
+ # # 24
3956
+ # # ]
3957
+ def rolling_max_by(
3958
+ by,
3959
+ window_size,
3960
+ min_samples: 1,
3961
+ closed: "right"
3962
+ )
3963
+ super
3964
+ end
3965
+
3878
3966
  # Apply a rolling max (moving max) over the values in this array.
3879
3967
  #
3880
3968
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3886,7 +3974,7 @@ module Polars
3886
3974
  # @param weights [Array]
3887
3975
  # An optional slice with the same length as the window that will be multiplied
3888
3976
  # elementwise with the values in the window.
3889
- # @param min_periods [Integer]
3977
+ # @param min_samples [Integer]
3890
3978
  # The number of values in the window that should be non-null before computing
3891
3979
  # a result. If nil, it will be set equal to window size.
3892
3980
  # @param center [Boolean]
@@ -3910,35 +3998,119 @@ module Polars
3910
3998
  def rolling_max(
3911
3999
  window_size,
3912
4000
  weights: nil,
3913
- min_periods: nil,
4001
+ min_samples: nil,
3914
4002
  center: false
3915
4003
  )
3916
4004
  super
3917
4005
  end
3918
4006
 
3919
- # Apply a rolling mean (moving mean) over the values in this array.
4007
+ # Compute a rolling mean based on another series.
3920
4008
  #
3921
- # A window of length `window_size` will traverse the array. The values that fill
3922
- # this window will (optionally) be multiplied with the weights given by the
3923
- # `weight` vector. The resulting values will be aggregated to their sum.
4009
+ # @note
4010
+ # This functionality is considered **unstable**. It may be changed
4011
+ # at any point without it being considered a breaking change.
3924
4012
  #
3925
- # @param window_size [Integer]
3926
- # The length of the window.
3927
- # @param weights [Array]
3928
- # An optional slice with the same length as the window that will be multiplied
3929
- # elementwise with the values in the window.
3930
- # @param min_periods [Integer]
3931
- # The number of values in the window that should be non-null before computing
3932
- # a result. If nil, it will be set equal to window size.
3933
- # @param center [Boolean]
3934
- # Set the labels at the center of the window
4013
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4014
+ # (the default) means the windows will be:
3935
4015
  #
3936
- # @return [Series]
4016
+ # - (t_0 - window_size, t_0]
4017
+ # - (t_1 - window_size, t_1]
4018
+ # - ...
4019
+ # - (t_n - window_size, t_n]
3937
4020
  #
3938
- # @example
3939
- # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3940
- # s.rolling_mean(2)
3941
- # # =>
4021
+ # @param by [Object]
4022
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4023
+ # or `Int32` data type (note that the integral ones require using `'i'`
4024
+ # in `window_size`).
4025
+ # @param window_size [String]
4026
+ # The length of the window. Can be a dynamic temporal
4027
+ # size indicated by a timedelta or the following string language:
4028
+ #
4029
+ # - 1ns (1 nanosecond)
4030
+ # - 1us (1 microsecond)
4031
+ # - 1ms (1 millisecond)
4032
+ # - 1s (1 second)
4033
+ # - 1m (1 minute)
4034
+ # - 1h (1 hour)
4035
+ # - 1d (1 calendar day)
4036
+ # - 1w (1 calendar week)
4037
+ # - 1mo (1 calendar month)
4038
+ # - 1q (1 calendar quarter)
4039
+ # - 1y (1 calendar year)
4040
+ # - 1i (1 index count)
4041
+ #
4042
+ # By "calendar day", we mean the corresponding time on the next day
4043
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4044
+ # "calendar week", "calendar month", "calendar quarter", and
4045
+ # "calendar year".
4046
+ # @param min_samples [Integer]
4047
+ # The number of values in the window that should be non-null before computing
4048
+ # a result.
4049
+ # @param closed ['left', 'right', 'both', 'none']
4050
+ # Define which sides of the temporal interval are closed (inclusive),
4051
+ # defaults to `'right'`.
4052
+ #
4053
+ # @return [Series]
4054
+ #
4055
+ # @note
4056
+ # If you want to compute multiple aggregation statistics over the same dynamic
4057
+ # window, consider using `rolling` - this method can cache the window size
4058
+ # computation.
4059
+ #
4060
+ # @example
4061
+ # start = DateTime.new(2001, 1, 1)
4062
+ # stop = DateTime.new(2001, 1, 2)
4063
+ # s = Polars::Series.new("index", 25.times.to_a)
4064
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
4065
+ # s.rolling_mean_by(d, "3h")
4066
+ # # =>
4067
+ # # shape: (25,)
4068
+ # # Series: 'index' [f64]
4069
+ # # [
4070
+ # # 0.0
4071
+ # # 0.5
4072
+ # # 1.0
4073
+ # # 2.0
4074
+ # # 3.0
4075
+ # # …
4076
+ # # 19.0
4077
+ # # 20.0
4078
+ # # 21.0
4079
+ # # 22.0
4080
+ # # 23.0
4081
+ # # ]
4082
+ def rolling_mean_by(
4083
+ by,
4084
+ window_size,
4085
+ min_samples: 1,
4086
+ closed: "right"
4087
+ )
4088
+ super
4089
+ end
4090
+
4091
+ # Apply a rolling mean (moving mean) over the values in this array.
4092
+ #
4093
+ # A window of length `window_size` will traverse the array. The values that fill
4094
+ # this window will (optionally) be multiplied with the weights given by the
4095
+ # `weights` vector. The resulting values will be aggregated to their mean.
4096
+ #
4097
+ # @param window_size [Integer]
4098
+ # The length of the window.
4099
+ # @param weights [Array]
4100
+ # An optional slice with the same length as the window that will be multiplied
4101
+ # elementwise with the values in the window.
4102
+ # @param min_samples [Integer]
4103
+ # The number of values in the window that should be non-null before computing
4104
+ # a result. If nil, it will be set equal to window size.
4105
+ # @param center [Boolean]
4106
+ # Set the labels at the center of the window
4107
+ #
4108
+ # @return [Series]
4109
+ #
4110
+ # @example
4111
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
4112
+ # s.rolling_mean(2)
4113
+ # # =>
3942
4114
  # # shape: (5,)
3943
4115
  # # Series: 'a' [f64]
3944
4116
  # # [
@@ -3951,12 +4123,96 @@ module Polars
3951
4123
  def rolling_mean(
3952
4124
  window_size,
3953
4125
  weights: nil,
3954
- min_periods: nil,
4126
+ min_samples: nil,
3955
4127
  center: false
3956
4128
  )
3957
4129
  super
3958
4130
  end
3959
4131
 
4132
+ # Compute a rolling sum based on another series.
4133
+ #
4134
+ # @note
4135
+ # This functionality is considered **unstable**. It may be changed
4136
+ # at any point without it being considered a breaking change.
4137
+ #
4138
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4139
+ # (the default) means the windows will be:
4140
+ #
4141
+ # - (t_0 - window_size, t_0]
4142
+ # - (t_1 - window_size, t_1]
4143
+ # - ...
4144
+ # - (t_n - window_size, t_n]
4145
+ #
4146
+ # @param by [Object]
4147
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4148
+ # or `Int32` data type (note that the integral ones require using `'i'`
4149
+ # in `window_size`).
4150
+ # @param window_size [String]
4151
+ # The length of the window. Can be a dynamic temporal
4152
+ # size indicated by a timedelta or the following string language:
4153
+ #
4154
+ # - 1ns (1 nanosecond)
4155
+ # - 1us (1 microsecond)
4156
+ # - 1ms (1 millisecond)
4157
+ # - 1s (1 second)
4158
+ # - 1m (1 minute)
4159
+ # - 1h (1 hour)
4160
+ # - 1d (1 calendar day)
4161
+ # - 1w (1 calendar week)
4162
+ # - 1mo (1 calendar month)
4163
+ # - 1q (1 calendar quarter)
4164
+ # - 1y (1 calendar year)
4165
+ # - 1i (1 index count)
4166
+ #
4167
+ # By "calendar day", we mean the corresponding time on the next day
4168
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4169
+ # "calendar week", "calendar month", "calendar quarter", and
4170
+ # "calendar year".
4171
+ # @param min_samples [Integer]
4172
+ # The number of values in the window that should be non-null before computing
4173
+ # a result.
4174
+ # @param closed ['left', 'right', 'both', 'none']
4175
+ # Define which sides of the temporal interval are closed (inclusive),
4176
+ # defaults to `'right'`.
4177
+ #
4178
+ # @return [Series]
4179
+ #
4180
+ # @note
4181
+ # If you want to compute multiple aggregation statistics over the same dynamic
4182
+ # window, consider using `rolling` - this method can cache the window size
4183
+ # computation.
4184
+ #
4185
+ # @example
4186
+ # start = DateTime.new(2001, 1, 1)
4187
+ # stop = DateTime.new(2001, 1, 2)
4188
+ # s = Polars::Series.new("index", 25.times.to_a)
4189
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
4190
+ # s.rolling_sum_by(d, "3h")
4191
+ # # =>
4192
+ # # shape: (25,)
4193
+ # # Series: 'index' [i64]
4194
+ # # [
4195
+ # # 0
4196
+ # # 1
4197
+ # # 3
4198
+ # # 6
4199
+ # # 9
4200
+ # # …
4201
+ # # 57
4202
+ # # 60
4203
+ # # 63
4204
+ # # 66
4205
+ # # 69
4206
+ # # ]
4207
+ def rolling_sum_by(
4208
+ by,
4209
+ window_size,
4210
+ min_samples: 1,
4211
+ closed: "right"
4212
+ )
4213
+ super
4214
+ end
4215
+
3960
4216
  # Apply a rolling sum (moving sum) over the values in this array.
3961
4217
  #
3962
4218
  # A window of length `window_size` will traverse the array. The values that fill
@@ -3968,7 +4224,7 @@ module Polars
3968
4224
  # @param weights [Array]
3969
4225
  # An optional slice with the same length as the window that will be multiplied
3970
4226
  # elementwise with the values in the window.
3971
- # @param min_periods [Integer]
4227
+ # @param min_samples [Integer]
3972
4228
  # The number of values in the window that should be non-null before computing
3973
4229
  # a result. If nil, it will be set equal to window size.
3974
4230
  # @param center [Boolean]
@@ -3992,12 +4248,99 @@ module Polars
3992
4248
  def rolling_sum(
3993
4249
  window_size,
3994
4250
  weights: nil,
3995
- min_periods: nil,
4251
+ min_samples: nil,
3996
4252
  center: false
3997
4253
  )
3998
4254
  super
3999
4255
  end
4000
4256
 
4257
+ # Compute a rolling standard deviation based on another series.
4258
+ #
4259
+ # @note
4260
+ # This functionality is considered **unstable**. It may be changed
4261
+ # at any point without it being considered a breaking change.
4262
+ #
4263
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4264
+ # (the default) means the windows will be:
4265
+ #
4266
+ # - (t_0 - window_size, t_0]
4267
+ # - (t_1 - window_size, t_1]
4268
+ # - ...
4269
+ # - (t_n - window_size, t_n]
4270
+ #
4271
+ # @param by [Object]
4272
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4273
+ # or `Int32` data type (note that the integral ones require using `'i'`
4274
+ # in `window_size`).
4275
+ # @param window_size [String]
4276
+ # The length of the window. Can be a dynamic temporal
4277
+ # size indicated by a timedelta or the following string language:
4278
+ #
4279
+ # - 1ns (1 nanosecond)
4280
+ # - 1us (1 microsecond)
4281
+ # - 1ms (1 millisecond)
4282
+ # - 1s (1 second)
4283
+ # - 1m (1 minute)
4284
+ # - 1h (1 hour)
4285
+ # - 1d (1 calendar day)
4286
+ # - 1w (1 calendar week)
4287
+ # - 1mo (1 calendar month)
4288
+ # - 1q (1 calendar quarter)
4289
+ # - 1y (1 calendar year)
4290
+ # - 1i (1 index count)
4291
+ #
4292
+ # By "calendar day", we mean the corresponding time on the next day
4293
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4294
+ # "calendar week", "calendar month", "calendar quarter", and
4295
+ # "calendar year".
4296
+ # @param min_samples [Integer]
4297
+ # The number of values in the window that should be non-null before computing
4298
+ # a result.
4299
+ # @param closed ['left', 'right', 'both', 'none']
4300
+ # Define which sides of the temporal interval are closed (inclusive),
4301
+ # defaults to `'right'`.
4302
+ # @param ddof [Integer]
4303
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4304
+ #
4305
+ # @return [Series]
4306
+ #
4307
+ # @note
4308
+ # If you want to compute multiple aggregation statistics over the same dynamic
4309
+ # window, consider using `rolling` - this method can cache the window size
4310
+ # computation.
4311
+ #
4312
+ # @example
4313
+ # start = DateTime.new(2001, 1, 1)
4314
+ # stop = DateTime.new(2001, 1, 2)
4315
+ # s = Polars::Series.new("index", 25.times.to_a)
4316
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
4317
+ # s.rolling_std_by(d, "3h")
4318
+ # # =>
4319
+ # # shape: (25,)
4320
+ # # Series: 'index' [f64]
4321
+ # # [
4322
+ # # null
4323
+ # # 0.707107
4324
+ # # 1.0
4325
+ # # 1.0
4326
+ # # 1.0
4327
+ # # …
4328
+ # # 1.0
4329
+ # # 1.0
4330
+ # # 1.0
4331
+ # # 1.0
4332
+ # # 1.0
4333
+ # # ]
4334
+ def rolling_std_by(
4335
+ by,
4336
+ window_size,
4337
+ min_samples: 1,
4338
+ closed: "right",
4339
+ ddof: 1
4340
+ )
4341
+ super
4342
+ end
4343
+
4001
4344
  # Compute a rolling std dev.
4002
4345
  #
4003
4346
  # A window of length `window_size` will traverse the array. The values that fill
@@ -4009,7 +4352,7 @@ module Polars
4009
4352
  # @param weights [Array]
4010
4353
  # An optional slice with the same length as the window that will be multiplied
4011
4354
  # elementwise with the values in the window.
4012
- # @param min_periods [Integer]
4355
+ # @param min_samples [Integer]
4013
4356
  # The number of values in the window that should be non-null before computing
4014
4357
  # a result. If nil, it will be set equal to window size.
4015
4358
  # @param center [Boolean]
@@ -4036,13 +4379,100 @@ module Polars
4036
4379
  def rolling_std(
4037
4380
  window_size,
4038
4381
  weights: nil,
4039
- min_periods: nil,
4382
+ min_samples: nil,
4040
4383
  center: false,
4041
4384
  ddof: 1
4042
4385
  )
4043
4386
  super
4044
4387
  end
4045
4388
 
4389
+ # Compute a rolling variance based on another series.
4390
+ #
4391
+ # @note
4392
+ # This functionality is considered **unstable**. It may be changed
4393
+ # at any point without it being considered a breaking change.
4394
+ #
4395
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4396
+ # (the default) means the windows will be:
4397
+ #
4398
+ # - (t_0 - window_size, t_0]
4399
+ # - (t_1 - window_size, t_1]
4400
+ # - ...
4401
+ # - (t_n - window_size, t_n]
4402
+ #
4403
+ # @param by
4404
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4405
+ # or `Int32` data type (note that the integral ones require using `'i'`
4406
+ # in `window_size`).
4407
+ # @param window_size
4408
+ # The length of the window. Can be a dynamic temporal
4409
+ # size indicated by a timedelta or the following string language:
4410
+ #
4411
+ # - 1ns (1 nanosecond)
4412
+ # - 1us (1 microsecond)
4413
+ # - 1ms (1 millisecond)
4414
+ # - 1s (1 second)
4415
+ # - 1m (1 minute)
4416
+ # - 1h (1 hour)
4417
+ # - 1d (1 calendar day)
4418
+ # - 1w (1 calendar week)
4419
+ # - 1mo (1 calendar month)
4420
+ # - 1q (1 calendar quarter)
4421
+ # - 1y (1 calendar year)
4422
+ # - 1i (1 index count)
4423
+ #
4424
+ # By "calendar day", we mean the corresponding time on the next day
4425
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4426
+ # "calendar week", "calendar month", "calendar quarter", and
4427
+ # "calendar year".
4428
+ # @param min_samples [Integer]
4429
+ # The number of values in the window that should be non-null before computing
4430
+ # a result.
4431
+ # @param closed ['left', 'right', 'both', 'none']
4432
+ # Define which sides of the temporal interval are closed (inclusive),
4433
+ # defaults to `'right'`.
4434
+ # @param ddof
4435
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
4436
+ #
4437
+ # @return [Series]
4438
+ #
4439
+ # @note
4440
+ # If you want to compute multiple aggregation statistics over the same dynamic
4441
+ # window, consider using `rolling` - this method can cache the window size
4442
+ # computation.
4443
+ #
4444
+ # @example
4445
+ # start = DateTime.new(2001, 1, 1)
4446
+ # stop = DateTime.new(2001, 1, 2)
4447
+ # s = Polars::Series.new("index", 25.times.to_a)
4448
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
4449
+ # s.rolling_var_by(d, "3h")
4450
+ # # =>
4451
+ # # shape: (25,)
4452
+ # # Series: 'index' [f64]
4453
+ # # [
4454
+ # # null
4455
+ # # 0.5
4456
+ # # 1.0
4457
+ # # 1.0
4458
+ # # 1.0
4459
+ # # …
4460
+ # # 1.0
4461
+ # # 1.0
4462
+ # # 1.0
4463
+ # # 1.0
4464
+ # # 1.0
4465
+ # # ]
4466
+ def rolling_var_by(
4467
+ by,
4468
+ window_size,
4469
+ min_samples: 1,
4470
+ closed: "right",
4471
+ ddof: 1
4472
+ )
4473
+ super
4474
+ end
4475
+
4046
4476
  # Compute a rolling variance.
4047
4477
  #
4048
4478
  # A window of length `window_size` will traverse the array. The values that fill
@@ -4054,7 +4484,7 @@ module Polars
4054
4484
  # @param weights [Array]
4055
4485
  # An optional slice with the same length as the window that will be multiplied
4056
4486
  # elementwise with the values in the window.
4057
- # @param min_periods [Integer]
4487
+ # @param min_samples [Integer]
4058
4488
  # The number of values in the window that should be non-null before computing
4059
4489
  # a result. If nil, it will be set equal to window size.
4060
4490
  # @param center [Boolean]
@@ -4081,7 +4511,7 @@ module Polars
4081
4511
  def rolling_var(
4082
4512
  window_size,
4083
4513
  weights: nil,
4084
- min_periods: nil,
4514
+ min_samples: nil,
4085
4515
  center: false,
4086
4516
  ddof: 1
4087
4517
  )
@@ -4091,6 +4521,90 @@ module Polars
4091
4521
  # def rolling_apply
4092
4522
  # end
4093
4523
 
4524
+ # Compute a rolling median based on another series.
4525
+ #
4526
+ # @note
4527
+ # This functionality is considered **unstable**. It may be changed
4528
+ # at any point without it being considered a breaking change.
4529
+ #
4530
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4531
+ # (the default) means the windows will be:
4532
+ #
4533
+ # - (t_0 - window_size, t_0]
4534
+ # - (t_1 - window_size, t_1]
4535
+ # - ...
4536
+ # - (t_n - window_size, t_n]
4537
+ #
4538
+ # @param by [Object]
4539
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4540
+ # or `Int32` data type (note that the integral ones require using `'i'`
4541
+ # in `window_size`).
4542
+ # @param window_size [String]
4543
+ # The length of the window. Can be a dynamic temporal
4544
+ # size indicated by a timedelta or the following string language:
4545
+ #
4546
+ # - 1ns (1 nanosecond)
4547
+ # - 1us (1 microsecond)
4548
+ # - 1ms (1 millisecond)
4549
+ # - 1s (1 second)
4550
+ # - 1m (1 minute)
4551
+ # - 1h (1 hour)
4552
+ # - 1d (1 calendar day)
4553
+ # - 1w (1 calendar week)
4554
+ # - 1mo (1 calendar month)
4555
+ # - 1q (1 calendar quarter)
4556
+ # - 1y (1 calendar year)
4557
+ # - 1i (1 index count)
4558
+ #
4559
+ # By "calendar day", we mean the corresponding time on the next day
4560
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4561
+ # "calendar week", "calendar month", "calendar quarter", and
4562
+ # "calendar year".
4563
+ # @param min_samples [Integer]
4564
+ # The number of values in the window that should be non-null before computing
4565
+ # a result.
4566
+ # @param closed ['left', 'right', 'both', 'none']
4567
+ # Define which sides of the temporal interval are closed (inclusive),
4568
+ # defaults to `'right'`.
4569
+ #
4570
+ # @return [Series]
4571
+ #
4572
+ # @note
4573
+ # If you want to compute multiple aggregation statistics over the same dynamic
4574
+ # window, consider using `rolling` - this method can cache the window size
4575
+ # computation.
4576
+ #
4577
+ # @example
4578
+ # start = DateTime.new(2001, 1, 1)
4579
+ # stop = DateTime.new(2001, 1, 2)
4580
+ # s = Polars::Series.new("index", 25.times.to_a)
4581
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
4582
+ # s.rolling_median_by(d, "3h")
4583
+ # # =>
4584
+ # # shape: (25,)
4585
+ # # Series: 'index' [f64]
4586
+ # # [
4587
+ # # 0.0
4588
+ # # 0.5
4589
+ # # 1.0
4590
+ # # 2.0
4591
+ # # 3.0
4592
+ # # …
4593
+ # # 19.0
4594
+ # # 20.0
4595
+ # # 21.0
4596
+ # # 22.0
4597
+ # # 23.0
4598
+ # # ]
4599
+ def rolling_median_by(
4600
+ by,
4601
+ window_size,
4602
+ min_samples: 1,
4603
+ closed: "right"
4604
+ )
4605
+ super
4606
+ end
4607
+
4094
4608
  # Compute a rolling median.
4095
4609
  #
4096
4610
  # @param window_size [Integer]
@@ -4098,7 +4612,7 @@ module Polars
4098
4612
  # @param weights [Array]
4099
4613
  # An optional slice with the same length as the window that will be multiplied
4100
4614
  # elementwise with the values in the window.
4101
- # @param min_periods [Integer]
4615
+ # @param min_samples [Integer]
4102
4616
  # The number of values in the window that should be non-null before computing
4103
4617
  # a result. If nil, it will be set equal to window size.
4104
4618
  # @param center [Boolean]
@@ -4123,12 +4637,102 @@ module Polars
4123
4637
  def rolling_median(
4124
4638
  window_size,
4125
4639
  weights: nil,
4126
- min_periods: nil,
4640
+ min_samples: nil,
4127
4641
  center: false
4128
4642
  )
4129
4643
  super
4130
4644
  end
4131
4645
 
4646
+ # Compute a rolling quantile based on another series.
4647
+ #
4648
+ # @note
4649
+ # This functionality is considered **unstable**. It may be changed
4650
+ # at any point without it being considered a breaking change.
4651
+ #
4652
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4653
+ # (the default) means the windows will be:
4654
+ #
4655
+ # - (t_0 - window_size, t_0]
4656
+ # - (t_1 - window_size, t_1]
4657
+ # - ...
4658
+ # - (t_n - window_size, t_n]
4659
+ #
4660
+ # @param by [Object]
4661
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4662
+ # or `Int32` data type (note that the integral ones require using `'i'`
4663
+ # in `window_size`).
4664
+ # @param window_size [String]
4665
+ # The length of the window. Can be a dynamic
4666
+ # temporal size indicated by a timedelta or the following string language:
4667
+ #
4668
+ # - 1ns (1 nanosecond)
4669
+ # - 1us (1 microsecond)
4670
+ # - 1ms (1 millisecond)
4671
+ # - 1s (1 second)
4672
+ # - 1m (1 minute)
4673
+ # - 1h (1 hour)
4674
+ # - 1d (1 calendar day)
4675
+ # - 1w (1 calendar week)
4676
+ # - 1mo (1 calendar month)
4677
+ # - 1q (1 calendar quarter)
4678
+ # - 1y (1 calendar year)
4679
+ # - 1i (1 index count)
4680
+ #
4681
+ # By "calendar day", we mean the corresponding time on the next day
4682
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4683
+ # "calendar week", "calendar month", "calendar quarter", and
4684
+ # "calendar year".
4685
+ # @param quantile [Float]
4686
+ # Quantile between 0.0 and 1.0.
4687
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
4688
+ # Interpolation method.
4689
+ # @param min_samples [Integer]
4690
+ # The number of values in the window that should be non-null before computing
4691
+ # a result.
4692
+ # @param closed ['left', 'right', 'both', 'none']
4693
+ # Define which sides of the temporal interval are closed (inclusive),
4694
+ # defaults to `'right'`.
4695
+ #
4696
+ # @return [Series]
4697
+ #
4698
+ # @note
4699
+ # If you want to compute multiple aggregation statistics over the same dynamic
4700
+ # window, consider using `rolling` - this method can cache the window size
4701
+ # computation.
4702
+ #
4703
+ # @example
4704
+ # start = DateTime.new(2001, 1, 1)
4705
+ # stop = DateTime.new(2001, 1, 2)
4706
+ # s = Polars::Series.new("index", 25.times.to_a)
4707
+ # d = Polars::Series.new("date", Polars.datetime_range(start, stop, "1h", eager: true))
4708
+ # s.rolling_quantile_by(d, "3h", quantile: 0.5)
4709
+ # # =>
4710
+ # # shape: (25,)
4711
+ # # Series: 'index' [f64]
4712
+ # # [
4713
+ # # 0.0
4714
+ # # 1.0
4715
+ # # 1.0
4716
+ # # 2.0
4717
+ # # 3.0
4718
+ # # …
4719
+ # # 19.0
4720
+ # # 20.0
4721
+ # # 21.0
4722
+ # # 22.0
4723
+ # # 23.0
4724
+ # # ]
4725
+ def rolling_quantile_by(
4726
+ by,
4727
+ window_size,
4728
+ quantile:,
4729
+ interpolation: "nearest",
4730
+ min_samples: 1,
4731
+ closed: "right"
4732
+ )
4733
+ super
4734
+ end
4735
+
4132
4736
  # Compute a rolling quantile.
4133
4737
  #
4134
4738
  # @param quantile [Float]
@@ -4140,7 +4744,7 @@ module Polars
4140
4744
  # @param weights [Array]
4141
4745
  # An optional slice with the same length as the window that will be multiplied
4142
4746
  # elementwise with the values in the window.
4143
- # @param min_periods [Integer]
4747
+ # @param min_samples [Integer]
4144
4748
  # The number of values in the window that should be non-null before computing
4145
4749
  # a result. If nil, it will be set equal to window size.
4146
4750
  # @param center [Boolean]
@@ -4181,7 +4785,144 @@ module Polars
4181
4785
  interpolation: "nearest",
4182
4786
  window_size: 2,
4183
4787
  weights: nil,
4184
- min_periods: nil,
4788
+ min_samples: nil,
4789
+ center: false
4790
+ )
4791
+ super
4792
+ end
4793
+
4794
+ # Compute a rolling rank based on another series.
4795
+ #
4796
+ # @note
4797
+ # This functionality is considered **unstable**. It may be changed
4798
+ # at any point without it being considered a breaking change.
4799
+ #
4800
+ # Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed: "right"`
4801
+ # (the default) means the windows will be:
4802
+ #
4803
+ # - (t_0 - window_size, t_0]
4804
+ # - (t_1 - window_size, t_1]
4805
+ # - ...
4806
+ # - (t_n - window_size, t_n]
4807
+ #
4808
+ # @param by [Expr]
4809
+ # Should be `DateTime`, `Date`, `UInt64`, `UInt32`, `Int64`,
4810
+ # or `Int32` data type (note that the integral ones require using `'i'`
4811
+ # in `window_size`).
4812
+ # @param window_size [String]
4813
+ # The length of the window. Can be a dynamic
4814
+ # temporal size indicated by a timedelta or the following string language:
4815
+ #
4816
+ # - 1ns (1 nanosecond)
4817
+ # - 1us (1 microsecond)
4818
+ # - 1ms (1 millisecond)
4819
+ # - 1s (1 second)
4820
+ # - 1m (1 minute)
4821
+ # - 1h (1 hour)
4822
+ # - 1d (1 calendar day)
4823
+ # - 1w (1 calendar week)
4824
+ # - 1mo (1 calendar month)
4825
+ # - 1q (1 calendar quarter)
4826
+ # - 1y (1 calendar year)
4827
+ # - 1i (1 index count)
4828
+ #
4829
+ # By "calendar day", we mean the corresponding time on the next day
4830
+ # (which may not be 24 hours, due to daylight savings). Similarly for
4831
+ # "calendar week", "calendar month", "calendar quarter", and
4832
+ # "calendar year".
4833
+ # @param method ['average', 'min', 'max', 'dense', 'random']
4834
+ # The method used to assign ranks to tied elements.
4835
+ # The following methods are available (default is 'average'):
4836
+ #
4837
+ # - 'average' : The average of the ranks that would have been assigned to
4838
+ # all the tied values is assigned to each value.
4839
+ # - 'min' : The minimum of the ranks that would have been assigned to all
4840
+ # the tied values is assigned to each value. (This is also referred to
4841
+ # as "competition" ranking.)
4842
+ # - 'max' : The maximum of the ranks that would have been assigned to all
4843
+ # the tied values is assigned to each value.
4844
+ # - 'dense' : Like 'min', but the rank of the next highest element is
4845
+ # assigned the rank immediately after those assigned to the tied
4846
+ # elements.
4847
+ # - 'random' : Choose a random rank for each value in a tie.
4848
+ # @param seed [Integer]
4849
+ # Random seed used when `method: 'random'`. If set to nil (default), a
4850
+ # random seed is generated for each rolling rank operation.
4851
+ # @param min_samples [Integer]
4852
+ # The number of values in the window that should be non-null before computing
4853
+ # a result.
4854
+ # @param closed ['left', 'right', 'both', 'none']
4855
+ # Define which sides of the temporal interval are closed (inclusive),
4856
+ # defaults to `'right'`.
4857
+ #
4858
+ # @return [Series]
4859
+ def rolling_rank_by(
4860
+ by,
4861
+ window_size,
4862
+ method: "average",
4863
+ seed: nil,
4864
+ min_samples: 1,
4865
+ closed: "right"
4866
+ )
4867
+ super
4868
+ end
4869
+
4870
+ # Compute a rolling rank.
4871
+ #
4872
+ # @note
4873
+ # This functionality is considered **unstable**. It may be changed
4874
+ # at any point without it being considered a breaking change.
4875
+ #
4876
+ # A window of length `window_size` will traverse the array. The values
4877
+ # that fill this window will be ranked according to the `method`
4878
+ # parameter. The resulting values will be the rank of the value that is
4879
+ # at the end of the sliding window.
4880
+ #
4881
+ # @param window_size [Integer]
4882
+ # Integer size of the rolling window.
4883
+ # @param method ['average', 'min', 'max', 'dense', 'random']
4884
+ # The method used to assign ranks to tied elements.
4885
+ # The following methods are available (default is 'average'):
4886
+ #
4887
+ # - 'average' : The average of the ranks that would have been assigned to
4888
+ # all the tied values is assigned to each value.
4889
+ # - 'min' : The minimum of the ranks that would have been assigned to all
4890
+ # the tied values is assigned to each value. (This is also referred to
4891
+ # as "competition" ranking.)
4892
+ # - 'max' : The maximum of the ranks that would have been assigned to all
4893
+ # the tied values is assigned to each value.
4894
+ # - 'dense' : Like 'min', but the rank of the next highest element is
4895
+ # assigned the rank immediately after those assigned to the tied
4896
+ # elements.
4897
+ # - 'random' : Choose a random rank for each value in a tie.
4898
+ # @param seed [Integer]
4899
+ # Random seed used when `method: 'random'`. If set to nil (default), a
4900
+ # random seed is generated for each rolling rank operation.
4901
+ # @param min_samples [Integer]
4902
+ # The number of values in the window that should be non-null before computing
4903
+ # a result. If set to `nil` (default), it will be set equal to `window_size`.
4904
+ # @param center [Boolean]
4905
+ # Set the labels at the center of the window.
4906
+ #
4907
+ # @return [Series]
4908
+ #
4909
+ # @example
4910
+ # Polars::Series.new([1, 4, 4, 1, 9]).rolling_rank(3, method: "average")
4911
+ # # =>
4912
+ # # shape: (5,)
4913
+ # # Series: '' [f64]
4914
+ # # [
4915
+ # # null
4916
+ # # null
4917
+ # # 2.5
4918
+ # # 1.0
4919
+ # # 3.0
4920
+ # # ]
4921
+ def rolling_rank(
4922
+ window_size,
4923
+ method: "average",
4924
+ seed: nil,
4925
+ min_samples: nil,
4185
4926
  center: false
4186
4927
  )
4187
4928
  super
@@ -4264,9 +5005,9 @@ module Polars
4264
5005
  # Sample from this Series.
4265
5006
  #
4266
5007
  # @param n [Integer]
4267
- # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
4268
- # `frac` is nil.
4269
- # @param frac [Float]
5008
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
5009
+ # `fraction` is nil.
5010
+ # @param fraction [Float]
4270
5011
  # Fraction of items to return. Cannot be used with `n`.
4271
5012
  # @param with_replacement [Boolean]
4272
5013
  # Allow values to be sampled more than once.
@@ -4290,23 +5031,12 @@ module Polars
4290
5031
  # # ]
4291
5032
  def sample(
4292
5033
  n: nil,
4293
- frac: nil,
5034
+ fraction: nil,
4294
5035
  with_replacement: false,
4295
5036
  shuffle: false,
4296
5037
  seed: nil
4297
5038
  )
4298
- if !n.nil? && !frac.nil?
4299
- raise ArgumentError, "cannot specify both `n` and `frac`"
4300
- end
4301
-
4302
- if n.nil? && !frac.nil?
4303
- return Utils.wrap_s(_s.sample_frac(frac, with_replacement, shuffle, seed))
4304
- end
4305
-
4306
- if n.nil?
4307
- n = 1
4308
- end
4309
- Utils.wrap_s(_s.sample_n(n, with_replacement, shuffle, seed))
5039
+ super
4310
5040
  end
4311
5041
 
4312
5042
  # Get a boolean mask of the local maximum peaks.
@@ -4382,7 +5112,7 @@ module Polars
4382
5112
 
4383
5113
  # Hash the Series.
4384
5114
  #
4385
- # The hash value is of type `:u64`.
5115
+ # The hash value is of type `UInt64`.
4386
5116
  #
4387
5117
  # @param seed [Integer]
4388
5118
  # Random seed parameter. Defaults to 0.
@@ -4397,7 +5127,7 @@ module Polars
4397
5127
  #
4398
5128
  # @example
4399
5129
  # s = Polars::Series.new("a", [1, 2, 3])
4400
- # s._hash(42)
5130
+ # s.hash_(42)
4401
5131
  # # =>
4402
5132
  # # shape: (3,)
4403
5133
  # # Series: 'a' [u64]
@@ -4406,7 +5136,7 @@ module Polars
4406
5136
  # # 10386026231460783898
4407
5137
  # # 17796317186427479491
4408
5138
  # # ]
4409
- def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
5139
+ def hash_(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
4410
5140
  super
4411
5141
  end
4412
5142
 
@@ -4416,7 +5146,7 @@ module Polars
4416
5146
  # you can safely use that cast operation.
4417
5147
  #
4418
5148
  # @param signed [Boolean]
4419
- # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
5149
+ # If true, reinterpret as `Polars::Int64`. Otherwise, reinterpret as `Polars::UInt64`.
4420
5150
  #
4421
5151
  # @return [Series]
4422
5152
  #
@@ -4519,7 +5249,7 @@ module Polars
4519
5249
  # the order that the values occur in the Series.
4520
5250
  # - 'random' : Like 'ordinal', but the rank for ties is not dependent
4521
5251
  # on the order that the values occur in the Series.
4522
- # @param reverse [Boolean]
5252
+ # @param descending [Boolean]
4523
5253
  # Reverse the operation.
4524
5254
  # @param seed [Integer]
4525
5255
  # If `method: "random"`, use this as seed.
@@ -4553,7 +5283,7 @@ module Polars
4553
5283
  # # 2
4554
5284
  # # 5
4555
5285
  # # ]
4556
- def rank(method: "average", reverse: false, seed: nil)
5286
+ def rank(method: "average", descending: false, seed: nil)
4557
5287
  super
4558
5288
  end
4559
5289
 
@@ -4711,17 +5441,16 @@ module Polars
4711
5441
  _s.kurtosis(fisher, bias)
4712
5442
  end
4713
5443
 
4714
- # Clip (limit) the values in an array to a `min` and `max` boundary.
4715
- #
4716
- # Only works for numerical types.
5444
+ # Set values outside the given boundaries to the boundary value.
4717
5445
  #
4718
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4719
- # expression. See {#when} for more information.
4720
- #
4721
- # @param min_val [Numeric]
4722
- # Minimum value.
4723
- # @param max_val [Numeric]
4724
- # Maximum value.
5446
+ # @param lower_bound [Numeric]
5447
+ # Lower bound. Accepts expression input.
5448
+ # Non-expression inputs are parsed as literals.
5449
+ # If set to `nil` (default), no lower bound is applied.
5450
+ # @param upper_bound [Numeric]
5451
+ # Upper bound. Accepts expression input.
5452
+ # Non-expression inputs are parsed as literals.
5453
+ # If set to `nil` (default), no upper bound is applied.
4725
5454
  #
4726
5455
  # @return [Series]
4727
5456
  #
@@ -4737,37 +5466,7 @@ module Polars
4737
5466
  # # null
4738
5467
  # # 10
4739
5468
  # # ]
4740
- def clip(min_val = nil, max_val = nil)
4741
- super
4742
- end
4743
-
4744
- # Clip (limit) the values in an array to a `min` boundary.
4745
- #
4746
- # Only works for numerical types.
4747
- #
4748
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4749
- # expression. See {#when} for more information.
4750
- #
4751
- # @param min_val [Numeric]
4752
- # Minimum value.
4753
- #
4754
- # @return [Series]
4755
- def clip_min(min_val)
4756
- super
4757
- end
4758
-
4759
- # Clip (limit) the values in an array to a `max` boundary.
4760
- #
4761
- # Only works for numerical types.
4762
- #
4763
- # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4764
- # expression. See {#when} for more information.
4765
- #
4766
- # @param max_val [Numeric]
4767
- # Maximum value.
4768
- #
4769
- # @return [Series]
4770
- def clip_max(max_val)
5469
+ def clip(lower_bound = nil, upper_bound = nil)
4771
5470
  super
4772
5471
  end
4773
5472
 
@@ -4828,10 +5527,10 @@ module Polars
4828
5527
  # Replace values by different values.
4829
5528
  #
4830
5529
  # @param old [Object]
4831
- # Value or sequence of values to replace.
5530
+ # Value or array of values to replace.
4832
5531
  # Also accepts a mapping of values to their replacement.
4833
5532
  # @param new [Object]
4834
- # Value or sequence of values to replace by.
5533
+ # Value or array of values to replace by.
4835
5534
  # Length must match the length of `old` or have length 1.
4836
5535
  # @param default [Object]
4837
5536
  # Set values that were not replaced to this value.
@@ -4856,7 +5555,7 @@ module Polars
4856
5555
  # # 3
4857
5556
  # # ]
4858
5557
  #
4859
- # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
5558
+ # @example Replace multiple values by passing arrays to the `old` and `new` parameters.
4860
5559
  # s.replace([2, 3], [100, 200])
4861
5560
  # # =>
4862
5561
  # # shape: (4,)
@@ -4893,18 +5592,18 @@ module Polars
4893
5592
  # # "2"
4894
5593
  # # "3"
4895
5594
  # # ]
4896
- def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
5595
+ def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
4897
5596
  super
4898
5597
  end
4899
5598
 
4900
5599
  # Replace all values by different values.
4901
5600
  #
4902
5601
  # @param old [Object]
4903
- # Value or sequence of values to replace.
5602
+ # Value or array of values to replace.
4904
5603
  # Also accepts a mapping of values to their replacement as syntactic sugar for
4905
5604
  # `replace_strict(old: Polars::Series.new(mapping.keys), new: Polars::Series.new(mapping.values))`.
4906
5605
  # @param new [Object]
4907
- # Value or sequence of values to replace by.
5606
+ # Value or array of values to replace by.
4908
5607
  # Length must match the length of `old` or have length 1.
4909
5608
  # @param default [Object]
4910
5609
  # Set values that were not replaced to this value. If no default is specified,
@@ -4916,7 +5615,7 @@ module Polars
4916
5615
  #
4917
5616
  # @return [Series]
4918
5617
  #
4919
- # @example Replace values by passing sequences to the `old` and `new` parameters.
5618
+ # @example Replace values by passing arrays to the `old` and `new` parameters.
4920
5619
  # s = Polars::Series.new([1, 2, 2, 3])
4921
5620
  # s.replace_strict([1, 2, 3], [100, 200, 300])
4922
5621
  # # =>
@@ -5004,8 +5703,8 @@ module Polars
5004
5703
  # # ]
5005
5704
  def replace_strict(
5006
5705
  old,
5007
- new = Expr::NO_DEFAULT,
5008
- default: Expr::NO_DEFAULT,
5706
+ new = NO_DEFAULT,
5707
+ default: NO_DEFAULT,
5009
5708
  return_dtype: nil
5010
5709
  )
5011
5710
  super
@@ -5013,7 +5712,7 @@ module Polars
5013
5712
 
5014
5713
  # Reshape this Series to a flat Series or a Series of Lists.
5015
5714
  #
5016
- # @param dims [Array]
5715
+ # @param dimensions [Array]
5017
5716
  # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
5018
5717
  # dimension is inferred.
5019
5718
  #
@@ -5047,8 +5746,8 @@ module Polars
5047
5746
  # # 8
5048
5747
  # # 9
5049
5748
  # # ]
5050
- def reshape(dims)
5051
- super
5749
+ def reshape(dimensions)
5750
+ self.class._from_rbseries(_s.reshape(dimensions))
5052
5751
  end
5053
5752
 
5054
5753
  # Shuffle the contents of this Series.
@@ -5094,8 +5793,8 @@ module Polars
5094
5793
  half_life: nil,
5095
5794
  alpha: nil,
5096
5795
  adjust: true,
5097
- min_periods: 1,
5098
- ignore_nulls: true
5796
+ min_samples: 1,
5797
+ ignore_nulls: false
5099
5798
  )
5100
5799
  super
5101
5800
  end
@@ -5184,8 +5883,8 @@ module Polars
5184
5883
  alpha: nil,
5185
5884
  adjust: true,
5186
5885
  bias: false,
5187
- min_periods: 1,
5188
- ignore_nulls: true
5886
+ min_samples: 1,
5887
+ ignore_nulls: false
5189
5888
  )
5190
5889
  super
5191
5890
  end
@@ -5212,8 +5911,8 @@ module Polars
5212
5911
  alpha: nil,
5213
5912
  adjust: true,
5214
5913
  bias: false,
5215
- min_periods: 1,
5216
- ignore_nulls: true
5914
+ min_samples: 1,
5915
+ ignore_nulls: false
5217
5916
  )
5218
5917
  super
5219
5918
  end
@@ -5249,7 +5948,7 @@ module Polars
5249
5948
  #
5250
5949
  # Enables downstream code to use fast paths for sorted arrays.
5251
5950
  #
5252
- # @param reverse [Boolean]
5951
+ # @param descending [Boolean]
5253
5952
  # If the Series order is reversed, e.g. descending.
5254
5953
  #
5255
5954
  # @return [Series]
@@ -5262,8 +5961,8 @@ module Polars
5262
5961
  # s = Polars::Series.new("a", [1, 2, 3])
5263
5962
  # s.set_sorted.max
5264
5963
  # # => 3
5265
- def set_sorted(reverse: false)
5266
- Utils.wrap_s(_s.set_sorted(reverse))
5964
+ def set_sorted(descending: false)
5965
+ Utils.wrap_s(_s.set_sorted(descending))
5267
5966
  end
5268
5967
 
5269
5968
  # Create a new Series filled with values from the given index.
@@ -5493,6 +6192,21 @@ module Polars
5493
6192
  StructNameSpace.new(self)
5494
6193
  end
5495
6194
 
6195
+ # Create a plot namespace.
6196
+ #
6197
+ # @note
6198
+ # This functionality is currently considered **unstable**. It may be
6199
+ # changed at any point without it being considered a breaking change.
6200
+ #
6201
+ # @return [SeriesPlot]
6202
+ #
6203
+ # @example Histogram:
6204
+ # s = Polars::Series.new([1, 4, 4, 6, 2, 4, 3, 5, 5, 7, 1])
6205
+ # s.plot.hist
6206
+ def plot
6207
+ SeriesPlot.new(self)
6208
+ end
6209
+
5496
6210
  # Repeat the elements in this Series as specified in the given expression.
5497
6211
  #
5498
6212
  # The repeated elements are expanded into a List.
@@ -5590,12 +6304,12 @@ module Polars
5590
6304
  ts = Utils.datetime_to_int(other, time_unit)
5591
6305
  f = ffi_func("#{op}_<>", Int64, _s)
5592
6306
  fail if f.nil?
5593
- return Utils.wrap_s(f.call(ts))
6307
+ return Utils.wrap_s(f.(ts))
5594
6308
  elsif other.is_a?(::Date) && dtype == Date
5595
6309
  d = Utils.date_to_int(other)
5596
6310
  f = ffi_func("#{op}_<>", Int32, _s)
5597
6311
  fail if f.nil?
5598
- return Utils.wrap_s(f.call(d))
6312
+ return Utils.wrap_s(f.(d))
5599
6313
  end
5600
6314
 
5601
6315
  if other.is_a?(Series)
@@ -5606,7 +6320,7 @@ module Polars
5606
6320
  if f.nil?
5607
6321
  raise NotImplementedError
5608
6322
  end
5609
- Utils.wrap_s(f.call(other))
6323
+ Utils.wrap_s(f.(other))
5610
6324
  end
5611
6325
 
5612
6326
  def ffi_func(name, dtype, _s)
@@ -5621,8 +6335,8 @@ module Polars
5621
6335
  return Utils.wrap_s(_s.send(op, other._s))
5622
6336
  end
5623
6337
 
5624
- if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !is_float
5625
- _s2 = sequence_to_rbseries(name, [other])
6338
+ if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !dtype.float?
6339
+ _s2 = Utils.sequence_to_rbseries(name, [other])
5626
6340
  return Utils.wrap_s(_s.send(op, _s2))
5627
6341
  end
5628
6342
 
@@ -5630,7 +6344,7 @@ module Polars
5630
6344
  if f.nil?
5631
6345
  raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
5632
6346
  end
5633
- Utils.wrap_s(f.call(other))
6347
+ Utils.wrap_s(f.(other))
5634
6348
  end
5635
6349
 
5636
6350
  DTYPE_TO_FFINAME = {
@@ -5656,291 +6370,5 @@ module Polars
5656
6370
  Struct => "struct",
5657
6371
  Binary => "binary"
5658
6372
  }
5659
-
5660
- def series_to_rbseries(name, values)
5661
- # should not be in-place?
5662
- values.rename(name, in_place: true)
5663
- values._s
5664
- end
5665
-
5666
- def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
5667
- # not needed yet
5668
- # if !values.contiguous?
5669
- # end
5670
-
5671
- if values.shape.length == 1
5672
- values, dtype = numo_values_and_dtype(values)
5673
- strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
5674
- if dtype == Numo::RObject
5675
- sequence_to_rbseries(name, values.to_a, strict: strict)
5676
- else
5677
- constructor = numo_type_to_constructor(dtype)
5678
- # TODO improve performance
5679
- constructor.call(name, values.to_a, strict)
5680
- end
5681
- elsif values.shape.sum == 0
5682
- raise Todo
5683
- else
5684
- original_shape = values.shape
5685
- values = values.reshape(original_shape.inject(&:*))
5686
- rb_s = numo_to_rbseries(
5687
- name,
5688
- values,
5689
- strict: strict,
5690
- nan_to_null: nan_to_null
5691
- )
5692
- Utils.wrap_s(rb_s).reshape(original_shape)._s
5693
- end
5694
- end
5695
-
5696
- def numo_values_and_dtype(values)
5697
- [values, values.class]
5698
- end
5699
-
5700
- def numo_type_to_constructor(dtype)
5701
- {
5702
- Numo::Float32 => RbSeries.method(:new_opt_f32),
5703
- Numo::Float64 => RbSeries.method(:new_opt_f64),
5704
- Numo::Int8 => RbSeries.method(:new_opt_i8),
5705
- Numo::Int16 => RbSeries.method(:new_opt_i16),
5706
- Numo::Int32 => RbSeries.method(:new_opt_i32),
5707
- Numo::Int64 => RbSeries.method(:new_opt_i64),
5708
- Numo::UInt8 => RbSeries.method(:new_opt_u8),
5709
- Numo::UInt16 => RbSeries.method(:new_opt_u16),
5710
- Numo::UInt32 => RbSeries.method(:new_opt_u32),
5711
- Numo::UInt64 => RbSeries.method(:new_opt_u64)
5712
- }.fetch(dtype)
5713
- rescue KeyError
5714
- RbSeries.method(:new_object)
5715
- end
5716
-
5717
- def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
5718
- ruby_dtype = nil
5719
-
5720
- if (values.nil? || values.empty?) && dtype.nil?
5721
- dtype = dtype_if_empty || Float32
5722
- elsif dtype == List
5723
- ruby_dtype = ::Array
5724
- end
5725
-
5726
- rb_temporal_types = [::Date, ::DateTime, ::Time]
5727
- rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)
5728
-
5729
- value = _get_first_non_none(values)
5730
- if !value.nil?
5731
- if value.is_a?(Hash)
5732
- return DataFrame.new(values).to_struct(name)._s
5733
- end
5734
- end
5735
-
5736
- if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
5737
- if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
5738
- dtype = Array.new(nil, value.size)
5739
- end
5740
-
5741
- constructor = polars_type_to_constructor(dtype)
5742
- rbseries =
5743
- if dtype == Array
5744
- constructor.call(name, values, strict)
5745
- else
5746
- construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
5747
- end
5748
-
5749
- base_type = dtype.is_a?(DataType) ? dtype.class : dtype
5750
- if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type) || dtype.is_a?(Decimal)
5751
- if rbseries.dtype != dtype
5752
- rbseries = rbseries.cast(dtype, true)
5753
- end
5754
- end
5755
-
5756
- # Uninstanced Decimal is a bit special and has various inference paths
5757
- if dtype == Decimal
5758
- if rbseries.dtype == String
5759
- rbseries = rbseries.str_to_decimal_infer(0)
5760
- elsif rbseries.dtype.float?
5761
- # Go through string so we infer an appropriate scale.
5762
- rbseries = rbseries.cast(
5763
- String, strict: strict, wrap_numerical: false
5764
- ).str_to_decimal_infer(0)
5765
- elsif rbseries.dtype.integer? || rbseries.dtype == Null
5766
- rbseries = rbseries.cast(
5767
- Decimal.new(nil, 0), strict: strict, wrap_numerical: false
5768
- )
5769
- elsif !rbseries.dtype.is_a?(Decimal)
5770
- msg = "can't convert #{rbseries.dtype} to Decimal"
5771
- raise TypeError, msg
5772
- end
5773
- end
5774
-
5775
- rbseries
5776
- elsif dtype == Struct
5777
- struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
5778
- empty = {}
5779
- DataFrame.sequence_to_rbdf(
5780
- values.map { |v| v.nil? ? empty : v },
5781
- schema: struct_schema,
5782
- orient: "row",
5783
- ).to_struct(name)
5784
- else
5785
- if ruby_dtype.nil?
5786
- if value.nil?
5787
- # generic default dtype
5788
- ruby_dtype = Float
5789
- else
5790
- ruby_dtype = value.class
5791
- end
5792
- end
5793
-
5794
- # temporal branch
5795
- if rb_temporal_types.include?(ruby_dtype)
5796
- if dtype.nil?
5797
- dtype = Utils.rb_type_to_dtype(ruby_dtype)
5798
- elsif rb_temporal_types.include?(dtype)
5799
- dtype = Utils.rb_type_to_dtype(dtype)
5800
- end
5801
- # TODO
5802
- time_unit = nil
5803
-
5804
- rb_series = RbSeries.new_from_any_values(name, values, strict)
5805
- if time_unit.nil?
5806
- s = Utils.wrap_s(rb_series)
5807
- else
5808
- s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
5809
- end
5810
- s._s
5811
- elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
5812
- raise Todo
5813
- elsif ruby_dtype == ::Array
5814
- if dtype.is_a?(Object)
5815
- return RbSeries.new_object(name, values, strict)
5816
- end
5817
- if dtype
5818
- srs = sequence_from_anyvalue_or_object(name, values)
5819
- if dtype != srs.dtype
5820
- srs = srs.cast(dtype, strict: false)
5821
- end
5822
- return srs
5823
- end
5824
- sequence_from_anyvalue_or_object(name, values)
5825
- elsif ruby_dtype == Series
5826
- RbSeries.new_series_list(name, values.map(&:_s), strict)
5827
- elsif ruby_dtype == RbSeries
5828
- RbSeries.new_series_list(name, values, strict)
5829
- else
5830
- constructor =
5831
- if value.is_a?(::String)
5832
- if value.encoding == Encoding::UTF_8
5833
- RbSeries.method(:new_str)
5834
- else
5835
- RbSeries.method(:new_binary)
5836
- end
5837
- elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
5838
- # TODO improve performance
5839
- RbSeries.method(:new_opt_f64)
5840
- else
5841
- rb_type_to_constructor(value.class)
5842
- end
5843
-
5844
- construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
5845
- end
5846
- end
5847
- end
5848
-
5849
- def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
5850
- begin
5851
- constructor.call(name, values, strict)
5852
- rescue
5853
- if dtype.nil?
5854
- RbSeries.new_from_any_values(name, values, strict)
5855
- else
5856
- RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
5857
- end
5858
- end
5859
- end
5860
-
5861
- def sequence_from_anyvalue_or_object(name, values)
5862
- RbSeries.new_from_any_values(name, values, true)
5863
- rescue
5864
- RbSeries.new_object(name, values, false)
5865
- end
5866
-
5867
- POLARS_TYPE_TO_CONSTRUCTOR = {
5868
- Float32 => RbSeries.method(:new_opt_f32),
5869
- Float64 => RbSeries.method(:new_opt_f64),
5870
- Int8 => RbSeries.method(:new_opt_i8),
5871
- Int16 => RbSeries.method(:new_opt_i16),
5872
- Int32 => RbSeries.method(:new_opt_i32),
5873
- Int64 => RbSeries.method(:new_opt_i64),
5874
- Int128 => RbSeries.method(:new_opt_i128),
5875
- UInt8 => RbSeries.method(:new_opt_u8),
5876
- UInt16 => RbSeries.method(:new_opt_u16),
5877
- UInt32 => RbSeries.method(:new_opt_u32),
5878
- UInt64 => RbSeries.method(:new_opt_u64),
5879
- UInt128 => RbSeries.method(:new_opt_u128),
5880
- Decimal => RbSeries.method(:new_decimal),
5881
- Date => RbSeries.method(:new_from_any_values),
5882
- Datetime => RbSeries.method(:new_from_any_values),
5883
- Duration => RbSeries.method(:new_from_any_values),
5884
- Time => RbSeries.method(:new_from_any_values),
5885
- Boolean => RbSeries.method(:new_opt_bool),
5886
- Utf8 => RbSeries.method(:new_str),
5887
- Object => RbSeries.method(:new_object),
5888
- Categorical => RbSeries.method(:new_str),
5889
- Enum => RbSeries.method(:new_str),
5890
- Binary => RbSeries.method(:new_binary),
5891
- Null => RbSeries.method(:new_null)
5892
- }
5893
-
5894
- SYM_TYPE_TO_CONSTRUCTOR = {
5895
- f32: RbSeries.method(:new_opt_f32),
5896
- f64: RbSeries.method(:new_opt_f64),
5897
- i8: RbSeries.method(:new_opt_i8),
5898
- i16: RbSeries.method(:new_opt_i16),
5899
- i32: RbSeries.method(:new_opt_i32),
5900
- i64: RbSeries.method(:new_opt_i64),
5901
- i128: RbSeries.method(:new_opt_i128),
5902
- u8: RbSeries.method(:new_opt_u8),
5903
- u16: RbSeries.method(:new_opt_u16),
5904
- u32: RbSeries.method(:new_opt_u32),
5905
- u64: RbSeries.method(:new_opt_u64),
5906
- u128: RbSeries.method(:new_opt_u128),
5907
- bool: RbSeries.method(:new_opt_bool),
5908
- str: RbSeries.method(:new_str)
5909
- }
5910
-
5911
- def polars_type_to_constructor(dtype)
5912
- if dtype.is_a?(Array)
5913
- lambda do |name, values, strict|
5914
- RbSeries.new_array(dtype.width, dtype.inner, name, values, strict)
5915
- end
5916
- elsif dtype.is_a?(Class) && dtype < DataType
5917
- POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
5918
- elsif dtype.is_a?(DataType)
5919
- POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.class)
5920
- else
5921
- SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
5922
- end
5923
- rescue KeyError
5924
- raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
5925
- end
5926
-
5927
- RB_TYPE_TO_CONSTRUCTOR = {
5928
- Float => RbSeries.method(:new_opt_f64),
5929
- Integer => RbSeries.method(:new_opt_i64),
5930
- TrueClass => RbSeries.method(:new_opt_bool),
5931
- FalseClass => RbSeries.method(:new_opt_bool),
5932
- BigDecimal => RbSeries.method(:new_decimal),
5933
- NilClass => RbSeries.method(:new_null)
5934
- }
5935
-
5936
- def rb_type_to_constructor(dtype)
5937
- RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
5938
- rescue KeyError
5939
- RbSeries.method(:new_object)
5940
- end
5941
-
5942
- def _get_first_non_none(values)
5943
- values.find { |v| !v.nil? }
5944
- end
5945
6373
  end
5946
6374
  end