polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/series.rb CHANGED
@@ -34,7 +34,7 @@ module Polars
34
34
  # s3 = Polars::Series.new([1, 2, 3])
35
35
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
36
36
  # Handle case where values are passed as the first argument
37
- if !name.nil? && !name.is_a?(String)
37
+ if !name.nil? && !name.is_a?(::String)
38
38
  if values.nil?
39
39
  values = name
40
40
  name = nil
@@ -46,7 +46,7 @@ module Polars
46
46
  name = "" if name.nil?
47
47
 
48
48
  # TODO improve
49
- if values.is_a?(Range) && values.begin.is_a?(String)
49
+ if values.is_a?(Range) && values.begin.is_a?(::String)
50
50
  values = values.to_a
51
51
  end
52
52
 
@@ -341,7 +341,7 @@ module Polars
341
341
  def []=(key, value)
342
342
  if value.is_a?(::Array)
343
343
  if is_numeric || is_datelike
344
- set_at_idx(key, value)
344
+ scatter(key, value)
345
345
  return
346
346
  end
347
347
  raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
@@ -351,9 +351,9 @@ module Polars
351
351
  if key.dtype == Boolean
352
352
  self._s = set(key, value)._s
353
353
  elsif key.dtype == UInt64
354
- self._s = set_at_idx(key.cast(UInt32), value)._s
354
+ self._s = scatter(key.cast(UInt32), value)._s
355
355
  elsif key.dtype == UInt32
356
- self._s = set_at_idx(key, value)._s
356
+ self._s = scatter(key, value)._s
357
357
  else
358
358
  raise Todo
359
359
  end
@@ -432,6 +432,18 @@ module Polars
432
432
  end
433
433
  alias_method :all, :all?
434
434
 
435
+ # Check if all boolean values in the column are `false`.
436
+ #
437
+ # @return [Boolean]
438
+ def none?(&block)
439
+ if block_given?
440
+ apply(&block).none?
441
+ else
442
+ to_frame.select(Polars.col(name).is_not.all).to_series[0]
443
+ end
444
+ end
445
+ alias_method :none, :none?
446
+
435
447
  # Compute the logarithm to a given base.
436
448
  #
437
449
  # @param base [Float]
@@ -723,6 +735,212 @@ module Polars
723
735
  Utils.wrap_df(_s.to_dummies(separator, drop_first))
724
736
  end
725
737
 
738
+ # Bin continuous values into discrete categories.
739
+ #
740
+ # @param breaks [Array]
741
+ # List of unique cut points.
742
+ # @param labels [Array]
743
+ # Names of the categories. The number of labels must be equal to the number
744
+ # of cut points plus one.
745
+ # @param left_closed [Boolean]
746
+ # Set the intervals to be left-closed instead of right-closed.
747
+ # @param include_breaks [Boolean]
748
+ # Include a column with the right endpoint of the bin each observation falls
749
+ # in. This will change the data type of the output from a
750
+ # `Categorical` to a `Struct`.
751
+ #
752
+ # @return [Series]
753
+ #
754
+ # @example Divide the column into three categories.
755
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
756
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
757
+ # # =>
758
+ # # shape: (5,)
759
+ # # Series: 'foo' [cat]
760
+ # # [
761
+ # # "a"
762
+ # # "a"
763
+ # # "b"
764
+ # # "b"
765
+ # # "c"
766
+ # # ]
767
+ #
768
+ # @example Create a DataFrame with the breakpoint and category for each value.
769
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
770
+ # s.to_frame.with_columns(cut).unnest("cut")
771
+ # # =>
772
+ # # shape: (5, 3)
773
+ # # ┌─────┬─────────────┬────────────┐
774
+ # # │ foo ┆ break_point ┆ category │
775
+ # # │ --- ┆ --- ┆ --- │
776
+ # # │ i64 ┆ f64 ┆ cat │
777
+ # # ╞═════╪═════════════╪════════════╡
778
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
779
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
780
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
781
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
782
+ # # │ 2 ┆ inf ┆ (1, inf] │
783
+ # # └─────┴─────────────┴────────────┘
784
+ def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
785
+ result = (
786
+ to_frame
787
+ .select(
788
+ Polars.col(name).cut(
789
+ breaks,
790
+ labels: labels,
791
+ left_closed: left_closed,
792
+ include_breaks: include_breaks
793
+ )
794
+ )
795
+ .to_series
796
+ )
797
+
798
+ if include_breaks
799
+ result = result.struct.rename_fields(["break_point", "category"])
800
+ end
801
+
802
+ result
803
+ end
804
+
805
+ # Bin continuous values into discrete categories based on their quantiles.
806
+ #
807
+ # @param quantiles [Array]
808
+ # Either a list of quantile probabilities between 0 and 1 or a positive
809
+ # integer determining the number of bins with uniform probability.
810
+ # @param labels [Array]
811
+ # Names of the categories. The number of labels must be equal to the number
812
+ # of cut points plus one.
813
+ # @param left_closed [Boolean]
814
+ # Set the intervals to be left-closed instead of right-closed.
815
+ # @param allow_duplicates [Boolean]
816
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
817
+ # rather than raising a `DuplicateError`. This can happen even with unique
818
+ # probabilities, depending on the data.
819
+ # @param include_breaks [Boolean]
820
+ # Include a column with the right endpoint of the bin each observation falls
821
+ # in. This will change the data type of the output from a
822
+ # `Categorical` to a `Struct`.
823
+ #
824
+ # @return [Series]
825
+ #
826
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
827
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
828
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
829
+ # # =>
830
+ # # shape: (5,)
831
+ # # Series: 'foo' [cat]
832
+ # # [
833
+ # # "a"
834
+ # # "a"
835
+ # # "b"
836
+ # # "b"
837
+ # # "c"
838
+ # # ]
839
+ #
840
+ # @example Divide a column into two categories using uniform quantile probabilities.
841
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
842
+ # # =>
843
+ # # shape: (5,)
844
+ # # Series: 'foo' [cat]
845
+ # # [
846
+ # # "low"
847
+ # # "low"
848
+ # # "high"
849
+ # # "high"
850
+ # # "high"
851
+ # # ]
852
+ #
853
+ # @example Create a DataFrame with the breakpoint and category for each value.
854
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
855
+ # s.to_frame.with_columns(cut).unnest("cut")
856
+ # # =>
857
+ # # shape: (5, 3)
858
+ # # ┌─────┬─────────────┬────────────┐
859
+ # # │ foo ┆ break_point ┆ category │
860
+ # # │ --- ┆ --- ┆ --- │
861
+ # # │ i64 ┆ f64 ┆ cat │
862
+ # # ╞═════╪═════════════╪════════════╡
863
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
864
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
865
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
866
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
867
+ # # │ 2 ┆ inf ┆ (1, inf] │
868
+ # # └─────┴─────────────┴────────────┘
869
+ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
870
+ result = (
871
+ to_frame
872
+ .select(
873
+ Polars.col(name).qcut(
874
+ quantiles,
875
+ labels: labels,
876
+ left_closed: left_closed,
877
+ allow_duplicates: allow_duplicates,
878
+ include_breaks: include_breaks
879
+ )
880
+ )
881
+ .to_series
882
+ )
883
+
884
+ if include_breaks
885
+ result = result.struct.rename_fields(["break_point", "category"])
886
+ end
887
+
888
+ result
889
+ end
890
+
891
+ # Get the lengths of runs of identical values.
892
+ #
893
+ # @return [Series]
894
+ #
895
+ # @example
896
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
897
+ # s.rle.struct.unnest
898
+ # # =>
899
+ # # shape: (6, 2)
900
+ # # ┌─────────┬────────┐
901
+ # # │ lengths ┆ values │
902
+ # # │ --- ┆ --- │
903
+ # # │ i32 ┆ i64 │
904
+ # # ╞═════════╪════════╡
905
+ # # │ 2 ┆ 1 │
906
+ # # │ 1 ┆ 2 │
907
+ # # │ 1 ┆ 1 │
908
+ # # │ 1 ┆ null │
909
+ # # │ 1 ┆ 1 │
910
+ # # │ 2 ┆ 3 │
911
+ # # └─────────┴────────┘
912
+ def rle
913
+ super
914
+ end
915
+
916
+ # Map values to run IDs.
917
+ #
918
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
919
+ # which it falls. This is especially useful when you want to define groups by
920
+ # runs of identical values rather than the values themselves.
921
+ #
922
+ # @return [Series]
923
+ #
924
+ # @example
925
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
926
+ # s.rle_id()
927
+ # # =>
928
+ # # shape: (8,)
929
+ # # Series: 's' [u32]
930
+ # # [
931
+ # # 0
932
+ # # 0
933
+ # # 1
934
+ # # 2
935
+ # # 3
936
+ # # 4
937
+ # # 5
938
+ # # 5
939
+ # # ]
940
+ def rle_id
941
+ super
942
+ end
943
+
726
944
  # Count the unique values in a Series.
727
945
  #
728
946
  # @param sort [Boolean]
@@ -799,7 +1017,7 @@ module Polars
799
1017
  # Number of valid values there should be in the window before the expression
800
1018
  # is evaluated. valid values = `length - null_count`
801
1019
  # @param parallel [Boolean]
802
- # Run in parallel. Don't do this in a groupby or another operation that
1020
+ # Run in parallel. Don't do this in a group by or another operation that
803
1021
  # already has much parallelization.
804
1022
  #
805
1023
  # @return [Series]
@@ -1236,15 +1454,45 @@ module Polars
1236
1454
 
1237
1455
  # Return the `k` largest elements.
1238
1456
  #
1239
- # If `reverse: true`, the smallest elements will be given.
1457
+ # @param k [Integer]
1458
+ # Number of elements to return.
1459
+ #
1460
+ # @return [Series]
1461
+ #
1462
+ # @example
1463
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1464
+ # s.top_k(k: 3)
1465
+ # # =>
1466
+ # # shape: (3,)
1467
+ # # Series: 'a' [i64]
1468
+ # # [
1469
+ # # 5
1470
+ # # 4
1471
+ # # 3
1472
+ # # ]
1473
+ def top_k(k: 5)
1474
+ super
1475
+ end
1476
+
1477
+ # Return the `k` smallest elements.
1240
1478
  #
1241
1479
  # @param k [Integer]
1242
1480
  # Number of elements to return.
1243
- # @param reverse [Boolean]
1244
- # Return the smallest elements.
1245
1481
  #
1246
1482
  # @return [Series]
1247
- def top_k(k: 5, reverse: false)
1483
+ #
1484
+ # @example
1485
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1486
+ # s.bottom_k(k: 3)
1487
+ # # =>
1488
+ # # shape: (3,)
1489
+ # # Series: 'a' [i64]
1490
+ # # [
1491
+ # # 1
1492
+ # # 2
1493
+ # # 3
1494
+ # # ]
1495
+ def bottom_k(k: 5)
1248
1496
  super
1249
1497
  end
1250
1498
 
@@ -1693,26 +1941,40 @@ module Polars
1693
1941
  # @example
1694
1942
  # s = Polars::Series.new("a", [1, 2, 3])
1695
1943
  # s2 = Polars::Series.new("b", [4, 5, 6])
1696
- # s.series_equal(s)
1944
+ # s.equals(s)
1697
1945
  # # => true
1698
- # s.series_equal(s2)
1946
+ # s.equals(s2)
1699
1947
  # # => false
1700
- def series_equal(other, null_equal: false, strict: false)
1701
- _s.series_equal(other._s, null_equal, strict)
1948
+ def equals(other, null_equal: false, strict: false)
1949
+ _s.equals(other._s, null_equal, strict)
1702
1950
  end
1951
+ alias_method :series_equal, :equals
1703
1952
 
1704
- # Length of this Series.
1953
+ # Return the number of elements in the Series, including null values.
1705
1954
  #
1706
1955
  # @return [Integer]
1707
1956
  #
1708
1957
  # @example
1709
- # s = Polars::Series.new("a", [1, 2, 3])
1958
+ # s = Polars::Series.new("a", [1, 2, nil])
1959
+ # s.count
1960
+ # # => 3
1961
+ def count
1962
+ warn "`Series#count` will exclude null values in 0.9.0. Use `Series#length` instead."
1963
+ # len - null_count
1964
+ len
1965
+ end
1966
+
1967
+ # Return the number of elements in the Series.
1968
+ #
1969
+ # @return [Integer]
1970
+ #
1971
+ # @example
1972
+ # s = Polars::Series.new("a", [1, 2, nil])
1710
1973
  # s.len
1711
1974
  # # => 3
1712
1975
  def len
1713
1976
  _s.len
1714
1977
  end
1715
- alias_method :count, :len
1716
1978
  alias_method :length, :len
1717
1979
  alias_method :size, :len
1718
1980
 
@@ -1874,7 +2136,7 @@ module Polars
1874
2136
  # s.is_utf8
1875
2137
  # # => true
1876
2138
  def is_utf8
1877
- dtype == Utf8
2139
+ dtype == String
1878
2140
  end
1879
2141
  alias_method :utf8?, :is_utf8
1880
2142
 
@@ -1970,7 +2232,7 @@ module Polars
1970
2232
  # # 10
1971
2233
  # # 3
1972
2234
  # # ]
1973
- def set_at_idx(idx, value)
2235
+ def scatter(idx, value)
1974
2236
  if idx.is_a?(Integer)
1975
2237
  idx = [idx]
1976
2238
  end
@@ -1979,7 +2241,7 @@ module Polars
1979
2241
  end
1980
2242
 
1981
2243
  idx = Series.new("", idx)
1982
- if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
2244
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
1983
2245
  value = Series.new("", [value])
1984
2246
 
1985
2247
  # if we need to set more than a single value, we extend it
@@ -1989,9 +2251,10 @@ module Polars
1989
2251
  elsif !value.is_a?(Series)
1990
2252
  value = Series.new("", value)
1991
2253
  end
1992
- _s.set_at_idx(idx._s, value._s)
2254
+ _s.scatter(idx._s, value._s)
1993
2255
  self
1994
2256
  end
2257
+ alias_method :set_at_idx, :scatter
1995
2258
 
1996
2259
  # Create an empty copy of the current Series.
1997
2260
  #
@@ -2818,7 +3081,8 @@ module Polars
2818
3081
  weights: nil,
2819
3082
  min_periods: nil,
2820
3083
  center: false,
2821
- ddof: 1
3084
+ ddof: 1,
3085
+ warn_if_unsorted: true
2822
3086
  )
2823
3087
  to_frame
2824
3088
  .select(
@@ -2827,7 +3091,8 @@ module Polars
2827
3091
  weights: weights,
2828
3092
  min_periods: min_periods,
2829
3093
  center: center,
2830
- ddof: ddof
3094
+ ddof: ddof,
3095
+ warn_if_unsorted: warn_if_unsorted
2831
3096
  )
2832
3097
  )
2833
3098
  .to_series
@@ -2871,7 +3136,8 @@ module Polars
2871
3136
  weights: nil,
2872
3137
  min_periods: nil,
2873
3138
  center: false,
2874
- ddof: 1
3139
+ ddof: 1,
3140
+ warn_if_unsorted: true
2875
3141
  )
2876
3142
  to_frame
2877
3143
  .select(
@@ -2880,7 +3146,8 @@ module Polars
2880
3146
  weights: weights,
2881
3147
  min_periods: min_periods,
2882
3148
  center: center,
2883
- ddof: ddof
3149
+ ddof: ddof,
3150
+ warn_if_unsorted: warn_if_unsorted
2884
3151
  )
2885
3152
  )
2886
3153
  .to_series
@@ -2922,7 +3189,8 @@ module Polars
2922
3189
  window_size,
2923
3190
  weights: nil,
2924
3191
  min_periods: nil,
2925
- center: false
3192
+ center: false,
3193
+ warn_if_unsorted: true
2926
3194
  )
2927
3195
  if min_periods.nil?
2928
3196
  min_periods = window_size
@@ -2934,7 +3202,8 @@ module Polars
2934
3202
  window_size,
2935
3203
  weights: weights,
2936
3204
  min_periods: min_periods,
2937
- center: center
3205
+ center: center,
3206
+ warn_if_unsorted: warn_if_unsorted
2938
3207
  )
2939
3208
  )
2940
3209
  .to_series
@@ -2993,7 +3262,8 @@ module Polars
2993
3262
  window_size: 2,
2994
3263
  weights: nil,
2995
3264
  min_periods: nil,
2996
- center: false
3265
+ center: false,
3266
+ warn_if_unsorted: true
2997
3267
  )
2998
3268
  if min_periods.nil?
2999
3269
  min_periods = window_size
@@ -3007,7 +3277,8 @@ module Polars
3007
3277
  window_size: window_size,
3008
3278
  weights: weights,
3009
3279
  min_periods: min_periods,
3010
- center: center
3280
+ center: center,
3281
+ warn_if_unsorted: warn_if_unsorted
3011
3282
  )
3012
3283
  )
3013
3284
  .to_series
@@ -3097,7 +3368,7 @@ module Polars
3097
3368
  # s.peak_max
3098
3369
  # # =>
3099
3370
  # # shape: (5,)
3100
- # # Series: '' [bool]
3371
+ # # Series: 'a' [bool]
3101
3372
  # # [
3102
3373
  # # false
3103
3374
  # # false
@@ -3106,7 +3377,7 @@ module Polars
3106
3377
  # # true
3107
3378
  # # ]
3108
3379
  def peak_max
3109
- Utils.wrap_s(_s.peak_max)
3380
+ super
3110
3381
  end
3111
3382
 
3112
3383
  # Get a boolean mask of the local minimum peaks.
@@ -3118,7 +3389,7 @@ module Polars
3118
3389
  # s.peak_min
3119
3390
  # # =>
3120
3391
  # # shape: (5,)
3121
- # # Series: '' [bool]
3392
+ # # Series: 'a' [bool]
3122
3393
  # # [
3123
3394
  # # false
3124
3395
  # # true
@@ -3127,7 +3398,7 @@ module Polars
3127
3398
  # # false
3128
3399
  # # ]
3129
3400
  def peak_min
3130
- Utils.wrap_s(_s.peak_min)
3401
+ super
3131
3402
  end
3132
3403
 
3133
3404
  # Count the number of unique values in this Series.
@@ -3211,13 +3482,13 @@ module Polars
3211
3482
  # s.interpolate
3212
3483
  # # =>
3213
3484
  # # shape: (5,)
3214
- # # Series: 'a' [i64]
3485
+ # # Series: 'a' [f64]
3215
3486
  # # [
3216
- # # 1
3217
- # # 2
3218
- # # 3
3219
- # # 4
3220
- # # 5
3487
+ # # 1.0
3488
+ # # 2.0
3489
+ # # 3.0
3490
+ # # 4.0
3491
+ # # 5.0
3221
3492
  # # ]
3222
3493
  def interpolate(method: "linear")
3223
3494
  super
@@ -3260,7 +3531,7 @@ module Polars
3260
3531
  # s.rank
3261
3532
  # # =>
3262
3533
  # # shape: (5,)
3263
- # # Series: 'a' [f32]
3534
+ # # Series: 'a' [f64]
3264
3535
  # # [
3265
3536
  # # 3.0
3266
3537
  # # 4.5
@@ -3749,7 +4020,7 @@ module Polars
3749
4020
  return Utils.wrap_s(_s.send(op, other._s))
3750
4021
  end
3751
4022
 
3752
- if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
4023
+ if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !is_float
3753
4024
  _s2 = sequence_to_rbseries(name, [other])
3754
4025
  return Utils.wrap_s(_s.send(op, _s2))
3755
4026
  end
@@ -3915,7 +4186,7 @@ module Polars
3915
4186
  return RbSeries.new_series_list(name, values, strict)
3916
4187
  else
3917
4188
  constructor =
3918
- if value.is_a?(String)
4189
+ if value.is_a?(::String)
3919
4190
  if value.encoding == Encoding::UTF_8
3920
4191
  RbSeries.method(:new_str)
3921
4192
  else
@@ -3998,7 +4269,8 @@ module Polars
3998
4269
  Integer => RbSeries.method(:new_opt_i64),
3999
4270
  TrueClass => RbSeries.method(:new_opt_bool),
4000
4271
  FalseClass => RbSeries.method(:new_opt_bool),
4001
- BigDecimal => RbSeries.method(:new_decimal)
4272
+ BigDecimal => RbSeries.method(:new_decimal),
4273
+ NilClass => RbSeries.method(:new_null)
4002
4274
  }
4003
4275
 
4004
4276
  def rb_type_to_constructor(dtype)