polars-df 0.11.0-x86_64-darwin → 0.13.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -0
  3. data/Cargo.lock +428 -450
  4. data/LICENSE-THIRD-PARTY.txt +2212 -1952
  5. data/lib/polars/3.1/polars.bundle +0 -0
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +35 -7
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +15 -8
  35. data/lib/polars/lazy_frame.rb +123 -105
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +108 -191
  41. data/lib/polars/string_expr.rb +51 -76
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/series.rb CHANGED
@@ -1017,18 +1017,18 @@ module Polars
1017
1017
  # s.rle.struct.unnest
1018
1018
  # # =>
1019
1019
  # # shape: (6, 2)
1020
- # # ┌─────────┬────────┐
1021
- # # │ lengths ┆ values │
1022
- # # │ --- ┆ --- │
1023
- # # │ i32 ┆ i64 │
1024
- # # ╞═════════╪════════╡
1025
- # # │ 2 ┆ 1 │
1026
- # # │ 1 ┆ 2 │
1027
- # # │ 1 ┆ 1 │
1028
- # # │ 1 ┆ null │
1029
- # # │ 1 ┆ 1 │
1030
- # # │ 2 ┆ 3 │
1031
- # # └─────────┴────────┘
1020
+ # # ┌─────┬───────┐
1021
+ # # │ len ┆ value │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ u32 ┆ i64 │
1024
+ # # ╞═════╪═══════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────┴───────┘
1032
1032
  def rle
1033
1033
  super
1034
1034
  end
@@ -1082,8 +1082,24 @@ module Polars
1082
1082
  # # │ 2 ┆ 2 │
1083
1083
  # # │ 3 ┆ 1 │
1084
1084
  # # └─────┴────────┘
1085
- def value_counts(sort: false)
1086
- Utils.wrap_df(_s.value_counts(sort))
1085
+ def value_counts(
1086
+ sort: false,
1087
+ parallel: false,
1088
+ name: nil,
1089
+ normalize: false
1090
+ )
1091
+ if name.nil?
1092
+ if normalize
1093
+ name = "proportion"
1094
+ else
1095
+ name = "count"
1096
+ end
1097
+ end
1098
+ DataFrame._from_rbdf(
1099
+ self._s.value_counts(
1100
+ sort, parallel, name, normalize
1101
+ )
1102
+ )
1087
1103
  end
1088
1104
 
1089
1105
  # Return a count of the unique values in the order of appearance.
@@ -1362,7 +1378,7 @@ module Polars
1362
1378
  # # 2
1363
1379
  # # ]
1364
1380
  def limit(n = 10)
1365
- to_frame.select(Utils.col(name).limit(n)).to_series
1381
+ to_frame.select(F.col(name).limit(n)).to_series
1366
1382
  end
1367
1383
 
1368
1384
  # Get a slice of this Series.
@@ -1386,7 +1402,7 @@ module Polars
1386
1402
  # # 3
1387
1403
  # # ]
1388
1404
  def slice(offset, length = nil)
1389
- super
1405
+ self.class._from_rbseries(_s.slice(offset, length))
1390
1406
  end
1391
1407
 
1392
1408
  # Append a Series to this one.
@@ -1494,7 +1510,7 @@ module Polars
1494
1510
  # # 2
1495
1511
  # # ]
1496
1512
  def head(n = 10)
1497
- to_frame.select(Utils.col(name).head(n)).to_series
1513
+ to_frame.select(F.col(name).head(n)).to_series
1498
1514
  end
1499
1515
 
1500
1516
  # Get the last `n` rows.
@@ -1515,7 +1531,7 @@ module Polars
1515
1531
  # # 3
1516
1532
  # # ]
1517
1533
  def tail(n = 10)
1518
- to_frame.select(Utils.col(name).tail(n)).to_series
1534
+ to_frame.select(F.col(name).tail(n)).to_series
1519
1535
  end
1520
1536
 
1521
1537
  # Take every nth value in the Series and return as new Series.
@@ -1594,7 +1610,7 @@ module Polars
1594
1610
  # # 4
1595
1611
  # # 3
1596
1612
  # # ]
1597
- def top_k(k: 5, nulls_last: false, multithreaded: true)
1613
+ def top_k(k: 5)
1598
1614
  super
1599
1615
  end
1600
1616
 
@@ -1616,7 +1632,7 @@ module Polars
1616
1632
  # # 2
1617
1633
  # # 3
1618
1634
  # # ]
1619
- def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1635
+ def bottom_k(k: 5)
1620
1636
  super
1621
1637
  end
1622
1638
 
@@ -1774,9 +1790,10 @@ module Polars
1774
1790
  # Use this to swiftly assert a Series does not have null values.
1775
1791
  #
1776
1792
  # @return [Boolean]
1777
- def has_validity
1778
- _s.has_validity
1793
+ def has_nulls
1794
+ _s.has_nulls
1779
1795
  end
1796
+ alias_method :has_validity, :has_nulls
1780
1797
 
1781
1798
  # Check if the Series is empty.
1782
1799
  #
@@ -2054,11 +2071,12 @@ module Polars
2054
2071
  #
2055
2072
  # @param other [Series]
2056
2073
  # Series to compare with.
2074
+ # @param strict [Boolean]
2075
+ # Require data types to match.
2076
+ # @param check_names [Boolean]
2077
+ # Require names to match.
2057
2078
  # @param null_equal [Boolean]
2058
2079
  # Consider null values as equal.
2059
- # @param strict [Boolean]
2060
- # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
2061
- # `:i64` will return `false`.
2062
2080
  #
2063
2081
  # @return [Boolean]
2064
2082
  #
@@ -2069,8 +2087,8 @@ module Polars
2069
2087
  # # => true
2070
2088
  # s.equals(s2)
2071
2089
  # # => false
2072
- def equals(other, null_equal: false, strict: false)
2073
- _s.equals(other._s, null_equal, strict)
2090
+ def equals(other, strict: false, check_names: false, null_equal: false)
2091
+ _s.equals(other._s, strict, check_names, null_equal)
2074
2092
  end
2075
2093
  alias_method :series_equal, :equals
2076
2094
 
@@ -3004,16 +3022,7 @@ module Polars
3004
3022
  min_periods: nil,
3005
3023
  center: false
3006
3024
  )
3007
- to_frame
3008
- .select(
3009
- Polars.col(name).rolling_min(
3010
- window_size,
3011
- weights: weights,
3012
- min_periods: min_periods,
3013
- center: center
3014
- )
3015
- )
3016
- .to_series
3025
+ super
3017
3026
  end
3018
3027
 
3019
3028
  # Apply a rolling max (moving max) over the values in this array.
@@ -3054,16 +3063,7 @@ module Polars
3054
3063
  min_periods: nil,
3055
3064
  center: false
3056
3065
  )
3057
- to_frame
3058
- .select(
3059
- Polars.col(name).rolling_max(
3060
- window_size,
3061
- weights: weights,
3062
- min_periods: min_periods,
3063
- center: center
3064
- )
3065
- )
3066
- .to_series
3066
+ super
3067
3067
  end
3068
3068
 
3069
3069
  # Apply a rolling mean (moving mean) over the values in this array.
@@ -3104,16 +3104,7 @@ module Polars
3104
3104
  min_periods: nil,
3105
3105
  center: false
3106
3106
  )
3107
- to_frame
3108
- .select(
3109
- Polars.col(name).rolling_mean(
3110
- window_size,
3111
- weights: weights,
3112
- min_periods: min_periods,
3113
- center: center
3114
- )
3115
- )
3116
- .to_series
3107
+ super
3117
3108
  end
3118
3109
 
3119
3110
  # Apply a rolling sum (moving sum) over the values in this array.
@@ -3154,16 +3145,7 @@ module Polars
3154
3145
  min_periods: nil,
3155
3146
  center: false
3156
3147
  )
3157
- to_frame
3158
- .select(
3159
- Polars.col(name).rolling_sum(
3160
- window_size,
3161
- weights: weights,
3162
- min_periods: min_periods,
3163
- center: center
3164
- )
3165
- )
3166
- .to_series
3148
+ super
3167
3149
  end
3168
3150
 
3169
3151
  # Compute a rolling std dev.
@@ -3204,21 +3186,9 @@ module Polars
3204
3186
  weights: nil,
3205
3187
  min_periods: nil,
3206
3188
  center: false,
3207
- ddof: 1,
3208
- warn_if_unsorted: true
3189
+ ddof: 1
3209
3190
  )
3210
- to_frame
3211
- .select(
3212
- Polars.col(name).rolling_std(
3213
- window_size,
3214
- weights: weights,
3215
- min_periods: min_periods,
3216
- center: center,
3217
- ddof: ddof,
3218
- warn_if_unsorted: warn_if_unsorted
3219
- )
3220
- )
3221
- .to_series
3191
+ super
3222
3192
  end
3223
3193
 
3224
3194
  # Compute a rolling variance.
@@ -3259,21 +3229,9 @@ module Polars
3259
3229
  weights: nil,
3260
3230
  min_periods: nil,
3261
3231
  center: false,
3262
- ddof: 1,
3263
- warn_if_unsorted: true
3232
+ ddof: 1
3264
3233
  )
3265
- to_frame
3266
- .select(
3267
- Polars.col(name).rolling_var(
3268
- window_size,
3269
- weights: weights,
3270
- min_periods: min_periods,
3271
- center: center,
3272
- ddof: ddof,
3273
- warn_if_unsorted: warn_if_unsorted
3274
- )
3275
- )
3276
- .to_series
3234
+ super
3277
3235
  end
3278
3236
 
3279
3237
  # def rolling_apply
@@ -3312,24 +3270,9 @@ module Polars
3312
3270
  window_size,
3313
3271
  weights: nil,
3314
3272
  min_periods: nil,
3315
- center: false,
3316
- warn_if_unsorted: true
3273
+ center: false
3317
3274
  )
3318
- if min_periods.nil?
3319
- min_periods = window_size
3320
- end
3321
-
3322
- to_frame
3323
- .select(
3324
- Polars.col(name).rolling_median(
3325
- window_size,
3326
- weights: weights,
3327
- min_periods: min_periods,
3328
- center: center,
3329
- warn_if_unsorted: warn_if_unsorted
3330
- )
3331
- )
3332
- .to_series
3275
+ super
3333
3276
  end
3334
3277
 
3335
3278
  # Compute a rolling quantile.
@@ -3385,26 +3328,9 @@ module Polars
3385
3328
  window_size: 2,
3386
3329
  weights: nil,
3387
3330
  min_periods: nil,
3388
- center: false,
3389
- warn_if_unsorted: true
3331
+ center: false
3390
3332
  )
3391
- if min_periods.nil?
3392
- min_periods = window_size
3393
- end
3394
-
3395
- to_frame
3396
- .select(
3397
- Polars.col(name).rolling_quantile(
3398
- quantile,
3399
- interpolation: interpolation,
3400
- window_size: window_size,
3401
- weights: weights,
3402
- min_periods: min_periods,
3403
- center: center,
3404
- warn_if_unsorted: warn_if_unsorted
3405
- )
3406
- )
3407
- .to_series
3333
+ super
3408
3334
  end
3409
3335
 
3410
3336
  # Compute a rolling skew.
@@ -3806,7 +3732,7 @@ module Polars
3806
3732
  # # null
3807
3733
  # # 10
3808
3734
  # # ]
3809
- def clip(min_val, max_val)
3735
+ def clip(min_val = nil, max_val = nil)
3810
3736
  super
3811
3737
  end
3812
3738
 
@@ -3883,33 +3809,20 @@ module Polars
3883
3809
  # # 200
3884
3810
  # # ]
3885
3811
  #
3886
- # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3812
+ # @example Passing a mapping with replacements is also supported as syntactic sugar.
3887
3813
  # mapping = {2 => 100, 3 => 200}
3888
- # s.replace(mapping, default: -1)
3814
+ # s.replace(mapping)
3889
3815
  # # =>
3890
3816
  # # shape: (4,)
3891
3817
  # # Series: '' [i64]
3892
3818
  # # [
3893
- # # -1
3819
+ # # 1
3894
3820
  # # 100
3895
3821
  # # 100
3896
3822
  # # 200
3897
3823
  # # ]
3898
3824
  #
3899
- # @example The default can be another Series.
3900
- # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3901
- # s.replace(2, 100, default: default)
3902
- # # =>
3903
- # # shape: (4,)
3904
- # # Series: '' [f64]
3905
- # # [
3906
- # # 2.5
3907
- # # 100.0
3908
- # # 100.0
3909
- # # 10.0
3910
- # # ]
3911
- #
3912
- # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3825
+ # @example The original data type is preserved when replacing by values of a different data type.
3913
3826
  # s = Polars::Series.new(["x", "y", "z"])
3914
3827
  # mapping = {"x" => 1, "y" => 2, "z" => 3}
3915
3828
  # s.replace(mapping)
@@ -3921,28 +3834,6 @@ module Polars
3921
3834
  # # "2"
3922
3835
  # # "3"
3923
3836
  # # ]
3924
- #
3925
- # @example
3926
- # s.replace(mapping, default: nil)
3927
- # # =>
3928
- # # shape: (3,)
3929
- # # Series: '' [i64]
3930
- # # [
3931
- # # 1
3932
- # # 2
3933
- # # 3
3934
- # # ]
3935
- #
3936
- # @example Set the `return_dtype` parameter to control the resulting data type directly.
3937
- # s.replace(mapping, return_dtype: Polars::UInt8)
3938
- # # =>
3939
- # # shape: (3,)
3940
- # # Series: '' [u8]
3941
- # # [
3942
- # # 1
3943
- # # 2
3944
- # # 3
3945
- # # ]
3946
3837
  def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3947
3838
  super
3948
3839
  end
@@ -3989,7 +3880,8 @@ module Polars
3989
3880
  half_life: nil,
3990
3881
  alpha: nil,
3991
3882
  adjust: true,
3992
- min_periods: 1
3883
+ min_periods: 1,
3884
+ ignore_nulls: true
3993
3885
  )
3994
3886
  super
3995
3887
  end
@@ -4004,7 +3896,8 @@ module Polars
4004
3896
  alpha: nil,
4005
3897
  adjust: true,
4006
3898
  bias: false,
4007
- min_periods: 1
3899
+ min_periods: 1,
3900
+ ignore_nulls: true
4008
3901
  )
4009
3902
  super
4010
3903
  end
@@ -4019,7 +3912,8 @@ module Polars
4019
3912
  alpha: nil,
4020
3913
  adjust: true,
4021
3914
  bias: false,
4022
- min_periods: 1
3915
+ min_periods: 1,
3916
+ ignore_nulls: true
4023
3917
  )
4024
3918
  super
4025
3919
  end
@@ -4218,12 +4112,12 @@ module Polars
4218
4112
  end
4219
4113
 
4220
4114
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
4221
- ts = Utils._datetime_to_pl_timestamp(other, time_unit)
4115
+ ts = Utils.datetime_to_int(other, time_unit)
4222
4116
  f = ffi_func("#{op}_<>", Int64, _s)
4223
4117
  fail if f.nil?
4224
4118
  return Utils.wrap_s(f.call(ts))
4225
4119
  elsif other.is_a?(::Date) && dtype == Date
4226
- d = Utils._date_to_pl_date(other)
4120
+ d = Utils.date_to_int(other)
4227
4121
  f = ffi_func("#{op}_<>", Int32, _s)
4228
4122
  fail if f.nil?
4229
4123
  return Utils.wrap_s(f.call(d))
@@ -4309,10 +4203,18 @@ module Polars
4309
4203
  # TODO improve performance
4310
4204
  constructor.call(name, values.to_a, strict)
4311
4205
  end
4312
- elsif values.shape.length == 2
4206
+ elsif values.shape.sum == 0
4313
4207
  raise Todo
4314
4208
  else
4315
- raise Todo
4209
+ original_shape = values.shape
4210
+ values = values.reshape(original_shape.inject(&:*))
4211
+ rb_s = numo_to_rbseries(
4212
+ name,
4213
+ values,
4214
+ strict: strict,
4215
+ nan_to_null: nan_to_null
4216
+ )
4217
+ Utils.wrap_s(rb_s).reshape(original_shape)._s
4316
4218
  end
4317
4219
  end
4318
4220
 
@@ -4362,6 +4264,8 @@ module Polars
4362
4264
  end
4363
4265
 
4364
4266
  constructor = polars_type_to_constructor(dtype)
4267
+ # TODO remove
4268
+ strict = false if dtype == Decimal
4365
4269
  rbseries = constructor.call(name, values, strict)
4366
4270
 
4367
4271
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
@@ -4370,11 +4274,11 @@ module Polars
4370
4274
  rbseries = rbseries.cast(dtype, true)
4371
4275
  end
4372
4276
  end
4373
- return rbseries
4277
+ rbseries
4374
4278
  elsif dtype == Struct
4375
4279
  struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
4376
4280
  empty = {}
4377
- return DataFrame.sequence_to_rbdf(
4281
+ DataFrame.sequence_to_rbdf(
4378
4282
  values.map { |v| v.nil? ? empty : v },
4379
4283
  schema: struct_schema,
4380
4284
  orient: "row",
@@ -4399,13 +4303,13 @@ module Polars
4399
4303
  # TODO
4400
4304
  time_unit = nil
4401
4305
 
4402
- rb_series = RbSeries.new_from_anyvalues(name, values, strict)
4306
+ rb_series = RbSeries.new_from_any_values(name, values, strict)
4403
4307
  if time_unit.nil?
4404
4308
  s = Utils.wrap_s(rb_series)
4405
4309
  else
4406
4310
  s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
4407
4311
  end
4408
- return s._s
4312
+ s._s
4409
4313
  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
4410
4314
  raise Todo
4411
4315
  elsif ruby_dtype == ::Array
@@ -4419,11 +4323,11 @@ module Polars
4419
4323
  end
4420
4324
  return srs
4421
4325
  end
4422
- return sequence_from_anyvalue_or_object(name, values)
4326
+ sequence_from_anyvalue_or_object(name, values)
4423
4327
  elsif ruby_dtype == Series
4424
- return RbSeries.new_series_list(name, values.map(&:_s), strict)
4328
+ RbSeries.new_series_list(name, values.map(&:_s), strict)
4425
4329
  elsif ruby_dtype == RbSeries
4426
- return RbSeries.new_series_list(name, values, strict)
4330
+ RbSeries.new_series_list(name, values, strict)
4427
4331
  else
4428
4332
  constructor =
4429
4333
  if value.is_a?(::String)
@@ -4438,13 +4342,26 @@ module Polars
4438
4342
  else
4439
4343
  rb_type_to_constructor(value.class)
4440
4344
  end
4441
- constructor.call(name, values, strict)
4345
+
4346
+ construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
4347
+ end
4348
+ end
4349
+ end
4350
+
4351
+ def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
4352
+ begin
4353
+ constructor.call(name, values, strict)
4354
+ rescue
4355
+ if dtype.nil?
4356
+ RbSeries.new_from_any_values(name, values, strict)
4357
+ else
4358
+ RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
4442
4359
  end
4443
4360
  end
4444
4361
  end
4445
4362
 
4446
4363
  def sequence_from_anyvalue_or_object(name, values)
4447
- RbSeries.new_from_anyvalues(name, values, true)
4364
+ RbSeries.new_from_any_values(name, values, true)
4448
4365
  rescue
4449
4366
  RbSeries.new_object(name, values, false)
4450
4367
  end
@@ -4461,10 +4378,10 @@ module Polars
4461
4378
  UInt32 => RbSeries.method(:new_opt_u32),
4462
4379
  UInt64 => RbSeries.method(:new_opt_u64),
4463
4380
  Decimal => RbSeries.method(:new_decimal),
4464
- Date => RbSeries.method(:new_from_anyvalues),
4465
- Datetime => RbSeries.method(:new_from_anyvalues),
4466
- Duration => RbSeries.method(:new_from_anyvalues),
4467
- Time => RbSeries.method(:new_from_anyvalues),
4381
+ Date => RbSeries.method(:new_from_any_values),
4382
+ Datetime => RbSeries.method(:new_from_any_values),
4383
+ Duration => RbSeries.method(:new_from_any_values),
4384
+ Time => RbSeries.method(:new_from_any_values),
4468
4385
  Boolean => RbSeries.method(:new_opt_bool),
4469
4386
  Utf8 => RbSeries.method(:new_str),
4470
4387
  Object => RbSeries.method(:new_object),