polars-df 0.11.0-x86_64-darwin → 0.12.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/LICENSE-THIRD-PARTY.txt +1067 -880
  5. data/lib/polars/3.1/polars.bundle +0 -0
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +3 -3
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +5 -4
  35. data/lib/polars/lazy_frame.rb +120 -106
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +105 -189
  41. data/lib/polars/string_expr.rb +42 -67
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/series.rb CHANGED
@@ -1017,18 +1017,18 @@ module Polars
1017
1017
  # s.rle.struct.unnest
1018
1018
  # # =>
1019
1019
  # # shape: (6, 2)
1020
- # # ┌─────────┬────────┐
1021
- # # │ lengths ┆ values
1022
- # # │ --- ┆ ---
1023
- # # │ i32 ┆ i64
1024
- # # ╞═════════╪════════╡
1025
- # # │ 2 ┆ 1
1026
- # # │ 1 ┆ 2
1027
- # # │ 1 ┆ 1
1028
- # # │ 1 ┆ null
1029
- # # │ 1 ┆ 1
1030
- # # │ 2 ┆ 3
1031
- # # └─────────┴────────┘
1020
+ # # ┌─────┬───────┐
1021
+ # # │ len ┆ value
1022
+ # # │ --- ┆ ---
1023
+ # # │ u32 ┆ i64
1024
+ # # ╞═════╪═══════╡
1025
+ # # │ 2 ┆ 1
1026
+ # # │ 1 ┆ 2
1027
+ # # │ 1 ┆ 1
1028
+ # # │ 1 ┆ null
1029
+ # # │ 1 ┆ 1
1030
+ # # │ 2 ┆ 3
1031
+ # # └─────┴───────┘
1032
1032
  def rle
1033
1033
  super
1034
1034
  end
@@ -1082,8 +1082,24 @@ module Polars
1082
1082
  # # │ 2 ┆ 2 │
1083
1083
  # # │ 3 ┆ 1 │
1084
1084
  # # └─────┴────────┘
1085
- def value_counts(sort: false)
1086
- Utils.wrap_df(_s.value_counts(sort))
1085
+ def value_counts(
1086
+ sort: false,
1087
+ parallel: false,
1088
+ name: nil,
1089
+ normalize: false
1090
+ )
1091
+ if name.nil?
1092
+ if normalize
1093
+ name = "proportion"
1094
+ else
1095
+ name = "count"
1096
+ end
1097
+ end
1098
+ DataFrame._from_rbdf(
1099
+ self._s.value_counts(
1100
+ sort, parallel, name, normalize
1101
+ )
1102
+ )
1087
1103
  end
1088
1104
 
1089
1105
  # Return a count of the unique values in the order of appearance.
@@ -1362,7 +1378,7 @@ module Polars
1362
1378
  # # 2
1363
1379
  # # ]
1364
1380
  def limit(n = 10)
1365
- to_frame.select(Utils.col(name).limit(n)).to_series
1381
+ to_frame.select(F.col(name).limit(n)).to_series
1366
1382
  end
1367
1383
 
1368
1384
  # Get a slice of this Series.
@@ -1386,7 +1402,7 @@ module Polars
1386
1402
  # # 3
1387
1403
  # # ]
1388
1404
  def slice(offset, length = nil)
1389
- super
1405
+ self.class._from_rbseries(_s.slice(offset, length))
1390
1406
  end
1391
1407
 
1392
1408
  # Append a Series to this one.
@@ -1494,7 +1510,7 @@ module Polars
1494
1510
  # # 2
1495
1511
  # # ]
1496
1512
  def head(n = 10)
1497
- to_frame.select(Utils.col(name).head(n)).to_series
1513
+ to_frame.select(F.col(name).head(n)).to_series
1498
1514
  end
1499
1515
 
1500
1516
  # Get the last `n` rows.
@@ -1515,7 +1531,7 @@ module Polars
1515
1531
  # # 3
1516
1532
  # # ]
1517
1533
  def tail(n = 10)
1518
- to_frame.select(Utils.col(name).tail(n)).to_series
1534
+ to_frame.select(F.col(name).tail(n)).to_series
1519
1535
  end
1520
1536
 
1521
1537
  # Take every nth value in the Series and return as new Series.
@@ -1594,7 +1610,7 @@ module Polars
1594
1610
  # # 4
1595
1611
  # # 3
1596
1612
  # # ]
1597
- def top_k(k: 5, nulls_last: false, multithreaded: true)
1613
+ def top_k(k: 5)
1598
1614
  super
1599
1615
  end
1600
1616
 
@@ -1616,7 +1632,7 @@ module Polars
1616
1632
  # # 2
1617
1633
  # # 3
1618
1634
  # # ]
1619
- def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1635
+ def bottom_k(k: 5)
1620
1636
  super
1621
1637
  end
1622
1638
 
@@ -2054,11 +2070,12 @@ module Polars
2054
2070
  #
2055
2071
  # @param other [Series]
2056
2072
  # Series to compare with.
2073
+ # @param strict [Boolean]
2074
+ # Require data types to match.
2075
+ # @param check_names [Boolean]
2076
+ # Require names to match.
2057
2077
  # @param null_equal [Boolean]
2058
2078
  # Consider null values as equal.
2059
- # @param strict [Boolean]
2060
- # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
2061
- # `:i64` will return `false`.
2062
2079
  #
2063
2080
  # @return [Boolean]
2064
2081
  #
@@ -2069,8 +2086,8 @@ module Polars
2069
2086
  # # => true
2070
2087
  # s.equals(s2)
2071
2088
  # # => false
2072
- def equals(other, null_equal: false, strict: false)
2073
- _s.equals(other._s, null_equal, strict)
2089
+ def equals(other, strict: false, check_names: false, null_equal: false)
2090
+ _s.equals(other._s, strict, check_names, null_equal)
2074
2091
  end
2075
2092
  alias_method :series_equal, :equals
2076
2093
 
@@ -3004,16 +3021,7 @@ module Polars
3004
3021
  min_periods: nil,
3005
3022
  center: false
3006
3023
  )
3007
- to_frame
3008
- .select(
3009
- Polars.col(name).rolling_min(
3010
- window_size,
3011
- weights: weights,
3012
- min_periods: min_periods,
3013
- center: center
3014
- )
3015
- )
3016
- .to_series
3024
+ super
3017
3025
  end
3018
3026
 
3019
3027
  # Apply a rolling max (moving max) over the values in this array.
@@ -3054,16 +3062,7 @@ module Polars
3054
3062
  min_periods: nil,
3055
3063
  center: false
3056
3064
  )
3057
- to_frame
3058
- .select(
3059
- Polars.col(name).rolling_max(
3060
- window_size,
3061
- weights: weights,
3062
- min_periods: min_periods,
3063
- center: center
3064
- )
3065
- )
3066
- .to_series
3065
+ super
3067
3066
  end
3068
3067
 
3069
3068
  # Apply a rolling mean (moving mean) over the values in this array.
@@ -3104,16 +3103,7 @@ module Polars
3104
3103
  min_periods: nil,
3105
3104
  center: false
3106
3105
  )
3107
- to_frame
3108
- .select(
3109
- Polars.col(name).rolling_mean(
3110
- window_size,
3111
- weights: weights,
3112
- min_periods: min_periods,
3113
- center: center
3114
- )
3115
- )
3116
- .to_series
3106
+ super
3117
3107
  end
3118
3108
 
3119
3109
  # Apply a rolling sum (moving sum) over the values in this array.
@@ -3154,16 +3144,7 @@ module Polars
3154
3144
  min_periods: nil,
3155
3145
  center: false
3156
3146
  )
3157
- to_frame
3158
- .select(
3159
- Polars.col(name).rolling_sum(
3160
- window_size,
3161
- weights: weights,
3162
- min_periods: min_periods,
3163
- center: center
3164
- )
3165
- )
3166
- .to_series
3147
+ super
3167
3148
  end
3168
3149
 
3169
3150
  # Compute a rolling std dev.
@@ -3204,21 +3185,9 @@ module Polars
3204
3185
  weights: nil,
3205
3186
  min_periods: nil,
3206
3187
  center: false,
3207
- ddof: 1,
3208
- warn_if_unsorted: true
3188
+ ddof: 1
3209
3189
  )
3210
- to_frame
3211
- .select(
3212
- Polars.col(name).rolling_std(
3213
- window_size,
3214
- weights: weights,
3215
- min_periods: min_periods,
3216
- center: center,
3217
- ddof: ddof,
3218
- warn_if_unsorted: warn_if_unsorted
3219
- )
3220
- )
3221
- .to_series
3190
+ super
3222
3191
  end
3223
3192
 
3224
3193
  # Compute a rolling variance.
@@ -3259,21 +3228,9 @@ module Polars
3259
3228
  weights: nil,
3260
3229
  min_periods: nil,
3261
3230
  center: false,
3262
- ddof: 1,
3263
- warn_if_unsorted: true
3231
+ ddof: 1
3264
3232
  )
3265
- to_frame
3266
- .select(
3267
- Polars.col(name).rolling_var(
3268
- window_size,
3269
- weights: weights,
3270
- min_periods: min_periods,
3271
- center: center,
3272
- ddof: ddof,
3273
- warn_if_unsorted: warn_if_unsorted
3274
- )
3275
- )
3276
- .to_series
3233
+ super
3277
3234
  end
3278
3235
 
3279
3236
  # def rolling_apply
@@ -3312,24 +3269,9 @@ module Polars
3312
3269
  window_size,
3313
3270
  weights: nil,
3314
3271
  min_periods: nil,
3315
- center: false,
3316
- warn_if_unsorted: true
3272
+ center: false
3317
3273
  )
3318
- if min_periods.nil?
3319
- min_periods = window_size
3320
- end
3321
-
3322
- to_frame
3323
- .select(
3324
- Polars.col(name).rolling_median(
3325
- window_size,
3326
- weights: weights,
3327
- min_periods: min_periods,
3328
- center: center,
3329
- warn_if_unsorted: warn_if_unsorted
3330
- )
3331
- )
3332
- .to_series
3274
+ super
3333
3275
  end
3334
3276
 
3335
3277
  # Compute a rolling quantile.
@@ -3385,26 +3327,9 @@ module Polars
3385
3327
  window_size: 2,
3386
3328
  weights: nil,
3387
3329
  min_periods: nil,
3388
- center: false,
3389
- warn_if_unsorted: true
3330
+ center: false
3390
3331
  )
3391
- if min_periods.nil?
3392
- min_periods = window_size
3393
- end
3394
-
3395
- to_frame
3396
- .select(
3397
- Polars.col(name).rolling_quantile(
3398
- quantile,
3399
- interpolation: interpolation,
3400
- window_size: window_size,
3401
- weights: weights,
3402
- min_periods: min_periods,
3403
- center: center,
3404
- warn_if_unsorted: warn_if_unsorted
3405
- )
3406
- )
3407
- .to_series
3332
+ super
3408
3333
  end
3409
3334
 
3410
3335
  # Compute a rolling skew.
@@ -3806,7 +3731,7 @@ module Polars
3806
3731
  # # null
3807
3732
  # # 10
3808
3733
  # # ]
3809
- def clip(min_val, max_val)
3734
+ def clip(min_val = nil, max_val = nil)
3810
3735
  super
3811
3736
  end
3812
3737
 
@@ -3883,33 +3808,20 @@ module Polars
3883
3808
  # # 200
3884
3809
  # # ]
3885
3810
  #
3886
- # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3811
+ # @example Passing a mapping with replacements is also supported as syntactic sugar.
3887
3812
  # mapping = {2 => 100, 3 => 200}
3888
- # s.replace(mapping, default: -1)
3813
+ # s.replace(mapping)
3889
3814
  # # =>
3890
3815
  # # shape: (4,)
3891
3816
  # # Series: '' [i64]
3892
3817
  # # [
3893
- # # -1
3818
+ # # 1
3894
3819
  # # 100
3895
3820
  # # 100
3896
3821
  # # 200
3897
3822
  # # ]
3898
3823
  #
3899
- # @example The default can be another Series.
3900
- # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3901
- # s.replace(2, 100, default: default)
3902
- # # =>
3903
- # # shape: (4,)
3904
- # # Series: '' [f64]
3905
- # # [
3906
- # # 2.5
3907
- # # 100.0
3908
- # # 100.0
3909
- # # 10.0
3910
- # # ]
3911
- #
3912
- # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3824
+ # @example The original data type is preserved when replacing by values of a different data type.
3913
3825
  # s = Polars::Series.new(["x", "y", "z"])
3914
3826
  # mapping = {"x" => 1, "y" => 2, "z" => 3}
3915
3827
  # s.replace(mapping)
@@ -3921,28 +3833,6 @@ module Polars
3921
3833
  # # "2"
3922
3834
  # # "3"
3923
3835
  # # ]
3924
- #
3925
- # @example
3926
- # s.replace(mapping, default: nil)
3927
- # # =>
3928
- # # shape: (3,)
3929
- # # Series: '' [i64]
3930
- # # [
3931
- # # 1
3932
- # # 2
3933
- # # 3
3934
- # # ]
3935
- #
3936
- # @example Set the `return_dtype` parameter to control the resulting data type directly.
3937
- # s.replace(mapping, return_dtype: Polars::UInt8)
3938
- # # =>
3939
- # # shape: (3,)
3940
- # # Series: '' [u8]
3941
- # # [
3942
- # # 1
3943
- # # 2
3944
- # # 3
3945
- # # ]
3946
3836
  def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3947
3837
  super
3948
3838
  end
@@ -3989,7 +3879,8 @@ module Polars
3989
3879
  half_life: nil,
3990
3880
  alpha: nil,
3991
3881
  adjust: true,
3992
- min_periods: 1
3882
+ min_periods: 1,
3883
+ ignore_nulls: true
3993
3884
  )
3994
3885
  super
3995
3886
  end
@@ -4004,7 +3895,8 @@ module Polars
4004
3895
  alpha: nil,
4005
3896
  adjust: true,
4006
3897
  bias: false,
4007
- min_periods: 1
3898
+ min_periods: 1,
3899
+ ignore_nulls: true
4008
3900
  )
4009
3901
  super
4010
3902
  end
@@ -4019,7 +3911,8 @@ module Polars
4019
3911
  alpha: nil,
4020
3912
  adjust: true,
4021
3913
  bias: false,
4022
- min_periods: 1
3914
+ min_periods: 1,
3915
+ ignore_nulls: true
4023
3916
  )
4024
3917
  super
4025
3918
  end
@@ -4218,12 +4111,12 @@ module Polars
4218
4111
  end
4219
4112
 
4220
4113
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
4221
- ts = Utils._datetime_to_pl_timestamp(other, time_unit)
4114
+ ts = Utils.datetime_to_int(other, time_unit)
4222
4115
  f = ffi_func("#{op}_<>", Int64, _s)
4223
4116
  fail if f.nil?
4224
4117
  return Utils.wrap_s(f.call(ts))
4225
4118
  elsif other.is_a?(::Date) && dtype == Date
4226
- d = Utils._date_to_pl_date(other)
4119
+ d = Utils.date_to_int(other)
4227
4120
  f = ffi_func("#{op}_<>", Int32, _s)
4228
4121
  fail if f.nil?
4229
4122
  return Utils.wrap_s(f.call(d))
@@ -4309,10 +4202,18 @@ module Polars
4309
4202
  # TODO improve performance
4310
4203
  constructor.call(name, values.to_a, strict)
4311
4204
  end
4312
- elsif values.shape.length == 2
4205
+ elsif values.shape.sum == 0
4313
4206
  raise Todo
4314
4207
  else
4315
- raise Todo
4208
+ original_shape = values.shape
4209
+ values = values.reshape(original_shape.inject(&:*))
4210
+ rb_s = numo_to_rbseries(
4211
+ name,
4212
+ values,
4213
+ strict: strict,
4214
+ nan_to_null: nan_to_null
4215
+ )
4216
+ Utils.wrap_s(rb_s).reshape(original_shape)._s
4316
4217
  end
4317
4218
  end
4318
4219
 
@@ -4362,6 +4263,8 @@ module Polars
4362
4263
  end
4363
4264
 
4364
4265
  constructor = polars_type_to_constructor(dtype)
4266
+ # TODO remove
4267
+ strict = false if dtype == Decimal
4365
4268
  rbseries = constructor.call(name, values, strict)
4366
4269
 
4367
4270
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
@@ -4370,11 +4273,11 @@ module Polars
4370
4273
  rbseries = rbseries.cast(dtype, true)
4371
4274
  end
4372
4275
  end
4373
- return rbseries
4276
+ rbseries
4374
4277
  elsif dtype == Struct
4375
4278
  struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
4376
4279
  empty = {}
4377
- return DataFrame.sequence_to_rbdf(
4280
+ DataFrame.sequence_to_rbdf(
4378
4281
  values.map { |v| v.nil? ? empty : v },
4379
4282
  schema: struct_schema,
4380
4283
  orient: "row",
@@ -4399,13 +4302,13 @@ module Polars
4399
4302
  # TODO
4400
4303
  time_unit = nil
4401
4304
 
4402
- rb_series = RbSeries.new_from_anyvalues(name, values, strict)
4305
+ rb_series = RbSeries.new_from_any_values(name, values, strict)
4403
4306
  if time_unit.nil?
4404
4307
  s = Utils.wrap_s(rb_series)
4405
4308
  else
4406
4309
  s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
4407
4310
  end
4408
- return s._s
4311
+ s._s
4409
4312
  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
4410
4313
  raise Todo
4411
4314
  elsif ruby_dtype == ::Array
@@ -4419,11 +4322,11 @@ module Polars
4419
4322
  end
4420
4323
  return srs
4421
4324
  end
4422
- return sequence_from_anyvalue_or_object(name, values)
4325
+ sequence_from_anyvalue_or_object(name, values)
4423
4326
  elsif ruby_dtype == Series
4424
- return RbSeries.new_series_list(name, values.map(&:_s), strict)
4327
+ RbSeries.new_series_list(name, values.map(&:_s), strict)
4425
4328
  elsif ruby_dtype == RbSeries
4426
- return RbSeries.new_series_list(name, values, strict)
4329
+ RbSeries.new_series_list(name, values, strict)
4427
4330
  else
4428
4331
  constructor =
4429
4332
  if value.is_a?(::String)
@@ -4438,13 +4341,26 @@ module Polars
4438
4341
  else
4439
4342
  rb_type_to_constructor(value.class)
4440
4343
  end
4441
- constructor.call(name, values, strict)
4344
+
4345
+ construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
4346
+ end
4347
+ end
4348
+ end
4349
+
4350
+ def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
4351
+ begin
4352
+ constructor.call(name, values, strict)
4353
+ rescue
4354
+ if dtype.nil?
4355
+ RbSeries.new_from_any_values(name, values, strict)
4356
+ else
4357
+ RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
4442
4358
  end
4443
4359
  end
4444
4360
  end
4445
4361
 
4446
4362
  def sequence_from_anyvalue_or_object(name, values)
4447
- RbSeries.new_from_anyvalues(name, values, true)
4363
+ RbSeries.new_from_any_values(name, values, true)
4448
4364
  rescue
4449
4365
  RbSeries.new_object(name, values, false)
4450
4366
  end
@@ -4461,10 +4377,10 @@ module Polars
4461
4377
  UInt32 => RbSeries.method(:new_opt_u32),
4462
4378
  UInt64 => RbSeries.method(:new_opt_u64),
4463
4379
  Decimal => RbSeries.method(:new_decimal),
4464
- Date => RbSeries.method(:new_from_anyvalues),
4465
- Datetime => RbSeries.method(:new_from_anyvalues),
4466
- Duration => RbSeries.method(:new_from_anyvalues),
4467
- Time => RbSeries.method(:new_from_anyvalues),
4380
+ Date => RbSeries.method(:new_from_any_values),
4381
+ Datetime => RbSeries.method(:new_from_any_values),
4382
+ Duration => RbSeries.method(:new_from_any_values),
4383
+ Time => RbSeries.method(:new_from_any_values),
4468
4384
  Boolean => RbSeries.method(:new_opt_bool),
4469
4385
  Utf8 => RbSeries.method(:new_str),
4470
4386
  Object => RbSeries.method(:new_object),