polars-df 0.11.0-x86_64-darwin → 0.13.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -0
  3. data/Cargo.lock +428 -450
  4. data/LICENSE-THIRD-PARTY.txt +2212 -1952
  5. data/lib/polars/3.1/polars.bundle +0 -0
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +35 -7
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +15 -8
  35. data/lib/polars/lazy_frame.rb +123 -105
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +108 -191
  41. data/lib/polars/string_expr.rb +51 -76
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/series.rb CHANGED
@@ -1017,18 +1017,18 @@ module Polars
1017
1017
  # s.rle.struct.unnest
1018
1018
  # # =>
1019
1019
  # # shape: (6, 2)
1020
- # # ┌─────────┬────────┐
1021
- # # │ lengths ┆ values │
1022
- # # │ --- ┆ --- │
1023
- # # │ i32 ┆ i64 │
1024
- # # ╞═════════╪════════╡
1025
- # # │ 2 ┆ 1 │
1026
- # # │ 1 ┆ 2 │
1027
- # # │ 1 ┆ 1 │
1028
- # # │ 1 ┆ null │
1029
- # # │ 1 ┆ 1 │
1030
- # # │ 2 ┆ 3 │
1031
- # # └─────────┴────────┘
1020
+ # # ┌─────┬───────┐
1021
+ # # │ len ┆ value │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ u32 ┆ i64 │
1024
+ # # ╞═════╪═══════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────┴───────┘
1032
1032
  def rle
1033
1033
  super
1034
1034
  end
@@ -1082,8 +1082,24 @@ module Polars
1082
1082
  # # │ 2 ┆ 2 │
1083
1083
  # # │ 3 ┆ 1 │
1084
1084
  # # └─────┴────────┘
1085
- def value_counts(sort: false)
1086
- Utils.wrap_df(_s.value_counts(sort))
1085
+ def value_counts(
1086
+ sort: false,
1087
+ parallel: false,
1088
+ name: nil,
1089
+ normalize: false
1090
+ )
1091
+ if name.nil?
1092
+ if normalize
1093
+ name = "proportion"
1094
+ else
1095
+ name = "count"
1096
+ end
1097
+ end
1098
+ DataFrame._from_rbdf(
1099
+ self._s.value_counts(
1100
+ sort, parallel, name, normalize
1101
+ )
1102
+ )
1087
1103
  end
1088
1104
 
1089
1105
  # Return a count of the unique values in the order of appearance.
@@ -1362,7 +1378,7 @@ module Polars
1362
1378
  # # 2
1363
1379
  # # ]
1364
1380
  def limit(n = 10)
1365
- to_frame.select(Utils.col(name).limit(n)).to_series
1381
+ to_frame.select(F.col(name).limit(n)).to_series
1366
1382
  end
1367
1383
 
1368
1384
  # Get a slice of this Series.
@@ -1386,7 +1402,7 @@ module Polars
1386
1402
  # # 3
1387
1403
  # # ]
1388
1404
  def slice(offset, length = nil)
1389
- super
1405
+ self.class._from_rbseries(_s.slice(offset, length))
1390
1406
  end
1391
1407
 
1392
1408
  # Append a Series to this one.
@@ -1494,7 +1510,7 @@ module Polars
1494
1510
  # # 2
1495
1511
  # # ]
1496
1512
  def head(n = 10)
1497
- to_frame.select(Utils.col(name).head(n)).to_series
1513
+ to_frame.select(F.col(name).head(n)).to_series
1498
1514
  end
1499
1515
 
1500
1516
  # Get the last `n` rows.
@@ -1515,7 +1531,7 @@ module Polars
1515
1531
  # # 3
1516
1532
  # # ]
1517
1533
  def tail(n = 10)
1518
- to_frame.select(Utils.col(name).tail(n)).to_series
1534
+ to_frame.select(F.col(name).tail(n)).to_series
1519
1535
  end
1520
1536
 
1521
1537
  # Take every nth value in the Series and return as new Series.
@@ -1594,7 +1610,7 @@ module Polars
1594
1610
  # # 4
1595
1611
  # # 3
1596
1612
  # # ]
1597
- def top_k(k: 5, nulls_last: false, multithreaded: true)
1613
+ def top_k(k: 5)
1598
1614
  super
1599
1615
  end
1600
1616
 
@@ -1616,7 +1632,7 @@ module Polars
1616
1632
  # # 2
1617
1633
  # # 3
1618
1634
  # # ]
1619
- def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1635
+ def bottom_k(k: 5)
1620
1636
  super
1621
1637
  end
1622
1638
 
@@ -1774,9 +1790,10 @@ module Polars
1774
1790
  # Use this to swiftly assert a Series does not have null values.
1775
1791
  #
1776
1792
  # @return [Boolean]
1777
- def has_validity
1778
- _s.has_validity
1793
+ def has_nulls
1794
+ _s.has_nulls
1779
1795
  end
1796
+ alias_method :has_validity, :has_nulls
1780
1797
 
1781
1798
  # Check if the Series is empty.
1782
1799
  #
@@ -2054,11 +2071,12 @@ module Polars
2054
2071
  #
2055
2072
  # @param other [Series]
2056
2073
  # Series to compare with.
2074
+ # @param strict [Boolean]
2075
+ # Require data types to match.
2076
+ # @param check_names [Boolean]
2077
+ # Require names to match.
2057
2078
  # @param null_equal [Boolean]
2058
2079
  # Consider null values as equal.
2059
- # @param strict [Boolean]
2060
- # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
2061
- # `:i64` will return `false`.
2062
2080
  #
2063
2081
  # @return [Boolean]
2064
2082
  #
@@ -2069,8 +2087,8 @@ module Polars
2069
2087
  # # => true
2070
2088
  # s.equals(s2)
2071
2089
  # # => false
2072
- def equals(other, null_equal: false, strict: false)
2073
- _s.equals(other._s, null_equal, strict)
2090
+ def equals(other, strict: false, check_names: false, null_equal: false)
2091
+ _s.equals(other._s, strict, check_names, null_equal)
2074
2092
  end
2075
2093
  alias_method :series_equal, :equals
2076
2094
 
@@ -3004,16 +3022,7 @@ module Polars
3004
3022
  min_periods: nil,
3005
3023
  center: false
3006
3024
  )
3007
- to_frame
3008
- .select(
3009
- Polars.col(name).rolling_min(
3010
- window_size,
3011
- weights: weights,
3012
- min_periods: min_periods,
3013
- center: center
3014
- )
3015
- )
3016
- .to_series
3025
+ super
3017
3026
  end
3018
3027
 
3019
3028
  # Apply a rolling max (moving max) over the values in this array.
@@ -3054,16 +3063,7 @@ module Polars
3054
3063
  min_periods: nil,
3055
3064
  center: false
3056
3065
  )
3057
- to_frame
3058
- .select(
3059
- Polars.col(name).rolling_max(
3060
- window_size,
3061
- weights: weights,
3062
- min_periods: min_periods,
3063
- center: center
3064
- )
3065
- )
3066
- .to_series
3066
+ super
3067
3067
  end
3068
3068
 
3069
3069
  # Apply a rolling mean (moving mean) over the values in this array.
@@ -3104,16 +3104,7 @@ module Polars
3104
3104
  min_periods: nil,
3105
3105
  center: false
3106
3106
  )
3107
- to_frame
3108
- .select(
3109
- Polars.col(name).rolling_mean(
3110
- window_size,
3111
- weights: weights,
3112
- min_periods: min_periods,
3113
- center: center
3114
- )
3115
- )
3116
- .to_series
3107
+ super
3117
3108
  end
3118
3109
 
3119
3110
  # Apply a rolling sum (moving sum) over the values in this array.
@@ -3154,16 +3145,7 @@ module Polars
3154
3145
  min_periods: nil,
3155
3146
  center: false
3156
3147
  )
3157
- to_frame
3158
- .select(
3159
- Polars.col(name).rolling_sum(
3160
- window_size,
3161
- weights: weights,
3162
- min_periods: min_periods,
3163
- center: center
3164
- )
3165
- )
3166
- .to_series
3148
+ super
3167
3149
  end
3168
3150
 
3169
3151
  # Compute a rolling std dev.
@@ -3204,21 +3186,9 @@ module Polars
3204
3186
  weights: nil,
3205
3187
  min_periods: nil,
3206
3188
  center: false,
3207
- ddof: 1,
3208
- warn_if_unsorted: true
3189
+ ddof: 1
3209
3190
  )
3210
- to_frame
3211
- .select(
3212
- Polars.col(name).rolling_std(
3213
- window_size,
3214
- weights: weights,
3215
- min_periods: min_periods,
3216
- center: center,
3217
- ddof: ddof,
3218
- warn_if_unsorted: warn_if_unsorted
3219
- )
3220
- )
3221
- .to_series
3191
+ super
3222
3192
  end
3223
3193
 
3224
3194
  # Compute a rolling variance.
@@ -3259,21 +3229,9 @@ module Polars
3259
3229
  weights: nil,
3260
3230
  min_periods: nil,
3261
3231
  center: false,
3262
- ddof: 1,
3263
- warn_if_unsorted: true
3232
+ ddof: 1
3264
3233
  )
3265
- to_frame
3266
- .select(
3267
- Polars.col(name).rolling_var(
3268
- window_size,
3269
- weights: weights,
3270
- min_periods: min_periods,
3271
- center: center,
3272
- ddof: ddof,
3273
- warn_if_unsorted: warn_if_unsorted
3274
- )
3275
- )
3276
- .to_series
3234
+ super
3277
3235
  end
3278
3236
 
3279
3237
  # def rolling_apply
@@ -3312,24 +3270,9 @@ module Polars
3312
3270
  window_size,
3313
3271
  weights: nil,
3314
3272
  min_periods: nil,
3315
- center: false,
3316
- warn_if_unsorted: true
3273
+ center: false
3317
3274
  )
3318
- if min_periods.nil?
3319
- min_periods = window_size
3320
- end
3321
-
3322
- to_frame
3323
- .select(
3324
- Polars.col(name).rolling_median(
3325
- window_size,
3326
- weights: weights,
3327
- min_periods: min_periods,
3328
- center: center,
3329
- warn_if_unsorted: warn_if_unsorted
3330
- )
3331
- )
3332
- .to_series
3275
+ super
3333
3276
  end
3334
3277
 
3335
3278
  # Compute a rolling quantile.
@@ -3385,26 +3328,9 @@ module Polars
3385
3328
  window_size: 2,
3386
3329
  weights: nil,
3387
3330
  min_periods: nil,
3388
- center: false,
3389
- warn_if_unsorted: true
3331
+ center: false
3390
3332
  )
3391
- if min_periods.nil?
3392
- min_periods = window_size
3393
- end
3394
-
3395
- to_frame
3396
- .select(
3397
- Polars.col(name).rolling_quantile(
3398
- quantile,
3399
- interpolation: interpolation,
3400
- window_size: window_size,
3401
- weights: weights,
3402
- min_periods: min_periods,
3403
- center: center,
3404
- warn_if_unsorted: warn_if_unsorted
3405
- )
3406
- )
3407
- .to_series
3333
+ super
3408
3334
  end
3409
3335
 
3410
3336
  # Compute a rolling skew.
@@ -3806,7 +3732,7 @@ module Polars
3806
3732
  # # null
3807
3733
  # # 10
3808
3734
  # # ]
3809
- def clip(min_val, max_val)
3735
+ def clip(min_val = nil, max_val = nil)
3810
3736
  super
3811
3737
  end
3812
3738
 
@@ -3883,33 +3809,20 @@ module Polars
3883
3809
  # # 200
3884
3810
  # # ]
3885
3811
  #
3886
- # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3812
+ # @example Passing a mapping with replacements is also supported as syntactic sugar.
3887
3813
  # mapping = {2 => 100, 3 => 200}
3888
- # s.replace(mapping, default: -1)
3814
+ # s.replace(mapping)
3889
3815
  # # =>
3890
3816
  # # shape: (4,)
3891
3817
  # # Series: '' [i64]
3892
3818
  # # [
3893
- # # -1
3819
+ # # 1
3894
3820
  # # 100
3895
3821
  # # 100
3896
3822
  # # 200
3897
3823
  # # ]
3898
3824
  #
3899
- # @example The default can be another Series.
3900
- # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3901
- # s.replace(2, 100, default: default)
3902
- # # =>
3903
- # # shape: (4,)
3904
- # # Series: '' [f64]
3905
- # # [
3906
- # # 2.5
3907
- # # 100.0
3908
- # # 100.0
3909
- # # 10.0
3910
- # # ]
3911
- #
3912
- # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3825
+ # @example The original data type is preserved when replacing by values of a different data type.
3913
3826
  # s = Polars::Series.new(["x", "y", "z"])
3914
3827
  # mapping = {"x" => 1, "y" => 2, "z" => 3}
3915
3828
  # s.replace(mapping)
@@ -3921,28 +3834,6 @@ module Polars
3921
3834
  # # "2"
3922
3835
  # # "3"
3923
3836
  # # ]
3924
- #
3925
- # @example
3926
- # s.replace(mapping, default: nil)
3927
- # # =>
3928
- # # shape: (3,)
3929
- # # Series: '' [i64]
3930
- # # [
3931
- # # 1
3932
- # # 2
3933
- # # 3
3934
- # # ]
3935
- #
3936
- # @example Set the `return_dtype` parameter to control the resulting data type directly.
3937
- # s.replace(mapping, return_dtype: Polars::UInt8)
3938
- # # =>
3939
- # # shape: (3,)
3940
- # # Series: '' [u8]
3941
- # # [
3942
- # # 1
3943
- # # 2
3944
- # # 3
3945
- # # ]
3946
3837
  def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3947
3838
  super
3948
3839
  end
@@ -3989,7 +3880,8 @@ module Polars
3989
3880
  half_life: nil,
3990
3881
  alpha: nil,
3991
3882
  adjust: true,
3992
- min_periods: 1
3883
+ min_periods: 1,
3884
+ ignore_nulls: true
3993
3885
  )
3994
3886
  super
3995
3887
  end
@@ -4004,7 +3896,8 @@ module Polars
4004
3896
  alpha: nil,
4005
3897
  adjust: true,
4006
3898
  bias: false,
4007
- min_periods: 1
3899
+ min_periods: 1,
3900
+ ignore_nulls: true
4008
3901
  )
4009
3902
  super
4010
3903
  end
@@ -4019,7 +3912,8 @@ module Polars
4019
3912
  alpha: nil,
4020
3913
  adjust: true,
4021
3914
  bias: false,
4022
- min_periods: 1
3915
+ min_periods: 1,
3916
+ ignore_nulls: true
4023
3917
  )
4024
3918
  super
4025
3919
  end
@@ -4218,12 +4112,12 @@ module Polars
4218
4112
  end
4219
4113
 
4220
4114
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
4221
- ts = Utils._datetime_to_pl_timestamp(other, time_unit)
4115
+ ts = Utils.datetime_to_int(other, time_unit)
4222
4116
  f = ffi_func("#{op}_<>", Int64, _s)
4223
4117
  fail if f.nil?
4224
4118
  return Utils.wrap_s(f.call(ts))
4225
4119
  elsif other.is_a?(::Date) && dtype == Date
4226
- d = Utils._date_to_pl_date(other)
4120
+ d = Utils.date_to_int(other)
4227
4121
  f = ffi_func("#{op}_<>", Int32, _s)
4228
4122
  fail if f.nil?
4229
4123
  return Utils.wrap_s(f.call(d))
@@ -4309,10 +4203,18 @@ module Polars
4309
4203
  # TODO improve performance
4310
4204
  constructor.call(name, values.to_a, strict)
4311
4205
  end
4312
- elsif values.shape.length == 2
4206
+ elsif values.shape.sum == 0
4313
4207
  raise Todo
4314
4208
  else
4315
- raise Todo
4209
+ original_shape = values.shape
4210
+ values = values.reshape(original_shape.inject(&:*))
4211
+ rb_s = numo_to_rbseries(
4212
+ name,
4213
+ values,
4214
+ strict: strict,
4215
+ nan_to_null: nan_to_null
4216
+ )
4217
+ Utils.wrap_s(rb_s).reshape(original_shape)._s
4316
4218
  end
4317
4219
  end
4318
4220
 
@@ -4362,6 +4264,8 @@ module Polars
4362
4264
  end
4363
4265
 
4364
4266
  constructor = polars_type_to_constructor(dtype)
4267
+ # TODO remove
4268
+ strict = false if dtype == Decimal
4365
4269
  rbseries = constructor.call(name, values, strict)
4366
4270
 
4367
4271
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
@@ -4370,11 +4274,11 @@ module Polars
4370
4274
  rbseries = rbseries.cast(dtype, true)
4371
4275
  end
4372
4276
  end
4373
- return rbseries
4277
+ rbseries
4374
4278
  elsif dtype == Struct
4375
4279
  struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
4376
4280
  empty = {}
4377
- return DataFrame.sequence_to_rbdf(
4281
+ DataFrame.sequence_to_rbdf(
4378
4282
  values.map { |v| v.nil? ? empty : v },
4379
4283
  schema: struct_schema,
4380
4284
  orient: "row",
@@ -4399,13 +4303,13 @@ module Polars
4399
4303
  # TODO
4400
4304
  time_unit = nil
4401
4305
 
4402
- rb_series = RbSeries.new_from_anyvalues(name, values, strict)
4306
+ rb_series = RbSeries.new_from_any_values(name, values, strict)
4403
4307
  if time_unit.nil?
4404
4308
  s = Utils.wrap_s(rb_series)
4405
4309
  else
4406
4310
  s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
4407
4311
  end
4408
- return s._s
4312
+ s._s
4409
4313
  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
4410
4314
  raise Todo
4411
4315
  elsif ruby_dtype == ::Array
@@ -4419,11 +4323,11 @@ module Polars
4419
4323
  end
4420
4324
  return srs
4421
4325
  end
4422
- return sequence_from_anyvalue_or_object(name, values)
4326
+ sequence_from_anyvalue_or_object(name, values)
4423
4327
  elsif ruby_dtype == Series
4424
- return RbSeries.new_series_list(name, values.map(&:_s), strict)
4328
+ RbSeries.new_series_list(name, values.map(&:_s), strict)
4425
4329
  elsif ruby_dtype == RbSeries
4426
- return RbSeries.new_series_list(name, values, strict)
4330
+ RbSeries.new_series_list(name, values, strict)
4427
4331
  else
4428
4332
  constructor =
4429
4333
  if value.is_a?(::String)
@@ -4438,13 +4342,26 @@ module Polars
4438
4342
  else
4439
4343
  rb_type_to_constructor(value.class)
4440
4344
  end
4441
- constructor.call(name, values, strict)
4345
+
4346
+ construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
4347
+ end
4348
+ end
4349
+ end
4350
+
4351
+ def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
4352
+ begin
4353
+ constructor.call(name, values, strict)
4354
+ rescue
4355
+ if dtype.nil?
4356
+ RbSeries.new_from_any_values(name, values, strict)
4357
+ else
4358
+ RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
4442
4359
  end
4443
4360
  end
4444
4361
  end
4445
4362
 
4446
4363
  def sequence_from_anyvalue_or_object(name, values)
4447
- RbSeries.new_from_anyvalues(name, values, true)
4364
+ RbSeries.new_from_any_values(name, values, true)
4448
4365
  rescue
4449
4366
  RbSeries.new_object(name, values, false)
4450
4367
  end
@@ -4461,10 +4378,10 @@ module Polars
4461
4378
  UInt32 => RbSeries.method(:new_opt_u32),
4462
4379
  UInt64 => RbSeries.method(:new_opt_u64),
4463
4380
  Decimal => RbSeries.method(:new_decimal),
4464
- Date => RbSeries.method(:new_from_anyvalues),
4465
- Datetime => RbSeries.method(:new_from_anyvalues),
4466
- Duration => RbSeries.method(:new_from_anyvalues),
4467
- Time => RbSeries.method(:new_from_anyvalues),
4381
+ Date => RbSeries.method(:new_from_any_values),
4382
+ Datetime => RbSeries.method(:new_from_any_values),
4383
+ Duration => RbSeries.method(:new_from_any_values),
4384
+ Time => RbSeries.method(:new_from_any_values),
4468
4385
  Boolean => RbSeries.method(:new_opt_bool),
4469
4386
  Utf8 => RbSeries.method(:new_str),
4470
4387
  Object => RbSeries.method(:new_object),