polars-df 0.10.0-x86_64-darwin → 0.12.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/LICENSE-THIRD-PARTY.txt +1127 -867
  5. data/README.md +6 -6
  6. data/lib/polars/3.1/polars.bundle +0 -0
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/array_expr.rb +4 -4
  10. data/lib/polars/batched_csv_reader.rb +11 -5
  11. data/lib/polars/cat_expr.rb +0 -36
  12. data/lib/polars/cat_name_space.rb +0 -37
  13. data/lib/polars/convert.rb +6 -1
  14. data/lib/polars/data_frame.rb +176 -403
  15. data/lib/polars/data_types.rb +1 -1
  16. data/lib/polars/date_time_expr.rb +525 -572
  17. data/lib/polars/date_time_name_space.rb +263 -460
  18. data/lib/polars/dynamic_group_by.rb +5 -5
  19. data/lib/polars/exceptions.rb +7 -0
  20. data/lib/polars/expr.rb +1394 -243
  21. data/lib/polars/expr_dispatch.rb +1 -1
  22. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  23. data/lib/polars/functions/as_datatype.rb +63 -40
  24. data/lib/polars/functions/lazy.rb +63 -14
  25. data/lib/polars/functions/lit.rb +1 -1
  26. data/lib/polars/functions/range/date_range.rb +90 -57
  27. data/lib/polars/functions/range/datetime_range.rb +149 -0
  28. data/lib/polars/functions/range/int_range.rb +2 -2
  29. data/lib/polars/functions/range/time_range.rb +141 -0
  30. data/lib/polars/functions/repeat.rb +1 -1
  31. data/lib/polars/functions/whenthen.rb +1 -1
  32. data/lib/polars/group_by.rb +88 -23
  33. data/lib/polars/io/avro.rb +24 -0
  34. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  35. data/lib/polars/io/database.rb +73 -0
  36. data/lib/polars/io/ipc.rb +247 -0
  37. data/lib/polars/io/json.rb +29 -0
  38. data/lib/polars/io/ndjson.rb +80 -0
  39. data/lib/polars/io/parquet.rb +227 -0
  40. data/lib/polars/lazy_frame.rb +143 -272
  41. data/lib/polars/lazy_group_by.rb +100 -3
  42. data/lib/polars/list_expr.rb +11 -11
  43. data/lib/polars/list_name_space.rb +5 -1
  44. data/lib/polars/rolling_group_by.rb +7 -9
  45. data/lib/polars/series.rb +103 -187
  46. data/lib/polars/string_expr.rb +78 -102
  47. data/lib/polars/string_name_space.rb +5 -4
  48. data/lib/polars/testing.rb +2 -2
  49. data/lib/polars/utils/constants.rb +9 -0
  50. data/lib/polars/utils/convert.rb +97 -0
  51. data/lib/polars/utils/parse.rb +89 -0
  52. data/lib/polars/utils/various.rb +76 -0
  53. data/lib/polars/utils/wrap.rb +19 -0
  54. data/lib/polars/utils.rb +8 -300
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars/whenthen.rb +6 -6
  57. data/lib/polars.rb +20 -1
  58. metadata +17 -4
data/lib/polars/series.rb CHANGED
@@ -1017,18 +1017,18 @@ module Polars
1017
1017
  # s.rle.struct.unnest
1018
1018
  # # =>
1019
1019
  # # shape: (6, 2)
1020
- # # ┌─────────┬────────┐
1021
- # # │ lengths ┆ values │
1022
- # # │ ---     ┆ ---    │
1023
- # # │ i32     ┆ i64    │
1024
- # # ╞═════════╪════════╡
1025
- # # │ 2       ┆ 1      │
1026
- # # │ 1       ┆ 2      │
1027
- # # │ 1       ┆ 1      │
1028
- # # │ 1       ┆ null   │
1029
- # # │ 1       ┆ 1      │
1030
- # # │ 2       ┆ 3      │
1031
- # # └─────────┴────────┘
1020
+ # # ┌─────┬───────┐
1021
+ # # │ len ┆ value │
1022
+ # # │ --- ┆ ---   │
1023
+ # # │ u32 ┆ i64   │
1024
+ # # ╞═════╪═══════╡
1025
+ # # │ 2   ┆ 1     │
1026
+ # # │ 1   ┆ 2     │
1027
+ # # │ 1   ┆ 1     │
1028
+ # # │ 1   ┆ null  │
1029
+ # # │ 1   ┆ 1     │
1030
+ # # │ 2   ┆ 3     │
1031
+ # # └─────┴───────┘
1032
1032
  def rle
1033
1033
  super
1034
1034
  end
@@ -1082,8 +1082,24 @@ module Polars
1082
1082
  # # │ 2 ┆ 2 │
1083
1083
  # # │ 3 ┆ 1 │
1084
1084
  # # └─────┴────────┘
1085
- def value_counts(sort: false)
1086
- Utils.wrap_df(_s.value_counts(sort))
1085
+ def value_counts(
1086
+ sort: false,
1087
+ parallel: false,
1088
+ name: nil,
1089
+ normalize: false
1090
+ )
1091
+ if name.nil?
1092
+ if normalize
1093
+ name = "proportion"
1094
+ else
1095
+ name = "count"
1096
+ end
1097
+ end
1098
+ DataFrame._from_rbdf(
1099
+ self._s.value_counts(
1100
+ sort, parallel, name, normalize
1101
+ )
1102
+ )
1087
1103
  end
1088
1104
 
1089
1105
  # Return a count of the unique values in the order of appearance.
@@ -1362,7 +1378,7 @@ module Polars
1362
1378
  # # 2
1363
1379
  # # ]
1364
1380
  def limit(n = 10)
1365
- to_frame.select(Utils.col(name).limit(n)).to_series
1381
+ to_frame.select(F.col(name).limit(n)).to_series
1366
1382
  end
1367
1383
 
1368
1384
  # Get a slice of this Series.
@@ -1386,7 +1402,7 @@ module Polars
1386
1402
  # # 3
1387
1403
  # # ]
1388
1404
  def slice(offset, length = nil)
1389
- super
1405
+ self.class._from_rbseries(_s.slice(offset, length))
1390
1406
  end
1391
1407
 
1392
1408
  # Append a Series to this one.
@@ -1494,7 +1510,7 @@ module Polars
1494
1510
  # # 2
1495
1511
  # # ]
1496
1512
  def head(n = 10)
1497
- to_frame.select(Utils.col(name).head(n)).to_series
1513
+ to_frame.select(F.col(name).head(n)).to_series
1498
1514
  end
1499
1515
 
1500
1516
  # Get the last `n` rows.
@@ -1515,7 +1531,7 @@ module Polars
1515
1531
  # # 3
1516
1532
  # # ]
1517
1533
  def tail(n = 10)
1518
- to_frame.select(Utils.col(name).tail(n)).to_series
1534
+ to_frame.select(F.col(name).tail(n)).to_series
1519
1535
  end
1520
1536
 
1521
1537
  # Take every nth value in the Series and return as new Series.
@@ -2054,11 +2070,12 @@ module Polars
2054
2070
  #
2055
2071
  # @param other [Series]
2056
2072
  # Series to compare with.
2073
+ # @param strict [Boolean]
2074
+ # Require data types to match.
2075
+ # @param check_names [Boolean]
2076
+ # Require names to match.
2057
2077
  # @param null_equal [Boolean]
2058
2078
  # Consider null values as equal.
2059
- # @param strict [Boolean]
2060
- # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
2061
- # `:i64` will return `false`.
2062
2079
  #
2063
2080
  # @return [Boolean]
2064
2081
  #
@@ -2069,8 +2086,8 @@ module Polars
2069
2086
  # # => true
2070
2087
  # s.equals(s2)
2071
2088
  # # => false
2072
- def equals(other, null_equal: false, strict: false)
2073
- _s.equals(other._s, null_equal, strict)
2089
+ def equals(other, strict: false, check_names: false, null_equal: false)
2090
+ _s.equals(other._s, strict, check_names, null_equal)
2074
2091
  end
2075
2092
  alias_method :series_equal, :equals
2076
2093
 
@@ -3004,16 +3021,7 @@ module Polars
3004
3021
  min_periods: nil,
3005
3022
  center: false
3006
3023
  )
3007
- to_frame
3008
- .select(
3009
- Polars.col(name).rolling_min(
3010
- window_size,
3011
- weights: weights,
3012
- min_periods: min_periods,
3013
- center: center
3014
- )
3015
- )
3016
- .to_series
3024
+ super
3017
3025
  end
3018
3026
 
3019
3027
  # Apply a rolling max (moving max) over the values in this array.
@@ -3054,16 +3062,7 @@ module Polars
3054
3062
  min_periods: nil,
3055
3063
  center: false
3056
3064
  )
3057
- to_frame
3058
- .select(
3059
- Polars.col(name).rolling_max(
3060
- window_size,
3061
- weights: weights,
3062
- min_periods: min_periods,
3063
- center: center
3064
- )
3065
- )
3066
- .to_series
3065
+ super
3067
3066
  end
3068
3067
 
3069
3068
  # Apply a rolling mean (moving mean) over the values in this array.
@@ -3104,16 +3103,7 @@ module Polars
3104
3103
  min_periods: nil,
3105
3104
  center: false
3106
3105
  )
3107
- to_frame
3108
- .select(
3109
- Polars.col(name).rolling_mean(
3110
- window_size,
3111
- weights: weights,
3112
- min_periods: min_periods,
3113
- center: center
3114
- )
3115
- )
3116
- .to_series
3106
+ super
3117
3107
  end
3118
3108
 
3119
3109
  # Apply a rolling sum (moving sum) over the values in this array.
@@ -3154,16 +3144,7 @@ module Polars
3154
3144
  min_periods: nil,
3155
3145
  center: false
3156
3146
  )
3157
- to_frame
3158
- .select(
3159
- Polars.col(name).rolling_sum(
3160
- window_size,
3161
- weights: weights,
3162
- min_periods: min_periods,
3163
- center: center
3164
- )
3165
- )
3166
- .to_series
3147
+ super
3167
3148
  end
3168
3149
 
3169
3150
  # Compute a rolling std dev.
@@ -3204,21 +3185,9 @@ module Polars
3204
3185
  weights: nil,
3205
3186
  min_periods: nil,
3206
3187
  center: false,
3207
- ddof: 1,
3208
- warn_if_unsorted: true
3188
+ ddof: 1
3209
3189
  )
3210
- to_frame
3211
- .select(
3212
- Polars.col(name).rolling_std(
3213
- window_size,
3214
- weights: weights,
3215
- min_periods: min_periods,
3216
- center: center,
3217
- ddof: ddof,
3218
- warn_if_unsorted: warn_if_unsorted
3219
- )
3220
- )
3221
- .to_series
3190
+ super
3222
3191
  end
3223
3192
 
3224
3193
  # Compute a rolling variance.
@@ -3259,21 +3228,9 @@ module Polars
3259
3228
  weights: nil,
3260
3229
  min_periods: nil,
3261
3230
  center: false,
3262
- ddof: 1,
3263
- warn_if_unsorted: true
3231
+ ddof: 1
3264
3232
  )
3265
- to_frame
3266
- .select(
3267
- Polars.col(name).rolling_var(
3268
- window_size,
3269
- weights: weights,
3270
- min_periods: min_periods,
3271
- center: center,
3272
- ddof: ddof,
3273
- warn_if_unsorted: warn_if_unsorted
3274
- )
3275
- )
3276
- .to_series
3233
+ super
3277
3234
  end
3278
3235
 
3279
3236
  # def rolling_apply
@@ -3312,24 +3269,9 @@ module Polars
3312
3269
  window_size,
3313
3270
  weights: nil,
3314
3271
  min_periods: nil,
3315
- center: false,
3316
- warn_if_unsorted: true
3272
+ center: false
3317
3273
  )
3318
- if min_periods.nil?
3319
- min_periods = window_size
3320
- end
3321
-
3322
- to_frame
3323
- .select(
3324
- Polars.col(name).rolling_median(
3325
- window_size,
3326
- weights: weights,
3327
- min_periods: min_periods,
3328
- center: center,
3329
- warn_if_unsorted: warn_if_unsorted
3330
- )
3331
- )
3332
- .to_series
3274
+ super
3333
3275
  end
3334
3276
 
3335
3277
  # Compute a rolling quantile.
@@ -3385,26 +3327,9 @@ module Polars
3385
3327
  window_size: 2,
3386
3328
  weights: nil,
3387
3329
  min_periods: nil,
3388
- center: false,
3389
- warn_if_unsorted: true
3330
+ center: false
3390
3331
  )
3391
- if min_periods.nil?
3392
- min_periods = window_size
3393
- end
3394
-
3395
- to_frame
3396
- .select(
3397
- Polars.col(name).rolling_quantile(
3398
- quantile,
3399
- interpolation: interpolation,
3400
- window_size: window_size,
3401
- weights: weights,
3402
- min_periods: min_periods,
3403
- center: center,
3404
- warn_if_unsorted: warn_if_unsorted
3405
- )
3406
- )
3407
- .to_series
3332
+ super
3408
3333
  end
3409
3334
 
3410
3335
  # Compute a rolling skew.
@@ -3806,7 +3731,7 @@ module Polars
3806
3731
  # # null
3807
3732
  # # 10
3808
3733
  # # ]
3809
- def clip(min_val, max_val)
3734
+ def clip(min_val = nil, max_val = nil)
3810
3735
  super
3811
3736
  end
3812
3737
 
@@ -3883,33 +3808,20 @@ module Polars
3883
3808
  # # 200
3884
3809
  # # ]
3885
3810
  #
3886
- # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3811
+ # @example Passing a mapping with replacements is also supported as syntactic sugar.
3887
3812
  # mapping = {2 => 100, 3 => 200}
3888
- # s.replace(mapping, default: -1)
3813
+ # s.replace(mapping)
3889
3814
  # # =>
3890
3815
  # # shape: (4,)
3891
3816
  # # Series: '' [i64]
3892
3817
  # # [
3893
- # # -1
3818
+ # # 1
3894
3819
  # # 100
3895
3820
  # # 100
3896
3821
  # # 200
3897
3822
  # # ]
3898
3823
  #
3899
- # @example The default can be another Series.
3900
- # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3901
- # s.replace(2, 100, default: default)
3902
- # # =>
3903
- # # shape: (4,)
3904
- # # Series: '' [f64]
3905
- # # [
3906
- # # 2.5
3907
- # # 100.0
3908
- # # 100.0
3909
- # # 10.0
3910
- # # ]
3911
- #
3912
- # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3824
+ # @example The original data type is preserved when replacing by values of a different data type.
3913
3825
  # s = Polars::Series.new(["x", "y", "z"])
3914
3826
  # mapping = {"x" => 1, "y" => 2, "z" => 3}
3915
3827
  # s.replace(mapping)
@@ -3921,28 +3833,6 @@ module Polars
3921
3833
  # # "2"
3922
3834
  # # "3"
3923
3835
  # # ]
3924
- #
3925
- # @example
3926
- # s.replace(mapping, default: nil)
3927
- # # =>
3928
- # # shape: (3,)
3929
- # # Series: '' [i64]
3930
- # # [
3931
- # # 1
3932
- # # 2
3933
- # # 3
3934
- # # ]
3935
- #
3936
- # @example Set the `return_dtype` parameter to control the resulting data type directly.
3937
- # s.replace(mapping, return_dtype: Polars::UInt8)
3938
- # # =>
3939
- # # shape: (3,)
3940
- # # Series: '' [u8]
3941
- # # [
3942
- # # 1
3943
- # # 2
3944
- # # 3
3945
- # # ]
3946
3836
  def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3947
3837
  super
3948
3838
  end
@@ -3989,7 +3879,8 @@ module Polars
3989
3879
  half_life: nil,
3990
3880
  alpha: nil,
3991
3881
  adjust: true,
3992
- min_periods: 1
3882
+ min_periods: 1,
3883
+ ignore_nulls: true
3993
3884
  )
3994
3885
  super
3995
3886
  end
@@ -4004,7 +3895,8 @@ module Polars
4004
3895
  alpha: nil,
4005
3896
  adjust: true,
4006
3897
  bias: false,
4007
- min_periods: 1
3898
+ min_periods: 1,
3899
+ ignore_nulls: true
4008
3900
  )
4009
3901
  super
4010
3902
  end
@@ -4019,7 +3911,8 @@ module Polars
4019
3911
  alpha: nil,
4020
3912
  adjust: true,
4021
3913
  bias: false,
4022
- min_periods: 1
3914
+ min_periods: 1,
3915
+ ignore_nulls: true
4023
3916
  )
4024
3917
  super
4025
3918
  end
@@ -4218,12 +4111,12 @@ module Polars
4218
4111
  end
4219
4112
 
4220
4113
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
4221
- ts = Utils._datetime_to_pl_timestamp(other, time_unit)
4114
+ ts = Utils.datetime_to_int(other, time_unit)
4222
4115
  f = ffi_func("#{op}_<>", Int64, _s)
4223
4116
  fail if f.nil?
4224
4117
  return Utils.wrap_s(f.call(ts))
4225
4118
  elsif other.is_a?(::Date) && dtype == Date
4226
- d = Utils._date_to_pl_date(other)
4119
+ d = Utils.date_to_int(other)
4227
4120
  f = ffi_func("#{op}_<>", Int32, _s)
4228
4121
  fail if f.nil?
4229
4122
  return Utils.wrap_s(f.call(d))
@@ -4309,10 +4202,18 @@ module Polars
4309
4202
  # TODO improve performance
4310
4203
  constructor.call(name, values.to_a, strict)
4311
4204
  end
4312
- elsif values.shape.length == 2
4205
+ elsif values.shape.sum == 0
4313
4206
  raise Todo
4314
4207
  else
4315
- raise Todo
4208
+ original_shape = values.shape
4209
+ values = values.reshape(original_shape.inject(&:*))
4210
+ rb_s = numo_to_rbseries(
4211
+ name,
4212
+ values,
4213
+ strict: strict,
4214
+ nan_to_null: nan_to_null
4215
+ )
4216
+ Utils.wrap_s(rb_s).reshape(original_shape)._s
4316
4217
  end
4317
4218
  end
4318
4219
 
@@ -4362,6 +4263,8 @@ module Polars
4362
4263
  end
4363
4264
 
4364
4265
  constructor = polars_type_to_constructor(dtype)
4266
+ # TODO remove
4267
+ strict = false if dtype == Decimal
4365
4268
  rbseries = constructor.call(name, values, strict)
4366
4269
 
4367
4270
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
@@ -4370,11 +4273,11 @@ module Polars
4370
4273
  rbseries = rbseries.cast(dtype, true)
4371
4274
  end
4372
4275
  end
4373
- return rbseries
4276
+ rbseries
4374
4277
  elsif dtype == Struct
4375
4278
  struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
4376
4279
  empty = {}
4377
- return DataFrame.sequence_to_rbdf(
4280
+ DataFrame.sequence_to_rbdf(
4378
4281
  values.map { |v| v.nil? ? empty : v },
4379
4282
  schema: struct_schema,
4380
4283
  orient: "row",
@@ -4399,13 +4302,13 @@ module Polars
4399
4302
  # TODO
4400
4303
  time_unit = nil
4401
4304
 
4402
- rb_series = RbSeries.new_from_anyvalues(name, values, strict)
4305
+ rb_series = RbSeries.new_from_any_values(name, values, strict)
4403
4306
  if time_unit.nil?
4404
4307
  s = Utils.wrap_s(rb_series)
4405
4308
  else
4406
4309
  s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
4407
4310
  end
4408
- return s._s
4311
+ s._s
4409
4312
  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
4410
4313
  raise Todo
4411
4314
  elsif ruby_dtype == ::Array
@@ -4419,11 +4322,11 @@ module Polars
4419
4322
  end
4420
4323
  return srs
4421
4324
  end
4422
- return sequence_from_anyvalue_or_object(name, values)
4325
+ sequence_from_anyvalue_or_object(name, values)
4423
4326
  elsif ruby_dtype == Series
4424
- return RbSeries.new_series_list(name, values.map(&:_s), strict)
4327
+ RbSeries.new_series_list(name, values.map(&:_s), strict)
4425
4328
  elsif ruby_dtype == RbSeries
4426
- return RbSeries.new_series_list(name, values, strict)
4329
+ RbSeries.new_series_list(name, values, strict)
4427
4330
  else
4428
4331
  constructor =
4429
4332
  if value.is_a?(::String)
@@ -4438,13 +4341,26 @@ module Polars
4438
4341
  else
4439
4342
  rb_type_to_constructor(value.class)
4440
4343
  end
4441
- constructor.call(name, values, strict)
4344
+
4345
+ construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
4346
+ end
4347
+ end
4348
+ end
4349
+
4350
+ def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
4351
+ begin
4352
+ constructor.call(name, values, strict)
4353
+ rescue
4354
+ if dtype.nil?
4355
+ RbSeries.new_from_any_values(name, values, strict)
4356
+ else
4357
+ RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
4442
4358
  end
4443
4359
  end
4444
4360
  end
4445
4361
 
4446
4362
  def sequence_from_anyvalue_or_object(name, values)
4447
- RbSeries.new_from_anyvalues(name, values, true)
4363
+ RbSeries.new_from_any_values(name, values, true)
4448
4364
  rescue
4449
4365
  RbSeries.new_object(name, values, false)
4450
4366
  end
@@ -4461,10 +4377,10 @@ module Polars
4461
4377
  UInt32 => RbSeries.method(:new_opt_u32),
4462
4378
  UInt64 => RbSeries.method(:new_opt_u64),
4463
4379
  Decimal => RbSeries.method(:new_decimal),
4464
- Date => RbSeries.method(:new_from_anyvalues),
4465
- Datetime => RbSeries.method(:new_from_anyvalues),
4466
- Duration => RbSeries.method(:new_from_anyvalues),
4467
- Time => RbSeries.method(:new_from_anyvalues),
4380
+ Date => RbSeries.method(:new_from_any_values),
4381
+ Datetime => RbSeries.method(:new_from_any_values),
4382
+ Duration => RbSeries.method(:new_from_any_values),
4383
+ Time => RbSeries.method(:new_from_any_values),
4468
4384
  Boolean => RbSeries.method(:new_opt_bool),
4469
4385
  Utf8 => RbSeries.method(:new_str),
4470
4386
  Object => RbSeries.method(:new_object),