polars-df 0.7.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +17 -6
  8. data/ext/polars/src/batched_csv.rs +6 -7
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
  12. data/ext/polars/src/dataframe.rs +96 -116
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/categorical.rs +8 -1
  15. data/ext/polars/src/expr/datetime.rs +22 -56
  16. data/ext/polars/src/expr/general.rs +124 -37
  17. data/ext/polars/src/expr/list.rs +52 -4
  18. data/ext/polars/src/expr/meta.rs +48 -0
  19. data/ext/polars/src/expr/rolling.rs +16 -10
  20. data/ext/polars/src/expr/string.rs +68 -17
  21. data/ext/polars/src/expr/struct.rs +8 -4
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +103 -48
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/range.rs +5 -10
  26. data/ext/polars/src/functions/string_cache.rs +14 -0
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
  28. data/ext/polars/src/lib.rs +245 -187
  29. data/ext/polars/src/map/dataframe.rs +1 -1
  30. data/ext/polars/src/map/mod.rs +2 -2
  31. data/ext/polars/src/map/series.rs +6 -6
  32. data/ext/polars/src/object.rs +0 -30
  33. data/ext/polars/src/on_startup.rs +32 -0
  34. data/ext/polars/src/series/aggregation.rs +23 -0
  35. data/ext/polars/src/series/construction.rs +1 -1
  36. data/ext/polars/src/series/export.rs +2 -2
  37. data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
  38. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
  39. data/ext/polars/src/utils.rs +1 -1
  40. data/lib/polars/array_expr.rb +449 -0
  41. data/lib/polars/array_name_space.rb +346 -0
  42. data/lib/polars/cat_expr.rb +24 -0
  43. data/lib/polars/cat_name_space.rb +75 -0
  44. data/lib/polars/config.rb +2 -2
  45. data/lib/polars/data_frame.rb +248 -108
  46. data/lib/polars/data_types.rb +195 -29
  47. data/lib/polars/date_time_expr.rb +41 -24
  48. data/lib/polars/date_time_name_space.rb +12 -12
  49. data/lib/polars/exceptions.rb +12 -1
  50. data/lib/polars/expr.rb +1080 -195
  51. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  52. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  53. data/lib/polars/functions/as_datatype.rb +248 -0
  54. data/lib/polars/functions/col.rb +47 -0
  55. data/lib/polars/functions/eager.rb +182 -0
  56. data/lib/polars/functions/lazy.rb +1280 -0
  57. data/lib/polars/functions/len.rb +49 -0
  58. data/lib/polars/functions/lit.rb +35 -0
  59. data/lib/polars/functions/random.rb +16 -0
  60. data/lib/polars/functions/range/date_range.rb +103 -0
  61. data/lib/polars/functions/range/int_range.rb +51 -0
  62. data/lib/polars/functions/repeat.rb +144 -0
  63. data/lib/polars/functions/whenthen.rb +27 -0
  64. data/lib/polars/functions.rb +29 -416
  65. data/lib/polars/group_by.rb +3 -3
  66. data/lib/polars/io.rb +21 -28
  67. data/lib/polars/lazy_frame.rb +390 -76
  68. data/lib/polars/list_expr.rb +152 -6
  69. data/lib/polars/list_name_space.rb +102 -0
  70. data/lib/polars/meta_expr.rb +175 -7
  71. data/lib/polars/series.rb +557 -59
  72. data/lib/polars/sql_context.rb +1 -1
  73. data/lib/polars/string_cache.rb +75 -0
  74. data/lib/polars/string_expr.rb +412 -96
  75. data/lib/polars/string_name_space.rb +4 -4
  76. data/lib/polars/struct_expr.rb +1 -1
  77. data/lib/polars/struct_name_space.rb +1 -1
  78. data/lib/polars/testing.rb +507 -0
  79. data/lib/polars/utils.rb +64 -20
  80. data/lib/polars/version.rb +1 -1
  81. data/lib/polars.rb +15 -2
  82. metadata +40 -9
  83. data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/series.rb CHANGED
@@ -34,7 +34,7 @@ module Polars
34
34
  # s3 = Polars::Series.new([1, 2, 3])
35
35
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
36
36
  # Handle case where values are passed as the first argument
37
- if !name.nil? && !name.is_a?(String)
37
+ if !name.nil? && !name.is_a?(::String)
38
38
  if values.nil?
39
39
  values = name
40
40
  name = nil
@@ -46,7 +46,7 @@ module Polars
46
46
  name = "" if name.nil?
47
47
 
48
48
  # TODO improve
49
- if values.is_a?(Range) && values.begin.is_a?(String)
49
+ if values.is_a?(Range) && values.begin.is_a?(::String)
50
50
  values = values.to_a
51
51
  end
52
52
 
@@ -214,6 +214,126 @@ module Polars
214
214
  _comp(other, :lt_eq)
215
215
  end
216
216
 
217
+ # Method equivalent of operator expression `series <= other`.
218
+ #
219
+ # @return [Series]
220
+ def le(other)
221
+ self <= other
222
+ end
223
+
224
+ # Method equivalent of operator expression `series < other`.
225
+ #
226
+ # @return [Series]
227
+ def lt(other)
228
+ self < other
229
+ end
230
+
231
+ # Method equivalent of operator expression `series == other`.
232
+ #
233
+ # @return [Series]
234
+ def eq(other)
235
+ self == other
236
+ end
237
+
238
+ # Method equivalent of equality operator `series == other` where `nil == nil`.
239
+ #
240
+ # This differs from the standard `eq` where null values are propagated.
241
+ #
242
+ # @param other [Object]
243
+ # A literal or expression value to compare with.
244
+ #
245
+ # @return [Object]
246
+ #
247
+ # @example
248
+ # s1 = Polars::Series.new("a", [333, 200, nil])
249
+ # s2 = Polars::Series.new("a", [100, 200, nil])
250
+ # s1.eq(s2)
251
+ # # =>
252
+ # # shape: (3,)
253
+ # # Series: 'a' [bool]
254
+ # # [
255
+ # # false
256
+ # # true
257
+ # # null
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s1.eq_missing(s2)
262
+ # # =>
263
+ # # shape: (3,)
264
+ # # Series: 'a' [bool]
265
+ # # [
266
+ # # false
267
+ # # true
268
+ # # true
269
+ # # ]
270
+ def eq_missing(other)
271
+ if other.is_a?(Expr)
272
+ return Polars.lit(self).eq_missing(other)
273
+ end
274
+ to_frame.select(Polars.col(name).eq_missing(other)).to_series
275
+ end
276
+
277
+ # Method equivalent of operator expression `series != other`.
278
+ #
279
+ # @return [Series]
280
+ def ne(other)
281
+ self != other
282
+ end
283
+
284
+ # Method equivalent of inequality operator `series != other` where `nil == nil`.
285
+ #
286
+ # This differs from the standard `ne` where null values are propagated.
287
+ #
288
+ # @param other [Object]
289
+ # A literal or expression value to compare with.
290
+ #
291
+ # @return [Object]
292
+ #
293
+ # @example
294
+ # s1 = Polars::Series.new("a", [333, 200, nil])
295
+ # s2 = Polars::Series.new("a", [100, 200, nil])
296
+ # s1.ne(s2)
297
+ # # =>
298
+ # # shape: (3,)
299
+ # # Series: 'a' [bool]
300
+ # # [
301
+ # # true
302
+ # # false
303
+ # # null
304
+ # # ]
305
+ #
306
+ # @example
307
+ # s1.ne_missing(s2)
308
+ # # =>
309
+ # # shape: (3,)
310
+ # # Series: 'a' [bool]
311
+ # # [
312
+ # # true
313
+ # # false
314
+ # # false
315
+ # # ]
316
+ def ne_missing(other)
317
+ if other.is_a?(Expr)
318
+ return Polars.lit(self).ne_missing(other)
319
+ end
320
+ to_frame.select(Polars.col(name).ne_missing(other)).to_series
321
+ end
322
+
323
+ # Method equivalent of operator expression `series >= other`.
324
+ #
325
+ # @return [Series]
326
+ def ge(other)
327
+ self >= other
328
+ end
329
+
330
+ # Method equivalent of operator expression `series > other`.
331
+ #
332
+ # @return [Series]
333
+ def gt(other)
334
+ self > other
335
+ end
336
+
217
337
  # Performs addition.
218
338
  #
219
339
  # @return [Series]
@@ -341,7 +461,7 @@ module Polars
341
461
  def []=(key, value)
342
462
  if value.is_a?(::Array)
343
463
  if is_numeric || is_datelike
344
- set_at_idx(key, value)
464
+ scatter(key, value)
345
465
  return
346
466
  end
347
467
  raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
@@ -351,9 +471,9 @@ module Polars
351
471
  if key.dtype == Boolean
352
472
  self._s = set(key, value)._s
353
473
  elsif key.dtype == UInt64
354
- self._s = set_at_idx(key.cast(UInt32), value)._s
474
+ self._s = scatter(key.cast(UInt32), value)._s
355
475
  elsif key.dtype == UInt32
356
- self._s = set_at_idx(key, value)._s
476
+ self._s = scatter(key, value)._s
357
477
  else
358
478
  raise Todo
359
479
  end
@@ -411,11 +531,11 @@ module Polars
411
531
  # Check if any boolean value in the column is `true`.
412
532
  #
413
533
  # @return [Boolean]
414
- def any?(&block)
534
+ def any?(ignore_nulls: true, &block)
415
535
  if block_given?
416
- apply(&block).any?
536
+ apply(skip_nulls: ignore_nulls, &block).any?
417
537
  else
418
- to_frame.select(Polars.col(name).any).to_series[0]
538
+ _s.any(ignore_nulls)
419
539
  end
420
540
  end
421
541
  alias_method :any, :any?
@@ -423,11 +543,11 @@ module Polars
423
543
  # Check if all boolean values in the column are `true`.
424
544
  #
425
545
  # @return [Boolean]
426
- def all?(&block)
546
+ def all?(ignore_nulls: true, &block)
427
547
  if block_given?
428
- apply(&block).all?
548
+ apply(skip_nulls: ignore_nulls, &block).all?
429
549
  else
430
- to_frame.select(Polars.col(name).all).to_series[0]
550
+ _s.all(ignore_nulls)
431
551
  end
432
552
  end
433
553
  alias_method :all, :all?
@@ -735,6 +855,212 @@ module Polars
735
855
  Utils.wrap_df(_s.to_dummies(separator, drop_first))
736
856
  end
737
857
 
858
+ # Bin continuous values into discrete categories.
859
+ #
860
+ # @param breaks [Array]
861
+ # List of unique cut points.
862
+ # @param labels [Array]
863
+ # Names of the categories. The number of labels must be equal to the number
864
+ # of cut points plus one.
865
+ # @param left_closed [Boolean]
866
+ # Set the intervals to be left-closed instead of right-closed.
867
+ # @param include_breaks [Boolean]
868
+ # Include a column with the right endpoint of the bin each observation falls
869
+ # in. This will change the data type of the output from a
870
+ # `Categorical` to a `Struct`.
871
+ #
872
+ # @return [Series]
873
+ #
874
+ # @example Divide the column into three categories.
875
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
876
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
877
+ # # =>
878
+ # # shape: (5,)
879
+ # # Series: 'foo' [cat]
880
+ # # [
881
+ # # "a"
882
+ # # "a"
883
+ # # "b"
884
+ # # "b"
885
+ # # "c"
886
+ # # ]
887
+ #
888
+ # @example Create a DataFrame with the breakpoint and category for each value.
889
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
890
+ # s.to_frame.with_columns(cut).unnest("cut")
891
+ # # =>
892
+ # # shape: (5, 3)
893
+ # # ┌─────┬─────────────┬────────────┐
894
+ # # │ foo ┆ break_point ┆ category │
895
+ # # │ --- ┆ --- ┆ --- │
896
+ # # │ i64 ┆ f64 ┆ cat │
897
+ # # ╞═════╪═════════════╪════════════╡
898
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
899
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
900
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
901
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
902
+ # # │ 2 ┆ inf ┆ (1, inf] │
903
+ # # └─────┴─────────────┴────────────┘
904
+ def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
905
+ result = (
906
+ to_frame
907
+ .select(
908
+ Polars.col(name).cut(
909
+ breaks,
910
+ labels: labels,
911
+ left_closed: left_closed,
912
+ include_breaks: include_breaks
913
+ )
914
+ )
915
+ .to_series
916
+ )
917
+
918
+ if include_breaks
919
+ result = result.struct.rename_fields(["break_point", "category"])
920
+ end
921
+
922
+ result
923
+ end
924
+
925
+ # Bin continuous values into discrete categories based on their quantiles.
926
+ #
927
+ # @param quantiles [Array]
928
+ # Either a list of quantile probabilities between 0 and 1 or a positive
929
+ # integer determining the number of bins with uniform probability.
930
+ # @param labels [Array]
931
+ # Names of the categories. The number of labels must be equal to the number
932
+ # of cut points plus one.
933
+ # @param left_closed [Boolean]
934
+ # Set the intervals to be left-closed instead of right-closed.
935
+ # @param allow_duplicates [Boolean]
936
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
937
+ # rather than raising a `DuplicateError`. This can happen even with unique
938
+ # probabilities, depending on the data.
939
+ # @param include_breaks [Boolean]
940
+ # Include a column with the right endpoint of the bin each observation falls
941
+ # in. This will change the data type of the output from a
942
+ # `Categorical` to a `Struct`.
943
+ #
944
+ # @return [Series]
945
+ #
946
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
947
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
948
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
949
+ # # =>
950
+ # # shape: (5,)
951
+ # # Series: 'foo' [cat]
952
+ # # [
953
+ # # "a"
954
+ # # "a"
955
+ # # "b"
956
+ # # "b"
957
+ # # "c"
958
+ # # ]
959
+ #
960
+ # @example Divide a column into two categories using uniform quantile probabilities.
961
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
962
+ # # =>
963
+ # # shape: (5,)
964
+ # # Series: 'foo' [cat]
965
+ # # [
966
+ # # "low"
967
+ # # "low"
968
+ # # "high"
969
+ # # "high"
970
+ # # "high"
971
+ # # ]
972
+ #
973
+ # @example Create a DataFrame with the breakpoint and category for each value.
974
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
975
+ # s.to_frame.with_columns(cut).unnest("cut")
976
+ # # =>
977
+ # # shape: (5, 3)
978
+ # # ┌─────┬─────────────┬────────────┐
979
+ # # │ foo ┆ break_point ┆ category │
980
+ # # │ --- ┆ --- ┆ --- │
981
+ # # │ i64 ┆ f64 ┆ cat │
982
+ # # ╞═════╪═════════════╪════════════╡
983
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
984
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
985
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
986
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
987
+ # # │ 2 ┆ inf ┆ (1, inf] │
988
+ # # └─────┴─────────────┴────────────┘
989
+ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
990
+ result = (
991
+ to_frame
992
+ .select(
993
+ Polars.col(name).qcut(
994
+ quantiles,
995
+ labels: labels,
996
+ left_closed: left_closed,
997
+ allow_duplicates: allow_duplicates,
998
+ include_breaks: include_breaks
999
+ )
1000
+ )
1001
+ .to_series
1002
+ )
1003
+
1004
+ if include_breaks
1005
+ result = result.struct.rename_fields(["break_point", "category"])
1006
+ end
1007
+
1008
+ result
1009
+ end
1010
+
1011
+ # Get the lengths of runs of identical values.
1012
+ #
1013
+ # @return [Series]
1014
+ #
1015
+ # @example
1016
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1017
+ # s.rle.struct.unnest
1018
+ # # =>
1019
+ # # shape: (6, 2)
1020
+ # # ┌─────────┬────────┐
1021
+ # # │ lengths ┆ values │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ i32 ┆ i64 │
1024
+ # # ╞═════════╪════════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────────┴────────┘
1032
+ def rle
1033
+ super
1034
+ end
1035
+
1036
+ # Map values to run IDs.
1037
+ #
1038
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
1039
+ # which it falls. This is especially useful when you want to define groups by
1040
+ # runs of identical values rather than the values themselves.
1041
+ #
1042
+ # @return [Series]
1043
+ #
1044
+ # @example
1045
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1046
+ # s.rle_id
1047
+ # # =>
1048
+ # # shape: (8,)
1049
+ # # Series: 's' [u32]
1050
+ # # [
1051
+ # # 0
1052
+ # # 0
1053
+ # # 1
1054
+ # # 2
1055
+ # # 3
1056
+ # # 4
1057
+ # # 5
1058
+ # # 5
1059
+ # # ]
1060
+ def rle_id
1061
+ super
1062
+ end
1063
+
738
1064
  # Count the unique values in a Series.
739
1065
  #
740
1066
  # @param sort [Boolean]
@@ -929,7 +1255,7 @@ module Polars
929
1255
  #
930
1256
  # @example
931
1257
  # s = Polars::Series.new("a", [1, 2, 3])
932
- # s.cumsum
1258
+ # s.cum_sum
933
1259
  # # =>
934
1260
  # # shape: (3,)
935
1261
  # # Series: 'a' [i64]
@@ -938,9 +1264,10 @@ module Polars
938
1264
  # # 3
939
1265
  # # 6
940
1266
  # # ]
941
- def cumsum(reverse: false)
1267
+ def cum_sum(reverse: false)
942
1268
  super
943
1269
  end
1270
+ alias_method :cumsum, :cum_sum
944
1271
 
945
1272
  # Get an array with the cumulative min computed at every element.
946
1273
  #
@@ -951,7 +1278,7 @@ module Polars
951
1278
  #
952
1279
  # @example
953
1280
  # s = Polars::Series.new("a", [3, 5, 1])
954
- # s.cummin
1281
+ # s.cum_min
955
1282
  # # =>
956
1283
  # # shape: (3,)
957
1284
  # # Series: 'a' [i64]
@@ -960,9 +1287,10 @@ module Polars
960
1287
  # # 3
961
1288
  # # 1
962
1289
  # # ]
963
- def cummin(reverse: false)
1290
+ def cum_min(reverse: false)
964
1291
  super
965
1292
  end
1293
+ alias_method :cummin, :cum_min
966
1294
 
967
1295
  # Get an array with the cumulative max computed at every element.
968
1296
  #
@@ -973,7 +1301,7 @@ module Polars
973
1301
  #
974
1302
  # @example
975
1303
  # s = Polars::Series.new("a", [3, 5, 1])
976
- # s.cummax
1304
+ # s.cum_max
977
1305
  # # =>
978
1306
  # # shape: (3,)
979
1307
  # # Series: 'a' [i64]
@@ -982,9 +1310,10 @@ module Polars
982
1310
  # # 5
983
1311
  # # 5
984
1312
  # # ]
985
- def cummax(reverse: false)
1313
+ def cum_max(reverse: false)
986
1314
  super
987
1315
  end
1316
+ alias_method :cummax, :cum_max
988
1317
 
989
1318
  # Get an array with the cumulative product computed at every element.
990
1319
  #
@@ -999,7 +1328,7 @@ module Polars
999
1328
  #
1000
1329
  # @example
1001
1330
  # s = Polars::Series.new("a", [1, 2, 3])
1002
- # s.cumprod
1331
+ # s.cum_prod
1003
1332
  # # =>
1004
1333
  # # shape: (3,)
1005
1334
  # # Series: 'a' [i64]
@@ -1008,9 +1337,10 @@ module Polars
1008
1337
  # # 2
1009
1338
  # # 6
1010
1339
  # # ]
1011
- def cumprod(reverse: false)
1340
+ def cum_prod(reverse: false)
1012
1341
  super
1013
1342
  end
1343
+ alias_method :cumprod, :cum_prod
1014
1344
 
1015
1345
  # Get the first `n` rows.
1016
1346
  #
@@ -1237,26 +1567,56 @@ module Polars
1237
1567
  # # 2
1238
1568
  # # 1
1239
1569
  # # ]
1240
- def sort(reverse: false, in_place: false)
1570
+ def sort(reverse: false, nulls_last: false, in_place: false)
1241
1571
  if in_place
1242
- self._s = _s.sort(reverse)
1572
+ self._s = _s.sort(reverse, nulls_last)
1243
1573
  self
1244
1574
  else
1245
- Utils.wrap_s(_s.sort(reverse))
1575
+ Utils.wrap_s(_s.sort(reverse, nulls_last))
1246
1576
  end
1247
1577
  end
1248
1578
 
1249
1579
  # Return the `k` largest elements.
1250
1580
  #
1251
- # If `reverse: true`, the smallest elements will be given.
1581
+ # @param k [Integer]
1582
+ # Number of elements to return.
1583
+ #
1584
+ # @return [Series]
1585
+ #
1586
+ # @example
1587
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1588
+ # s.top_k(k: 3)
1589
+ # # =>
1590
+ # # shape: (3,)
1591
+ # # Series: 'a' [i64]
1592
+ # # [
1593
+ # # 5
1594
+ # # 4
1595
+ # # 3
1596
+ # # ]
1597
+ def top_k(k: 5)
1598
+ super
1599
+ end
1600
+
1601
+ # Return the `k` smallest elements.
1252
1602
  #
1253
1603
  # @param k [Integer]
1254
1604
  # Number of elements to return.
1255
- # @param reverse [Boolean]
1256
- # Return the smallest elements.
1257
1605
  #
1258
1606
  # @return [Series]
1259
- def top_k(k: 5, reverse: false)
1607
+ #
1608
+ # @example
1609
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1610
+ # s.bottom_k(k: 3)
1611
+ # # =>
1612
+ # # shape: (3,)
1613
+ # # Series: 'a' [i64]
1614
+ # # [
1615
+ # # 1
1616
+ # # 2
1617
+ # # 3
1618
+ # # ]
1619
+ def bottom_k(k: 5)
1260
1620
  super
1261
1621
  end
1262
1622
 
@@ -1705,26 +2065,38 @@ module Polars
1705
2065
  # @example
1706
2066
  # s = Polars::Series.new("a", [1, 2, 3])
1707
2067
  # s2 = Polars::Series.new("b", [4, 5, 6])
1708
- # s.series_equal(s)
2068
+ # s.equals(s)
1709
2069
  # # => true
1710
- # s.series_equal(s2)
2070
+ # s.equals(s2)
1711
2071
  # # => false
1712
- def series_equal(other, null_equal: false, strict: false)
1713
- _s.series_equal(other._s, null_equal, strict)
2072
+ def equals(other, null_equal: false, strict: false)
2073
+ _s.equals(other._s, null_equal, strict)
1714
2074
  end
2075
+ alias_method :series_equal, :equals
1715
2076
 
1716
- # Length of this Series.
2077
+ # Return the number of non-null elements in the Series.
1717
2078
  #
1718
2079
  # @return [Integer]
1719
2080
  #
1720
2081
  # @example
1721
- # s = Polars::Series.new("a", [1, 2, 3])
2082
+ # s = Polars::Series.new("a", [1, 2, nil])
2083
+ # s.count
2084
+ # # => 2
2085
+ def count
2086
+ len - null_count
2087
+ end
2088
+
2089
+ # Return the number of elements in the Series.
2090
+ #
2091
+ # @return [Integer]
2092
+ #
2093
+ # @example
2094
+ # s = Polars::Series.new("a", [1, 2, nil])
1722
2095
  # s.len
1723
2096
  # # => 3
1724
2097
  def len
1725
2098
  _s.len
1726
2099
  end
1727
- alias_method :count, :len
1728
2100
  alias_method :length, :len
1729
2101
  alias_method :size, :len
1730
2102
 
@@ -1886,7 +2258,7 @@ module Polars
1886
2258
  # s.is_utf8
1887
2259
  # # => true
1888
2260
  def is_utf8
1889
- dtype == Utf8
2261
+ dtype == String
1890
2262
  end
1891
2263
  alias_method :utf8?, :is_utf8
1892
2264
 
@@ -1920,7 +2292,7 @@ module Polars
1920
2292
  Int64 => Numo::Int64,
1921
2293
  Float32 => Numo::SFloat,
1922
2294
  Float64 => Numo::DFloat
1923
- }.fetch(dtype).cast(to_a)
2295
+ }.fetch(dtype.class).cast(to_a)
1924
2296
  elsif is_boolean
1925
2297
  Numo::Bit.cast(to_a)
1926
2298
  else
@@ -1959,7 +2331,7 @@ module Polars
1959
2331
  # # 3
1960
2332
  # # ]
1961
2333
  def set(filter, value)
1962
- Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype)}", filter._s, value))
2334
+ Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype.class)}", filter._s, value))
1963
2335
  end
1964
2336
 
1965
2337
  # Set values at the index locations.
@@ -1982,7 +2354,7 @@ module Polars
1982
2354
  # # 10
1983
2355
  # # 3
1984
2356
  # # ]
1985
- def set_at_idx(idx, value)
2357
+ def scatter(idx, value)
1986
2358
  if idx.is_a?(Integer)
1987
2359
  idx = [idx]
1988
2360
  end
@@ -1991,7 +2363,7 @@ module Polars
1991
2363
  end
1992
2364
 
1993
2365
  idx = Series.new("", idx)
1994
- if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
2366
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
1995
2367
  value = Series.new("", [value])
1996
2368
 
1997
2369
  # if we need to set more than a single value, we extend it
@@ -2001,9 +2373,10 @@ module Polars
2001
2373
  elsif !value.is_a?(Series)
2002
2374
  value = Series.new("", value)
2003
2375
  end
2004
- _s.set_at_idx(idx._s, value._s)
2376
+ _s.scatter(idx._s, value._s)
2005
2377
  self
2006
2378
  end
2379
+ alias_method :set_at_idx, :scatter
2007
2380
 
2008
2381
  # Create an empty copy of the current Series.
2009
2382
  #
@@ -2484,7 +2857,7 @@ module Polars
2484
2857
  #
2485
2858
  # @example
2486
2859
  # s = Polars::Series.new("a", [1, 2, 3])
2487
- # s.apply { |x| x + 10 }
2860
+ # s.map_elements { |x| x + 10 }
2488
2861
  # # =>
2489
2862
  # # shape: (3,)
2490
2863
  # # Series: 'a' [i64]
@@ -2493,7 +2866,7 @@ module Polars
2493
2866
  # # 12
2494
2867
  # # 13
2495
2868
  # # ]
2496
- def apply(return_dtype: nil, skip_nulls: true, &func)
2869
+ def map_elements(return_dtype: nil, skip_nulls: true, &func)
2497
2870
  if return_dtype.nil?
2498
2871
  pl_return_dtype = nil
2499
2872
  else
@@ -2501,7 +2874,8 @@ module Polars
2501
2874
  end
2502
2875
  Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
2503
2876
  end
2504
- alias_method :map, :apply
2877
+ alias_method :map, :map_elements
2878
+ alias_method :apply, :map_elements
2505
2879
 
2506
2880
  # Shift the values by a given period.
2507
2881
  #
@@ -2830,7 +3204,8 @@ module Polars
2830
3204
  weights: nil,
2831
3205
  min_periods: nil,
2832
3206
  center: false,
2833
- ddof: 1
3207
+ ddof: 1,
3208
+ warn_if_unsorted: true
2834
3209
  )
2835
3210
  to_frame
2836
3211
  .select(
@@ -2839,7 +3214,8 @@ module Polars
2839
3214
  weights: weights,
2840
3215
  min_periods: min_periods,
2841
3216
  center: center,
2842
- ddof: ddof
3217
+ ddof: ddof,
3218
+ warn_if_unsorted: warn_if_unsorted
2843
3219
  )
2844
3220
  )
2845
3221
  .to_series
@@ -2883,7 +3259,8 @@ module Polars
2883
3259
  weights: nil,
2884
3260
  min_periods: nil,
2885
3261
  center: false,
2886
- ddof: 1
3262
+ ddof: 1,
3263
+ warn_if_unsorted: true
2887
3264
  )
2888
3265
  to_frame
2889
3266
  .select(
@@ -2892,7 +3269,8 @@ module Polars
2892
3269
  weights: weights,
2893
3270
  min_periods: min_periods,
2894
3271
  center: center,
2895
- ddof: ddof
3272
+ ddof: ddof,
3273
+ warn_if_unsorted: warn_if_unsorted
2896
3274
  )
2897
3275
  )
2898
3276
  .to_series
@@ -2934,7 +3312,8 @@ module Polars
2934
3312
  window_size,
2935
3313
  weights: nil,
2936
3314
  min_periods: nil,
2937
- center: false
3315
+ center: false,
3316
+ warn_if_unsorted: true
2938
3317
  )
2939
3318
  if min_periods.nil?
2940
3319
  min_periods = window_size
@@ -2946,7 +3325,8 @@ module Polars
2946
3325
  window_size,
2947
3326
  weights: weights,
2948
3327
  min_periods: min_periods,
2949
- center: center
3328
+ center: center,
3329
+ warn_if_unsorted: warn_if_unsorted
2950
3330
  )
2951
3331
  )
2952
3332
  .to_series
@@ -3005,7 +3385,8 @@ module Polars
3005
3385
  window_size: 2,
3006
3386
  weights: nil,
3007
3387
  min_periods: nil,
3008
- center: false
3388
+ center: false,
3389
+ warn_if_unsorted: true
3009
3390
  )
3010
3391
  if min_periods.nil?
3011
3392
  min_periods = window_size
@@ -3019,7 +3400,8 @@ module Polars
3019
3400
  window_size: window_size,
3020
3401
  weights: weights,
3021
3402
  min_periods: min_periods,
3022
- center: center
3403
+ center: center,
3404
+ warn_if_unsorted: warn_if_unsorted
3023
3405
  )
3024
3406
  )
3025
3407
  .to_series
@@ -3076,8 +3458,8 @@ module Polars
3076
3458
  # # shape: (2,)
3077
3459
  # # Series: 'a' [i64]
3078
3460
  # # [
3079
- # # 1
3080
- # # 5
3461
+ # # 5
3462
+ # # 3
3081
3463
  # # ]
3082
3464
  def sample(
3083
3465
  n: nil,
@@ -3456,6 +3838,113 @@ module Polars
3456
3838
  super
3457
3839
  end
3458
3840
 
3841
+ # Replace values by different values.
3842
+ #
3843
+ # @param old [Object]
3844
+ # Value or sequence of values to replace.
3845
+ # Also accepts a mapping of values to their replacement.
3846
+ # @param new [Object]
3847
+ # Value or sequence of values to replace by.
3848
+ # Length must match the length of `old` or have length 1.
3849
+ # @param default [Object]
3850
+ # Set values that were not replaced to this value.
3851
+ # Defaults to keeping the original value.
3852
+ # Accepts expression input. Non-expression inputs are parsed as literals.
3853
+ # @param return_dtype [Object]
3854
+ # The data type of the resulting Series. If set to `nil` (default),
3855
+ # the data type is determined automatically based on the other inputs.
3856
+ #
3857
+ # @return [Series]
3858
+ #
3859
+ # @example Replace a single value by another value. Values that were not replaced remain unchanged.
3860
+ # s = Polars::Series.new([1, 2, 2, 3])
3861
+ # s.replace(2, 100)
3862
+ # # =>
3863
+ # # shape: (4,)
3864
+ # # Series: '' [i64]
3865
+ # # [
3866
+ # # 1
3867
+ # # 100
3868
+ # # 100
3869
+ # # 3
3870
+ # # ]
3871
+ #
3872
+ # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
3873
+ # s.replace([2, 3], [100, 200])
3874
+ # # =>
3875
+ # # shape: (4,)
3876
+ # # Series: '' [i64]
3877
+ # # [
3878
+ # # 1
3879
+ # # 100
3880
+ # # 100
3881
+ # # 200
3882
+ # # ]
3883
+ #
3884
+ # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3885
+ # mapping = {2 => 100, 3 => 200}
3886
+ # s.replace(mapping, default: -1)
3887
+ # # =>
3888
+ # # shape: (4,)
3889
+ # # Series: '' [i64]
3890
+ # # [
3891
+ # # -1
3892
+ # # 100
3893
+ # # 100
3894
+ # # 200
3895
+ # # ]
3896
+ #
3897
+ # @example The default can be another Series.
3898
+ # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3899
+ # s.replace(2, 100, default: default)
3900
+ # # =>
3901
+ # # shape: (4,)
3902
+ # # Series: '' [f64]
3903
+ # # [
3904
+ # # 2.5
3905
+ # # 100.0
3906
+ # # 100.0
3907
+ # # 10.0
3908
+ # # ]
3909
+ #
3910
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3911
+ # s = Polars::Series.new(["x", "y", "z"])
3912
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
3913
+ # s.replace(mapping)
3914
+ # # =>
3915
+ # # shape: (3,)
3916
+ # # Series: '' [str]
3917
+ # # [
3918
+ # # "1"
3919
+ # # "2"
3920
+ # # "3"
3921
+ # # ]
3922
+ #
3923
+ # @example
3924
+ # s.replace(mapping, default: nil)
3925
+ # # =>
3926
+ # # shape: (3,)
3927
+ # # Series: '' [i64]
3928
+ # # [
3929
+ # # 1
3930
+ # # 2
3931
+ # # 3
3932
+ # # ]
3933
+ #
3934
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
3935
+ # s.replace(mapping, return_dtype: Polars::UInt8)
3936
+ # # =>
3937
+ # # shape: (3,)
3938
+ # # Series: '' [u8]
3939
+ # # [
3940
+ # # 1
3941
+ # # 2
3942
+ # # 3
3943
+ # # ]
3944
+ def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3945
+ super
3946
+ end
3947
+
3459
3948
  # Reshape this Series to a flat Series or a Series of Lists.
3460
3949
  #
3461
3950
  # @param dims [Array]
@@ -3665,7 +4154,7 @@ module Polars
3665
4154
  end
3666
4155
 
3667
4156
  def _pos_idxs(idxs)
3668
- idx_type = Polars._get_idx_type
4157
+ idx_type = Plr.get_index_type
3669
4158
 
3670
4159
  if idxs.is_a?(Series)
3671
4160
  if idxs.dtype == idx_type
@@ -3750,7 +4239,7 @@ module Polars
3750
4239
  end
3751
4240
 
3752
4241
  def ffi_func(name, dtype, _s)
3753
- _s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
4242
+ _s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype.class))) if DTYPE_TO_FFINAME.key?(dtype.class)
3754
4243
  end
3755
4244
 
3756
4245
  def _arithmetic(other, op)
@@ -3761,7 +4250,7 @@ module Polars
3761
4250
  return Utils.wrap_s(_s.send(op, other._s))
3762
4251
  end
3763
4252
 
3764
- if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
4253
+ if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !is_float
3765
4254
  _s2 = sequence_to_rbseries(name, [other])
3766
4255
  return Utils.wrap_s(_s.send(op, _s2))
3767
4256
  end
@@ -3865,21 +4354,29 @@ module Polars
3865
4354
  end
3866
4355
  end
3867
4356
 
3868
- if !dtype.nil? && ![List, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
4357
+ if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3869
4358
  if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
3870
- dtype = Array.new(value.size)
4359
+ dtype = Array.new(nil, value.size)
3871
4360
  end
3872
4361
 
3873
4362
  constructor = polars_type_to_constructor(dtype)
3874
4363
  rbseries = constructor.call(name, values, strict)
3875
4364
 
3876
4365
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
3877
- if [Date, Datetime, Duration, Time, Categorical, Boolean].include?(base_type)
4366
+ if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type)
3878
4367
  if rbseries.dtype != dtype
3879
4368
  rbseries = rbseries.cast(dtype, true)
3880
4369
  end
3881
4370
  end
3882
4371
  return rbseries
4372
+ elsif dtype == Struct
4373
+ struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
4374
+ empty = {}
4375
+ return DataFrame.sequence_to_rbdf(
4376
+ values.map { |v| v.nil? ? empty : v },
4377
+ schema: struct_schema,
4378
+ orient: "row",
4379
+ ).to_struct(name)
3883
4380
  else
3884
4381
  if ruby_dtype.nil?
3885
4382
  if value.nil?
@@ -3927,7 +4424,7 @@ module Polars
3927
4424
  return RbSeries.new_series_list(name, values, strict)
3928
4425
  else
3929
4426
  constructor =
3930
- if value.is_a?(String)
4427
+ if value.is_a?(::String)
3931
4428
  if value.encoding == Encoding::UTF_8
3932
4429
  RbSeries.method(:new_str)
3933
4430
  else
@@ -3970,6 +4467,7 @@ module Polars
3970
4467
  Utf8 => RbSeries.method(:new_str),
3971
4468
  Object => RbSeries.method(:new_object),
3972
4469
  Categorical => RbSeries.method(:new_str),
4470
+ Enum => RbSeries.method(:new_str),
3973
4471
  Binary => RbSeries.method(:new_binary),
3974
4472
  Null => RbSeries.method(:new_null)
3975
4473
  }