polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1978 -1459
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +248 -108
  17. data/lib/polars/data_types.rb +195 -29
  18. data/lib/polars/date_time_expr.rb +41 -24
  19. data/lib/polars/date_time_name_space.rb +12 -12
  20. data/lib/polars/exceptions.rb +12 -1
  21. data/lib/polars/expr.rb +1080 -195
  22. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  23. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  24. data/lib/polars/functions/as_datatype.rb +248 -0
  25. data/lib/polars/functions/col.rb +47 -0
  26. data/lib/polars/functions/eager.rb +182 -0
  27. data/lib/polars/functions/lazy.rb +1280 -0
  28. data/lib/polars/functions/len.rb +49 -0
  29. data/lib/polars/functions/lit.rb +35 -0
  30. data/lib/polars/functions/random.rb +16 -0
  31. data/lib/polars/functions/range/date_range.rb +103 -0
  32. data/lib/polars/functions/range/int_range.rb +51 -0
  33. data/lib/polars/functions/repeat.rb +144 -0
  34. data/lib/polars/functions/whenthen.rb +27 -0
  35. data/lib/polars/functions.rb +29 -416
  36. data/lib/polars/group_by.rb +3 -3
  37. data/lib/polars/io.rb +21 -28
  38. data/lib/polars/lazy_frame.rb +390 -76
  39. data/lib/polars/list_expr.rb +152 -6
  40. data/lib/polars/list_name_space.rb +102 -0
  41. data/lib/polars/meta_expr.rb +175 -7
  42. data/lib/polars/series.rb +557 -59
  43. data/lib/polars/sql_context.rb +1 -1
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +412 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/struct_expr.rb +1 -1
  48. data/lib/polars/struct_name_space.rb +1 -1
  49. data/lib/polars/testing.rb +507 -0
  50. data/lib/polars/utils.rb +64 -20
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +15 -2
  53. metadata +36 -7
  54. data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/series.rb CHANGED
@@ -34,7 +34,7 @@ module Polars
34
34
  # s3 = Polars::Series.new([1, 2, 3])
35
35
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
36
36
  # Handle case where values are passed as the first argument
37
- if !name.nil? && !name.is_a?(String)
37
+ if !name.nil? && !name.is_a?(::String)
38
38
  if values.nil?
39
39
  values = name
40
40
  name = nil
@@ -46,7 +46,7 @@ module Polars
46
46
  name = "" if name.nil?
47
47
 
48
48
  # TODO improve
49
- if values.is_a?(Range) && values.begin.is_a?(String)
49
+ if values.is_a?(Range) && values.begin.is_a?(::String)
50
50
  values = values.to_a
51
51
  end
52
52
 
@@ -214,6 +214,126 @@ module Polars
214
214
  _comp(other, :lt_eq)
215
215
  end
216
216
 
217
+ # Method equivalent of operator expression `series <= other`.
218
+ #
219
+ # @return [Series]
220
+ def le(other)
221
+ self <= other
222
+ end
223
+
224
+ # Method equivalent of operator expression `series < other`.
225
+ #
226
+ # @return [Series]
227
+ def lt(other)
228
+ self < other
229
+ end
230
+
231
+ # Method equivalent of operator expression `series == other`.
232
+ #
233
+ # @return [Series]
234
+ def eq(other)
235
+ self == other
236
+ end
237
+
238
+ # Method equivalent of equality operator `series == other` where `nil == nil`.
239
+ #
240
+ # This differs from the standard `eq` where null values are propagated.
241
+ #
242
+ # @param other [Object]
243
+ # A literal or expression value to compare with.
244
+ #
245
+ # @return [Object]
246
+ #
247
+ # @example
248
+ # s1 = Polars::Series.new("a", [333, 200, nil])
249
+ # s2 = Polars::Series.new("a", [100, 200, nil])
250
+ # s1.eq(s2)
251
+ # # =>
252
+ # # shape: (3,)
253
+ # # Series: 'a' [bool]
254
+ # # [
255
+ # # false
256
+ # # true
257
+ # # null
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s1.eq_missing(s2)
262
+ # # =>
263
+ # # shape: (3,)
264
+ # # Series: 'a' [bool]
265
+ # # [
266
+ # # false
267
+ # # true
268
+ # # true
269
+ # # ]
270
+ def eq_missing(other)
271
+ if other.is_a?(Expr)
272
+ return Polars.lit(self).eq_missing(other)
273
+ end
274
+ to_frame.select(Polars.col(name).eq_missing(other)).to_series
275
+ end
276
+
277
+ # Method equivalent of operator expression `series != other`.
278
+ #
279
+ # @return [Series]
280
+ def ne(other)
281
+ self != other
282
+ end
283
+
284
+ # Method equivalent of inequality operator `series != other` where `nil == nil`.
285
+ #
286
+ # This differs from the standard `ne` where null values are propagated.
287
+ #
288
+ # @param other [Object]
289
+ # A literal or expression value to compare with.
290
+ #
291
+ # @return [Object]
292
+ #
293
+ # @example
294
+ # s1 = Polars::Series.new("a", [333, 200, nil])
295
+ # s2 = Polars::Series.new("a", [100, 200, nil])
296
+ # s1.ne(s2)
297
+ # # =>
298
+ # # shape: (3,)
299
+ # # Series: 'a' [bool]
300
+ # # [
301
+ # # true
302
+ # # false
303
+ # # null
304
+ # # ]
305
+ #
306
+ # @example
307
+ # s1.ne_missing(s2)
308
+ # # =>
309
+ # # shape: (3,)
310
+ # # Series: 'a' [bool]
311
+ # # [
312
+ # # true
313
+ # # false
314
+ # # false
315
+ # # ]
316
+ def ne_missing(other)
317
+ if other.is_a?(Expr)
318
+ return Polars.lit(self).ne_missing(other)
319
+ end
320
+ to_frame.select(Polars.col(name).ne_missing(other)).to_series
321
+ end
322
+
323
+ # Method equivalent of operator expression `series >= other`.
324
+ #
325
+ # @return [Series]
326
+ def ge(other)
327
+ self >= other
328
+ end
329
+
330
+ # Method equivalent of operator expression `series > other`.
331
+ #
332
+ # @return [Series]
333
+ def gt(other)
334
+ self > other
335
+ end
336
+
217
337
  # Performs addition.
218
338
  #
219
339
  # @return [Series]
@@ -341,7 +461,7 @@ module Polars
341
461
  def []=(key, value)
342
462
  if value.is_a?(::Array)
343
463
  if is_numeric || is_datelike
344
- set_at_idx(key, value)
464
+ scatter(key, value)
345
465
  return
346
466
  end
347
467
  raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
@@ -351,9 +471,9 @@ module Polars
351
471
  if key.dtype == Boolean
352
472
  self._s = set(key, value)._s
353
473
  elsif key.dtype == UInt64
354
- self._s = set_at_idx(key.cast(UInt32), value)._s
474
+ self._s = scatter(key.cast(UInt32), value)._s
355
475
  elsif key.dtype == UInt32
356
- self._s = set_at_idx(key, value)._s
476
+ self._s = scatter(key, value)._s
357
477
  else
358
478
  raise Todo
359
479
  end
@@ -411,11 +531,11 @@ module Polars
411
531
  # Check if any boolean value in the column is `true`.
412
532
  #
413
533
  # @return [Boolean]
414
- def any?(&block)
534
+ def any?(ignore_nulls: true, &block)
415
535
  if block_given?
416
- apply(&block).any?
536
+ apply(skip_nulls: ignore_nulls, &block).any?
417
537
  else
418
- to_frame.select(Polars.col(name).any).to_series[0]
538
+ _s.any(ignore_nulls)
419
539
  end
420
540
  end
421
541
  alias_method :any, :any?
@@ -423,11 +543,11 @@ module Polars
423
543
  # Check if all boolean values in the column are `true`.
424
544
  #
425
545
  # @return [Boolean]
426
- def all?(&block)
546
+ def all?(ignore_nulls: true, &block)
427
547
  if block_given?
428
- apply(&block).all?
548
+ apply(skip_nulls: ignore_nulls, &block).all?
429
549
  else
430
- to_frame.select(Polars.col(name).all).to_series[0]
550
+ _s.all(ignore_nulls)
431
551
  end
432
552
  end
433
553
  alias_method :all, :all?
@@ -735,6 +855,212 @@ module Polars
735
855
  Utils.wrap_df(_s.to_dummies(separator, drop_first))
736
856
  end
737
857
 
858
+ # Bin continuous values into discrete categories.
859
+ #
860
+ # @param breaks [Array]
861
+ # List of unique cut points.
862
+ # @param labels [Array]
863
+ # Names of the categories. The number of labels must be equal to the number
864
+ # of cut points plus one.
865
+ # @param left_closed [Boolean]
866
+ # Set the intervals to be left-closed instead of right-closed.
867
+ # @param include_breaks [Boolean]
868
+ # Include a column with the right endpoint of the bin each observation falls
869
+ # in. This will change the data type of the output from a
870
+ # `Categorical` to a `Struct`.
871
+ #
872
+ # @return [Series]
873
+ #
874
+ # @example Divide the column into three categories.
875
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
876
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
877
+ # # =>
878
+ # # shape: (5,)
879
+ # # Series: 'foo' [cat]
880
+ # # [
881
+ # # "a"
882
+ # # "a"
883
+ # # "b"
884
+ # # "b"
885
+ # # "c"
886
+ # # ]
887
+ #
888
+ # @example Create a DataFrame with the breakpoint and category for each value.
889
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
890
+ # s.to_frame.with_columns(cut).unnest("cut")
891
+ # # =>
892
+ # # shape: (5, 3)
893
+ # # ┌─────┬─────────────┬────────────┐
894
+ # # │ foo ┆ break_point ┆ category │
895
+ # # │ --- ┆ --- ┆ --- │
896
+ # # │ i64 ┆ f64 ┆ cat │
897
+ # # ╞═════╪═════════════╪════════════╡
898
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
899
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
900
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
901
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
902
+ # # │ 2 ┆ inf ┆ (1, inf] │
903
+ # # └─────┴─────────────┴────────────┘
904
+ def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
905
+ result = (
906
+ to_frame
907
+ .select(
908
+ Polars.col(name).cut(
909
+ breaks,
910
+ labels: labels,
911
+ left_closed: left_closed,
912
+ include_breaks: include_breaks
913
+ )
914
+ )
915
+ .to_series
916
+ )
917
+
918
+ if include_breaks
919
+ result = result.struct.rename_fields(["break_point", "category"])
920
+ end
921
+
922
+ result
923
+ end
924
+
925
+ # Bin continuous values into discrete categories based on their quantiles.
926
+ #
927
+ # @param quantiles [Array]
928
+ # Either a list of quantile probabilities between 0 and 1 or a positive
929
+ # integer determining the number of bins with uniform probability.
930
+ # @param labels [Array]
931
+ # Names of the categories. The number of labels must be equal to the number
932
+ # of cut points plus one.
933
+ # @param left_closed [Boolean]
934
+ # Set the intervals to be left-closed instead of right-closed.
935
+ # @param allow_duplicates [Boolean]
936
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
937
+ # rather than raising a `DuplicateError`. This can happen even with unique
938
+ # probabilities, depending on the data.
939
+ # @param include_breaks [Boolean]
940
+ # Include a column with the right endpoint of the bin each observation falls
941
+ # in. This will change the data type of the output from a
942
+ # `Categorical` to a `Struct`.
943
+ #
944
+ # @return [Series]
945
+ #
946
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
947
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
948
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
949
+ # # =>
950
+ # # shape: (5,)
951
+ # # Series: 'foo' [cat]
952
+ # # [
953
+ # # "a"
954
+ # # "a"
955
+ # # "b"
956
+ # # "b"
957
+ # # "c"
958
+ # # ]
959
+ #
960
+ # @example Divide a column into two categories using uniform quantile probabilities.
961
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
962
+ # # =>
963
+ # # shape: (5,)
964
+ # # Series: 'foo' [cat]
965
+ # # [
966
+ # # "low"
967
+ # # "low"
968
+ # # "high"
969
+ # # "high"
970
+ # # "high"
971
+ # # ]
972
+ #
973
+ # @example Create a DataFrame with the breakpoint and category for each value.
974
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
975
+ # s.to_frame.with_columns(cut).unnest("cut")
976
+ # # =>
977
+ # # shape: (5, 3)
978
+ # # ┌─────┬─────────────┬────────────┐
979
+ # # │ foo ┆ break_point ┆ category │
980
+ # # │ --- ┆ --- ┆ --- │
981
+ # # │ i64 ┆ f64 ┆ cat │
982
+ # # ╞═════╪═════════════╪════════════╡
983
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
984
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
985
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
986
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
987
+ # # │ 2 ┆ inf ┆ (1, inf] │
988
+ # # └─────┴─────────────┴────────────┘
989
+ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
990
+ result = (
991
+ to_frame
992
+ .select(
993
+ Polars.col(name).qcut(
994
+ quantiles,
995
+ labels: labels,
996
+ left_closed: left_closed,
997
+ allow_duplicates: allow_duplicates,
998
+ include_breaks: include_breaks
999
+ )
1000
+ )
1001
+ .to_series
1002
+ )
1003
+
1004
+ if include_breaks
1005
+ result = result.struct.rename_fields(["break_point", "category"])
1006
+ end
1007
+
1008
+ result
1009
+ end
1010
+
1011
+ # Get the lengths of runs of identical values.
1012
+ #
1013
+ # @return [Series]
1014
+ #
1015
+ # @example
1016
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1017
+ # s.rle.struct.unnest
1018
+ # # =>
1019
+ # # shape: (6, 2)
1020
+ # # ┌─────────┬────────┐
1021
+ # # │ lengths ┆ values │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ i32 ┆ i64 │
1024
+ # # ╞═════════╪════════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────────┴────────┘
1032
+ def rle
1033
+ super
1034
+ end
1035
+
1036
+ # Map values to run IDs.
1037
+ #
1038
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
1039
+ # which it falls. This is especially useful when you want to define groups by
1040
+ # runs of identical values rather than the values themselves.
1041
+ #
1042
+ # @return [Series]
1043
+ #
1044
+ # @example
1045
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1046
+ # s.rle_id
1047
+ # # =>
1048
+ # # shape: (8,)
1049
+ # # Series: 's' [u32]
1050
+ # # [
1051
+ # # 0
1052
+ # # 0
1053
+ # # 1
1054
+ # # 2
1055
+ # # 3
1056
+ # # 4
1057
+ # # 5
1058
+ # # 5
1059
+ # # ]
1060
+ def rle_id
1061
+ super
1062
+ end
1063
+
738
1064
  # Count the unique values in a Series.
739
1065
  #
740
1066
  # @param sort [Boolean]
@@ -929,7 +1255,7 @@ module Polars
929
1255
  #
930
1256
  # @example
931
1257
  # s = Polars::Series.new("a", [1, 2, 3])
932
- # s.cumsum
1258
+ # s.cum_sum
933
1259
  # # =>
934
1260
  # # shape: (3,)
935
1261
  # # Series: 'a' [i64]
@@ -938,9 +1264,10 @@ module Polars
938
1264
  # # 3
939
1265
  # # 6
940
1266
  # # ]
941
- def cumsum(reverse: false)
1267
+ def cum_sum(reverse: false)
942
1268
  super
943
1269
  end
1270
+ alias_method :cumsum, :cum_sum
944
1271
 
945
1272
  # Get an array with the cumulative min computed at every element.
946
1273
  #
@@ -951,7 +1278,7 @@ module Polars
951
1278
  #
952
1279
  # @example
953
1280
  # s = Polars::Series.new("a", [3, 5, 1])
954
- # s.cummin
1281
+ # s.cum_min
955
1282
  # # =>
956
1283
  # # shape: (3,)
957
1284
  # # Series: 'a' [i64]
@@ -960,9 +1287,10 @@ module Polars
960
1287
  # # 3
961
1288
  # # 1
962
1289
  # # ]
963
- def cummin(reverse: false)
1290
+ def cum_min(reverse: false)
964
1291
  super
965
1292
  end
1293
+ alias_method :cummin, :cum_min
966
1294
 
967
1295
  # Get an array with the cumulative max computed at every element.
968
1296
  #
@@ -973,7 +1301,7 @@ module Polars
973
1301
  #
974
1302
  # @example
975
1303
  # s = Polars::Series.new("a", [3, 5, 1])
976
- # s.cummax
1304
+ # s.cum_max
977
1305
  # # =>
978
1306
  # # shape: (3,)
979
1307
  # # Series: 'a' [i64]
@@ -982,9 +1310,10 @@ module Polars
982
1310
  # # 5
983
1311
  # # 5
984
1312
  # # ]
985
- def cummax(reverse: false)
1313
+ def cum_max(reverse: false)
986
1314
  super
987
1315
  end
1316
+ alias_method :cummax, :cum_max
988
1317
 
989
1318
  # Get an array with the cumulative product computed at every element.
990
1319
  #
@@ -999,7 +1328,7 @@ module Polars
999
1328
  #
1000
1329
  # @example
1001
1330
  # s = Polars::Series.new("a", [1, 2, 3])
1002
- # s.cumprod
1331
+ # s.cum_prod
1003
1332
  # # =>
1004
1333
  # # shape: (3,)
1005
1334
  # # Series: 'a' [i64]
@@ -1008,9 +1337,10 @@ module Polars
1008
1337
  # # 2
1009
1338
  # # 6
1010
1339
  # # ]
1011
- def cumprod(reverse: false)
1340
+ def cum_prod(reverse: false)
1012
1341
  super
1013
1342
  end
1343
+ alias_method :cumprod, :cum_prod
1014
1344
 
1015
1345
  # Get the first `n` rows.
1016
1346
  #
@@ -1237,26 +1567,56 @@ module Polars
1237
1567
  # # 2
1238
1568
  # # 1
1239
1569
  # # ]
1240
- def sort(reverse: false, in_place: false)
1570
+ def sort(reverse: false, nulls_last: false, in_place: false)
1241
1571
  if in_place
1242
- self._s = _s.sort(reverse)
1572
+ self._s = _s.sort(reverse, nulls_last)
1243
1573
  self
1244
1574
  else
1245
- Utils.wrap_s(_s.sort(reverse))
1575
+ Utils.wrap_s(_s.sort(reverse, nulls_last))
1246
1576
  end
1247
1577
  end
1248
1578
 
1249
1579
  # Return the `k` largest elements.
1250
1580
  #
1251
- # If `reverse: true`, the smallest elements will be given.
1581
+ # @param k [Integer]
1582
+ # Number of elements to return.
1583
+ #
1584
+ # @return [Series]
1585
+ #
1586
+ # @example
1587
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1588
+ # s.top_k(k: 3)
1589
+ # # =>
1590
+ # # shape: (3,)
1591
+ # # Series: 'a' [i64]
1592
+ # # [
1593
+ # # 5
1594
+ # # 4
1595
+ # # 3
1596
+ # # ]
1597
+ def top_k(k: 5)
1598
+ super
1599
+ end
1600
+
1601
+ # Return the `k` smallest elements.
1252
1602
  #
1253
1603
  # @param k [Integer]
1254
1604
  # Number of elements to return.
1255
- # @param reverse [Boolean]
1256
- # Return the smallest elements.
1257
1605
  #
1258
1606
  # @return [Series]
1259
- def top_k(k: 5, reverse: false)
1607
+ #
1608
+ # @example
1609
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1610
+ # s.bottom_k(k: 3)
1611
+ # # =>
1612
+ # # shape: (3,)
1613
+ # # Series: 'a' [i64]
1614
+ # # [
1615
+ # # 1
1616
+ # # 2
1617
+ # # 3
1618
+ # # ]
1619
+ def bottom_k(k: 5)
1260
1620
  super
1261
1621
  end
1262
1622
 
@@ -1705,26 +2065,38 @@ module Polars
1705
2065
  # @example
1706
2066
  # s = Polars::Series.new("a", [1, 2, 3])
1707
2067
  # s2 = Polars::Series.new("b", [4, 5, 6])
1708
- # s.series_equal(s)
2068
+ # s.equals(s)
1709
2069
  # # => true
1710
- # s.series_equal(s2)
2070
+ # s.equals(s2)
1711
2071
  # # => false
1712
- def series_equal(other, null_equal: false, strict: false)
1713
- _s.series_equal(other._s, null_equal, strict)
2072
+ def equals(other, null_equal: false, strict: false)
2073
+ _s.equals(other._s, null_equal, strict)
1714
2074
  end
2075
+ alias_method :series_equal, :equals
1715
2076
 
1716
- # Length of this Series.
2077
+ # Return the number of non-null elements in the Series.
1717
2078
  #
1718
2079
  # @return [Integer]
1719
2080
  #
1720
2081
  # @example
1721
- # s = Polars::Series.new("a", [1, 2, 3])
2082
+ # s = Polars::Series.new("a", [1, 2, nil])
2083
+ # s.count
2084
+ # # => 2
2085
+ def count
2086
+ len - null_count
2087
+ end
2088
+
2089
+ # Return the number of elements in the Series.
2090
+ #
2091
+ # @return [Integer]
2092
+ #
2093
+ # @example
2094
+ # s = Polars::Series.new("a", [1, 2, nil])
1722
2095
  # s.len
1723
2096
  # # => 3
1724
2097
  def len
1725
2098
  _s.len
1726
2099
  end
1727
- alias_method :count, :len
1728
2100
  alias_method :length, :len
1729
2101
  alias_method :size, :len
1730
2102
 
@@ -1886,7 +2258,7 @@ module Polars
1886
2258
  # s.is_utf8
1887
2259
  # # => true
1888
2260
  def is_utf8
1889
- dtype == Utf8
2261
+ dtype == String
1890
2262
  end
1891
2263
  alias_method :utf8?, :is_utf8
1892
2264
 
@@ -1920,7 +2292,7 @@ module Polars
1920
2292
  Int64 => Numo::Int64,
1921
2293
  Float32 => Numo::SFloat,
1922
2294
  Float64 => Numo::DFloat
1923
- }.fetch(dtype).cast(to_a)
2295
+ }.fetch(dtype.class).cast(to_a)
1924
2296
  elsif is_boolean
1925
2297
  Numo::Bit.cast(to_a)
1926
2298
  else
@@ -1959,7 +2331,7 @@ module Polars
1959
2331
  # # 3
1960
2332
  # # ]
1961
2333
  def set(filter, value)
1962
- Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype)}", filter._s, value))
2334
+ Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype.class)}", filter._s, value))
1963
2335
  end
1964
2336
 
1965
2337
  # Set values at the index locations.
@@ -1982,7 +2354,7 @@ module Polars
1982
2354
  # # 10
1983
2355
  # # 3
1984
2356
  # # ]
1985
- def set_at_idx(idx, value)
2357
+ def scatter(idx, value)
1986
2358
  if idx.is_a?(Integer)
1987
2359
  idx = [idx]
1988
2360
  end
@@ -1991,7 +2363,7 @@ module Polars
1991
2363
  end
1992
2364
 
1993
2365
  idx = Series.new("", idx)
1994
- if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
2366
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
1995
2367
  value = Series.new("", [value])
1996
2368
 
1997
2369
  # if we need to set more than a single value, we extend it
@@ -2001,9 +2373,10 @@ module Polars
2001
2373
  elsif !value.is_a?(Series)
2002
2374
  value = Series.new("", value)
2003
2375
  end
2004
- _s.set_at_idx(idx._s, value._s)
2376
+ _s.scatter(idx._s, value._s)
2005
2377
  self
2006
2378
  end
2379
+ alias_method :set_at_idx, :scatter
2007
2380
 
2008
2381
  # Create an empty copy of the current Series.
2009
2382
  #
@@ -2484,7 +2857,7 @@ module Polars
2484
2857
  #
2485
2858
  # @example
2486
2859
  # s = Polars::Series.new("a", [1, 2, 3])
2487
- # s.apply { |x| x + 10 }
2860
+ # s.map_elements { |x| x + 10 }
2488
2861
  # # =>
2489
2862
  # # shape: (3,)
2490
2863
  # # Series: 'a' [i64]
@@ -2493,7 +2866,7 @@ module Polars
2493
2866
  # # 12
2494
2867
  # # 13
2495
2868
  # # ]
2496
- def apply(return_dtype: nil, skip_nulls: true, &func)
2869
+ def map_elements(return_dtype: nil, skip_nulls: true, &func)
2497
2870
  if return_dtype.nil?
2498
2871
  pl_return_dtype = nil
2499
2872
  else
@@ -2501,7 +2874,8 @@ module Polars
2501
2874
  end
2502
2875
  Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
2503
2876
  end
2504
- alias_method :map, :apply
2877
+ alias_method :map, :map_elements
2878
+ alias_method :apply, :map_elements
2505
2879
 
2506
2880
  # Shift the values by a given period.
2507
2881
  #
@@ -2830,7 +3204,8 @@ module Polars
2830
3204
  weights: nil,
2831
3205
  min_periods: nil,
2832
3206
  center: false,
2833
- ddof: 1
3207
+ ddof: 1,
3208
+ warn_if_unsorted: true
2834
3209
  )
2835
3210
  to_frame
2836
3211
  .select(
@@ -2839,7 +3214,8 @@ module Polars
2839
3214
  weights: weights,
2840
3215
  min_periods: min_periods,
2841
3216
  center: center,
2842
- ddof: ddof
3217
+ ddof: ddof,
3218
+ warn_if_unsorted: warn_if_unsorted
2843
3219
  )
2844
3220
  )
2845
3221
  .to_series
@@ -2883,7 +3259,8 @@ module Polars
2883
3259
  weights: nil,
2884
3260
  min_periods: nil,
2885
3261
  center: false,
2886
- ddof: 1
3262
+ ddof: 1,
3263
+ warn_if_unsorted: true
2887
3264
  )
2888
3265
  to_frame
2889
3266
  .select(
@@ -2892,7 +3269,8 @@ module Polars
2892
3269
  weights: weights,
2893
3270
  min_periods: min_periods,
2894
3271
  center: center,
2895
- ddof: ddof
3272
+ ddof: ddof,
3273
+ warn_if_unsorted: warn_if_unsorted
2896
3274
  )
2897
3275
  )
2898
3276
  .to_series
@@ -2934,7 +3312,8 @@ module Polars
2934
3312
  window_size,
2935
3313
  weights: nil,
2936
3314
  min_periods: nil,
2937
- center: false
3315
+ center: false,
3316
+ warn_if_unsorted: true
2938
3317
  )
2939
3318
  if min_periods.nil?
2940
3319
  min_periods = window_size
@@ -2946,7 +3325,8 @@ module Polars
2946
3325
  window_size,
2947
3326
  weights: weights,
2948
3327
  min_periods: min_periods,
2949
- center: center
3328
+ center: center,
3329
+ warn_if_unsorted: warn_if_unsorted
2950
3330
  )
2951
3331
  )
2952
3332
  .to_series
@@ -3005,7 +3385,8 @@ module Polars
3005
3385
  window_size: 2,
3006
3386
  weights: nil,
3007
3387
  min_periods: nil,
3008
- center: false
3388
+ center: false,
3389
+ warn_if_unsorted: true
3009
3390
  )
3010
3391
  if min_periods.nil?
3011
3392
  min_periods = window_size
@@ -3019,7 +3400,8 @@ module Polars
3019
3400
  window_size: window_size,
3020
3401
  weights: weights,
3021
3402
  min_periods: min_periods,
3022
- center: center
3403
+ center: center,
3404
+ warn_if_unsorted: warn_if_unsorted
3023
3405
  )
3024
3406
  )
3025
3407
  .to_series
@@ -3076,8 +3458,8 @@ module Polars
3076
3458
  # # shape: (2,)
3077
3459
  # # Series: 'a' [i64]
3078
3460
  # # [
3079
- # # 1
3080
- # # 5
3461
+ # # 5
3462
+ # # 3
3081
3463
  # # ]
3082
3464
  def sample(
3083
3465
  n: nil,
@@ -3456,6 +3838,113 @@ module Polars
3456
3838
  super
3457
3839
  end
3458
3840
 
3841
+ # Replace values by different values.
3842
+ #
3843
+ # @param old [Object]
3844
+ # Value or sequence of values to replace.
3845
+ # Also accepts a mapping of values to their replacement.
3846
+ # @param new [Object]
3847
+ # Value or sequence of values to replace by.
3848
+ # Length must match the length of `old` or have length 1.
3849
+ # @param default [Object]
3850
+ # Set values that were not replaced to this value.
3851
+ # Defaults to keeping the original value.
3852
+ # Accepts expression input. Non-expression inputs are parsed as literals.
3853
+ # @param return_dtype [Object]
3854
+ # The data type of the resulting Series. If set to `nil` (default),
3855
+ # the data type is determined automatically based on the other inputs.
3856
+ #
3857
+ # @return [Series]
3858
+ #
3859
+ # @example Replace a single value by another value. Values that were not replaced remain unchanged.
3860
+ # s = Polars::Series.new([1, 2, 2, 3])
3861
+ # s.replace(2, 100)
3862
+ # # =>
3863
+ # # shape: (4,)
3864
+ # # Series: '' [i64]
3865
+ # # [
3866
+ # # 1
3867
+ # # 100
3868
+ # # 100
3869
+ # # 3
3870
+ # # ]
3871
+ #
3872
+ # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
3873
+ # s.replace([2, 3], [100, 200])
3874
+ # # =>
3875
+ # # shape: (4,)
3876
+ # # Series: '' [i64]
3877
+ # # [
3878
+ # # 1
3879
+ # # 100
3880
+ # # 100
3881
+ # # 200
3882
+ # # ]
3883
+ #
3884
+ # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3885
+ # mapping = {2 => 100, 3 => 200}
3886
+ # s.replace(mapping, default: -1)
3887
+ # # =>
3888
+ # # shape: (4,)
3889
+ # # Series: '' [i64]
3890
+ # # [
3891
+ # # -1
3892
+ # # 100
3893
+ # # 100
3894
+ # # 200
3895
+ # # ]
3896
+ #
3897
+ # @example The default can be another Series.
3898
+ # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3899
+ # s.replace(2, 100, default: default)
3900
+ # # =>
3901
+ # # shape: (4,)
3902
+ # # Series: '' [f64]
3903
+ # # [
3904
+ # # 2.5
3905
+ # # 100.0
3906
+ # # 100.0
3907
+ # # 10.0
3908
+ # # ]
3909
+ #
3910
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3911
+ # s = Polars::Series.new(["x", "y", "z"])
3912
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
3913
+ # s.replace(mapping)
3914
+ # # =>
3915
+ # # shape: (3,)
3916
+ # # Series: '' [str]
3917
+ # # [
3918
+ # # "1"
3919
+ # # "2"
3920
+ # # "3"
3921
+ # # ]
3922
+ #
3923
+ # @example
3924
+ # s.replace(mapping, default: nil)
3925
+ # # =>
3926
+ # # shape: (3,)
3927
+ # # Series: '' [i64]
3928
+ # # [
3929
+ # # 1
3930
+ # # 2
3931
+ # # 3
3932
+ # # ]
3933
+ #
3934
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
3935
+ # s.replace(mapping, return_dtype: Polars::UInt8)
3936
+ # # =>
3937
+ # # shape: (3,)
3938
+ # # Series: '' [u8]
3939
+ # # [
3940
+ # # 1
3941
+ # # 2
3942
+ # # 3
3943
+ # # ]
3944
+ def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3945
+ super
3946
+ end
3947
+
3459
3948
  # Reshape this Series to a flat Series or a Series of Lists.
3460
3949
  #
3461
3950
  # @param dims [Array]
@@ -3665,7 +4154,7 @@ module Polars
3665
4154
  end
3666
4155
 
3667
4156
  def _pos_idxs(idxs)
3668
- idx_type = Polars._get_idx_type
4157
+ idx_type = Plr.get_index_type
3669
4158
 
3670
4159
  if idxs.is_a?(Series)
3671
4160
  if idxs.dtype == idx_type
@@ -3750,7 +4239,7 @@ module Polars
3750
4239
  end
3751
4240
 
3752
4241
  def ffi_func(name, dtype, _s)
3753
- _s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
4242
+ _s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype.class))) if DTYPE_TO_FFINAME.key?(dtype.class)
3754
4243
  end
3755
4244
 
3756
4245
  def _arithmetic(other, op)
@@ -3761,7 +4250,7 @@ module Polars
3761
4250
  return Utils.wrap_s(_s.send(op, other._s))
3762
4251
  end
3763
4252
 
3764
- if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
4253
+ if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !is_float
3765
4254
  _s2 = sequence_to_rbseries(name, [other])
3766
4255
  return Utils.wrap_s(_s.send(op, _s2))
3767
4256
  end
@@ -3865,21 +4354,29 @@ module Polars
3865
4354
  end
3866
4355
  end
3867
4356
 
3868
- if !dtype.nil? && ![List, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
4357
+ if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3869
4358
  if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
3870
- dtype = Array.new(value.size)
4359
+ dtype = Array.new(nil, value.size)
3871
4360
  end
3872
4361
 
3873
4362
  constructor = polars_type_to_constructor(dtype)
3874
4363
  rbseries = constructor.call(name, values, strict)
3875
4364
 
3876
4365
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
3877
- if [Date, Datetime, Duration, Time, Categorical, Boolean].include?(base_type)
4366
+ if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type)
3878
4367
  if rbseries.dtype != dtype
3879
4368
  rbseries = rbseries.cast(dtype, true)
3880
4369
  end
3881
4370
  end
3882
4371
  return rbseries
4372
+ elsif dtype == Struct
4373
+ struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
4374
+ empty = {}
4375
+ return DataFrame.sequence_to_rbdf(
4376
+ values.map { |v| v.nil? ? empty : v },
4377
+ schema: struct_schema,
4378
+ orient: "row",
4379
+ ).to_struct(name)
3883
4380
  else
3884
4381
  if ruby_dtype.nil?
3885
4382
  if value.nil?
@@ -3927,7 +4424,7 @@ module Polars
3927
4424
  return RbSeries.new_series_list(name, values, strict)
3928
4425
  else
3929
4426
  constructor =
3930
- if value.is_a?(String)
4427
+ if value.is_a?(::String)
3931
4428
  if value.encoding == Encoding::UTF_8
3932
4429
  RbSeries.method(:new_str)
3933
4430
  else
@@ -3970,6 +4467,7 @@ module Polars
3970
4467
  Utf8 => RbSeries.method(:new_str),
3971
4468
  Object => RbSeries.method(:new_object),
3972
4469
  Categorical => RbSeries.method(:new_str),
4470
+ Enum => RbSeries.method(:new_str),
3973
4471
  Binary => RbSeries.method(:new_binary),
3974
4472
  Null => RbSeries.method(:new_null)
3975
4473
  }