polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +1978 -1459
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +36 -7
- data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/series.rb
CHANGED
@@ -34,7 +34,7 @@ module Polars
|
|
34
34
|
# s3 = Polars::Series.new([1, 2, 3])
|
35
35
|
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
|
36
36
|
# Handle case where values are passed as the first argument
|
37
|
-
if !name.nil? && !name.is_a?(String)
|
37
|
+
if !name.nil? && !name.is_a?(::String)
|
38
38
|
if values.nil?
|
39
39
|
values = name
|
40
40
|
name = nil
|
@@ -46,7 +46,7 @@ module Polars
|
|
46
46
|
name = "" if name.nil?
|
47
47
|
|
48
48
|
# TODO improve
|
49
|
-
if values.is_a?(Range) && values.begin.is_a?(String)
|
49
|
+
if values.is_a?(Range) && values.begin.is_a?(::String)
|
50
50
|
values = values.to_a
|
51
51
|
end
|
52
52
|
|
@@ -214,6 +214,126 @@ module Polars
|
|
214
214
|
_comp(other, :lt_eq)
|
215
215
|
end
|
216
216
|
|
217
|
+
# Method equivalent of operator expression `series <= other`.
|
218
|
+
#
|
219
|
+
# @return [Series]
|
220
|
+
def le(other)
|
221
|
+
self <= other
|
222
|
+
end
|
223
|
+
|
224
|
+
# Method equivalent of operator expression `series < other`.
|
225
|
+
#
|
226
|
+
# @return [Series]
|
227
|
+
def lt(other)
|
228
|
+
self < other
|
229
|
+
end
|
230
|
+
|
231
|
+
# Method equivalent of operator expression `series == other`.
|
232
|
+
#
|
233
|
+
# @return [Series]
|
234
|
+
def eq(other)
|
235
|
+
self == other
|
236
|
+
end
|
237
|
+
|
238
|
+
# Method equivalent of equality operator `series == other` where `nil == nil`.
|
239
|
+
#
|
240
|
+
# This differs from the standard `eq` where null values are propagated.
|
241
|
+
#
|
242
|
+
# @param other [Object]
|
243
|
+
# A literal or expression value to compare with.
|
244
|
+
#
|
245
|
+
# @return [Object]
|
246
|
+
#
|
247
|
+
# @example
|
248
|
+
# s1 = Polars::Series.new("a", [333, 200, nil])
|
249
|
+
# s2 = Polars::Series.new("a", [100, 200, nil])
|
250
|
+
# s1.eq(s2)
|
251
|
+
# # =>
|
252
|
+
# # shape: (3,)
|
253
|
+
# # Series: 'a' [bool]
|
254
|
+
# # [
|
255
|
+
# # false
|
256
|
+
# # true
|
257
|
+
# # null
|
258
|
+
# # ]
|
259
|
+
#
|
260
|
+
# @example
|
261
|
+
# s1.eq_missing(s2)
|
262
|
+
# # =>
|
263
|
+
# # shape: (3,)
|
264
|
+
# # Series: 'a' [bool]
|
265
|
+
# # [
|
266
|
+
# # false
|
267
|
+
# # true
|
268
|
+
# # true
|
269
|
+
# # ]
|
270
|
+
def eq_missing(other)
|
271
|
+
if other.is_a?(Expr)
|
272
|
+
return Polars.lit(self).eq_missing(other)
|
273
|
+
end
|
274
|
+
to_frame.select(Polars.col(name).eq_missing(other)).to_series
|
275
|
+
end
|
276
|
+
|
277
|
+
# Method equivalent of operator expression `series != other`.
|
278
|
+
#
|
279
|
+
# @return [Series]
|
280
|
+
def ne(other)
|
281
|
+
self != other
|
282
|
+
end
|
283
|
+
|
284
|
+
# Method equivalent of inequality operator `series != other` where `nil == nil`.
|
285
|
+
#
|
286
|
+
# This differs from the standard `ne` where null values are propagated.
|
287
|
+
#
|
288
|
+
# @param other [Object]
|
289
|
+
# A literal or expression value to compare with.
|
290
|
+
#
|
291
|
+
# @return [Object]
|
292
|
+
#
|
293
|
+
# @example
|
294
|
+
# s1 = Polars::Series.new("a", [333, 200, nil])
|
295
|
+
# s2 = Polars::Series.new("a", [100, 200, nil])
|
296
|
+
# s1.ne(s2)
|
297
|
+
# # =>
|
298
|
+
# # shape: (3,)
|
299
|
+
# # Series: 'a' [bool]
|
300
|
+
# # [
|
301
|
+
# # true
|
302
|
+
# # false
|
303
|
+
# # null
|
304
|
+
# # ]
|
305
|
+
#
|
306
|
+
# @example
|
307
|
+
# s1.ne_missing(s2)
|
308
|
+
# # =>
|
309
|
+
# # shape: (3,)
|
310
|
+
# # Series: 'a' [bool]
|
311
|
+
# # [
|
312
|
+
# # true
|
313
|
+
# # false
|
314
|
+
# # false
|
315
|
+
# # ]
|
316
|
+
def ne_missing(other)
|
317
|
+
if other.is_a?(Expr)
|
318
|
+
return Polars.lit(self).ne_missing(other)
|
319
|
+
end
|
320
|
+
to_frame.select(Polars.col(name).ne_missing(other)).to_series
|
321
|
+
end
|
322
|
+
|
323
|
+
# Method equivalent of operator expression `series >= other`.
|
324
|
+
#
|
325
|
+
# @return [Series]
|
326
|
+
def ge(other)
|
327
|
+
self >= other
|
328
|
+
end
|
329
|
+
|
330
|
+
# Method equivalent of operator expression `series > other`.
|
331
|
+
#
|
332
|
+
# @return [Series]
|
333
|
+
def gt(other)
|
334
|
+
self > other
|
335
|
+
end
|
336
|
+
|
217
337
|
# Performs addition.
|
218
338
|
#
|
219
339
|
# @return [Series]
|
@@ -341,7 +461,7 @@ module Polars
|
|
341
461
|
def []=(key, value)
|
342
462
|
if value.is_a?(::Array)
|
343
463
|
if is_numeric || is_datelike
|
344
|
-
|
464
|
+
scatter(key, value)
|
345
465
|
return
|
346
466
|
end
|
347
467
|
raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
|
@@ -351,9 +471,9 @@ module Polars
|
|
351
471
|
if key.dtype == Boolean
|
352
472
|
self._s = set(key, value)._s
|
353
473
|
elsif key.dtype == UInt64
|
354
|
-
self._s =
|
474
|
+
self._s = scatter(key.cast(UInt32), value)._s
|
355
475
|
elsif key.dtype == UInt32
|
356
|
-
self._s =
|
476
|
+
self._s = scatter(key, value)._s
|
357
477
|
else
|
358
478
|
raise Todo
|
359
479
|
end
|
@@ -411,11 +531,11 @@ module Polars
|
|
411
531
|
# Check if any boolean value in the column is `true`.
|
412
532
|
#
|
413
533
|
# @return [Boolean]
|
414
|
-
def any?(&block)
|
534
|
+
def any?(ignore_nulls: true, &block)
|
415
535
|
if block_given?
|
416
|
-
apply(&block).any?
|
536
|
+
apply(skip_nulls: ignore_nulls, &block).any?
|
417
537
|
else
|
418
|
-
|
538
|
+
_s.any(ignore_nulls)
|
419
539
|
end
|
420
540
|
end
|
421
541
|
alias_method :any, :any?
|
@@ -423,11 +543,11 @@ module Polars
|
|
423
543
|
# Check if all boolean values in the column are `true`.
|
424
544
|
#
|
425
545
|
# @return [Boolean]
|
426
|
-
def all?(&block)
|
546
|
+
def all?(ignore_nulls: true, &block)
|
427
547
|
if block_given?
|
428
|
-
apply(&block).all?
|
548
|
+
apply(skip_nulls: ignore_nulls, &block).all?
|
429
549
|
else
|
430
|
-
|
550
|
+
_s.all(ignore_nulls)
|
431
551
|
end
|
432
552
|
end
|
433
553
|
alias_method :all, :all?
|
@@ -735,6 +855,212 @@ module Polars
|
|
735
855
|
Utils.wrap_df(_s.to_dummies(separator, drop_first))
|
736
856
|
end
|
737
857
|
|
858
|
+
# Bin continuous values into discrete categories.
|
859
|
+
#
|
860
|
+
# @param breaks [Array]
|
861
|
+
# List of unique cut points.
|
862
|
+
# @param labels [Array]
|
863
|
+
# Names of the categories. The number of labels must be equal to the number
|
864
|
+
# of cut points plus one.
|
865
|
+
# @param left_closed [Boolean]
|
866
|
+
# Set the intervals to be left-closed instead of right-closed.
|
867
|
+
# @param include_breaks [Boolean]
|
868
|
+
# Include a column with the right endpoint of the bin each observation falls
|
869
|
+
# in. This will change the data type of the output from a
|
870
|
+
# `Categorical` to a `Struct`.
|
871
|
+
#
|
872
|
+
# @return [Series]
|
873
|
+
#
|
874
|
+
# @example Divide the column into three categories.
|
875
|
+
# s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
|
876
|
+
# s.cut([-1, 1], labels: ["a", "b", "c"])
|
877
|
+
# # =>
|
878
|
+
# # shape: (5,)
|
879
|
+
# # Series: 'foo' [cat]
|
880
|
+
# # [
|
881
|
+
# # "a"
|
882
|
+
# # "a"
|
883
|
+
# # "b"
|
884
|
+
# # "b"
|
885
|
+
# # "c"
|
886
|
+
# # ]
|
887
|
+
#
|
888
|
+
# @example Create a DataFrame with the breakpoint and category for each value.
|
889
|
+
# cut = s.cut([-1, 1], include_breaks: true).alias("cut")
|
890
|
+
# s.to_frame.with_columns(cut).unnest("cut")
|
891
|
+
# # =>
|
892
|
+
# # shape: (5, 3)
|
893
|
+
# # ┌─────┬─────────────┬────────────┐
|
894
|
+
# # │ foo ┆ break_point ┆ category │
|
895
|
+
# # │ --- ┆ --- ┆ --- │
|
896
|
+
# # │ i64 ┆ f64 ┆ cat │
|
897
|
+
# # ╞═════╪═════════════╪════════════╡
|
898
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
899
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
900
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
901
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
902
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
903
|
+
# # └─────┴─────────────┴────────────┘
|
904
|
+
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
|
905
|
+
result = (
|
906
|
+
to_frame
|
907
|
+
.select(
|
908
|
+
Polars.col(name).cut(
|
909
|
+
breaks,
|
910
|
+
labels: labels,
|
911
|
+
left_closed: left_closed,
|
912
|
+
include_breaks: include_breaks
|
913
|
+
)
|
914
|
+
)
|
915
|
+
.to_series
|
916
|
+
)
|
917
|
+
|
918
|
+
if include_breaks
|
919
|
+
result = result.struct.rename_fields(["break_point", "category"])
|
920
|
+
end
|
921
|
+
|
922
|
+
result
|
923
|
+
end
|
924
|
+
|
925
|
+
# Bin continuous values into discrete categories based on their quantiles.
|
926
|
+
#
|
927
|
+
# @param quantiles [Array, Integer]
|
928
|
+
# Either a list of quantile probabilities between 0 and 1 or a positive
|
929
|
+
# integer determining the number of bins with uniform probability.
|
930
|
+
# @param labels [Array]
|
931
|
+
# Names of the categories. The number of labels must be equal to the number
|
932
|
+
# of cut points plus one.
|
933
|
+
# @param left_closed [Boolean]
|
934
|
+
# Set the intervals to be left-closed instead of right-closed.
|
935
|
+
# @param allow_duplicates [Boolean]
|
936
|
+
# If set to `true`, duplicates in the resulting quantiles are dropped,
|
937
|
+
# rather than raising a `DuplicateError`. This can happen even with unique
|
938
|
+
# probabilities, depending on the data.
|
939
|
+
# @param include_breaks [Boolean]
|
940
|
+
# Include a column with the right endpoint of the bin each observation falls
|
941
|
+
# in. This will change the data type of the output from a
|
942
|
+
# `Categorical` to a `Struct`.
|
943
|
+
#
|
944
|
+
# @return [Series]
|
945
|
+
#
|
946
|
+
# @example Divide a column into three categories according to pre-defined quantile probabilities.
|
947
|
+
# s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
|
948
|
+
# s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
|
949
|
+
# # =>
|
950
|
+
# # shape: (5,)
|
951
|
+
# # Series: 'foo' [cat]
|
952
|
+
# # [
|
953
|
+
# # "a"
|
954
|
+
# # "a"
|
955
|
+
# # "b"
|
956
|
+
# # "b"
|
957
|
+
# # "c"
|
958
|
+
# # ]
|
959
|
+
#
|
960
|
+
# @example Divide a column into two categories using uniform quantile probabilities.
|
961
|
+
# s.qcut(2, labels: ["low", "high"], left_closed: true)
|
962
|
+
# # =>
|
963
|
+
# # shape: (5,)
|
964
|
+
# # Series: 'foo' [cat]
|
965
|
+
# # [
|
966
|
+
# # "low"
|
967
|
+
# # "low"
|
968
|
+
# # "high"
|
969
|
+
# # "high"
|
970
|
+
# # "high"
|
971
|
+
# # ]
|
972
|
+
#
|
973
|
+
# @example Create a DataFrame with the breakpoint and category for each value.
|
974
|
+
# cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
|
975
|
+
# s.to_frame.with_columns(cut).unnest("cut")
|
976
|
+
# # =>
|
977
|
+
# # shape: (5, 3)
|
978
|
+
# # ┌─────┬─────────────┬────────────┐
|
979
|
+
# # │ foo ┆ break_point ┆ category │
|
980
|
+
# # │ --- ┆ --- ┆ --- │
|
981
|
+
# # │ i64 ┆ f64 ┆ cat │
|
982
|
+
# # ╞═════╪═════════════╪════════════╡
|
983
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
984
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
985
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
986
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
987
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
988
|
+
# # └─────┴─────────────┴────────────┘
|
989
|
+
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
|
990
|
+
result = (
|
991
|
+
to_frame
|
992
|
+
.select(
|
993
|
+
Polars.col(name).qcut(
|
994
|
+
quantiles,
|
995
|
+
labels: labels,
|
996
|
+
left_closed: left_closed,
|
997
|
+
allow_duplicates: allow_duplicates,
|
998
|
+
include_breaks: include_breaks
|
999
|
+
)
|
1000
|
+
)
|
1001
|
+
.to_series
|
1002
|
+
)
|
1003
|
+
|
1004
|
+
if include_breaks
|
1005
|
+
result = result.struct.rename_fields(["break_point", "category"])
|
1006
|
+
end
|
1007
|
+
|
1008
|
+
result
|
1009
|
+
end
|
1010
|
+
|
1011
|
+
# Get the lengths of runs of identical values.
|
1012
|
+
#
|
1013
|
+
# @return [Series]
|
1014
|
+
#
|
1015
|
+
# @example
|
1016
|
+
# s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
|
1017
|
+
# s.rle.struct.unnest
|
1018
|
+
# # =>
|
1019
|
+
# # shape: (6, 2)
|
1020
|
+
# # ┌─────────┬────────┐
|
1021
|
+
# # │ lengths ┆ values │
|
1022
|
+
# # │ --- ┆ --- │
|
1023
|
+
# # │ i32 ┆ i64 │
|
1024
|
+
# # ╞═════════╪════════╡
|
1025
|
+
# # │ 2 ┆ 1 │
|
1026
|
+
# # │ 1 ┆ 2 │
|
1027
|
+
# # │ 1 ┆ 1 │
|
1028
|
+
# # │ 1 ┆ null │
|
1029
|
+
# # │ 1 ┆ 1 │
|
1030
|
+
# # │ 2 ┆ 3 │
|
1031
|
+
# # └─────────┴────────┘
|
1032
|
+
def rle
|
1033
|
+
super
|
1034
|
+
end
|
1035
|
+
|
1036
|
+
# Map values to run IDs.
|
1037
|
+
#
|
1038
|
+
# Similar to RLE, but it maps each value to an ID corresponding to the run into
|
1039
|
+
# which it falls. This is especially useful when you want to define groups by
|
1040
|
+
# runs of identical values rather than the values themselves.
|
1041
|
+
#
|
1042
|
+
# @return [Series]
|
1043
|
+
#
|
1044
|
+
# @example
|
1045
|
+
# s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
|
1046
|
+
# s.rle_id
|
1047
|
+
# # =>
|
1048
|
+
# # shape: (8,)
|
1049
|
+
# # Series: 's' [u32]
|
1050
|
+
# # [
|
1051
|
+
# # 0
|
1052
|
+
# # 0
|
1053
|
+
# # 1
|
1054
|
+
# # 2
|
1055
|
+
# # 3
|
1056
|
+
# # 4
|
1057
|
+
# # 5
|
1058
|
+
# # 5
|
1059
|
+
# # ]
|
1060
|
+
def rle_id
|
1061
|
+
super
|
1062
|
+
end
|
1063
|
+
|
738
1064
|
# Count the unique values in a Series.
|
739
1065
|
#
|
740
1066
|
# @param sort [Boolean]
|
@@ -929,7 +1255,7 @@ module Polars
|
|
929
1255
|
#
|
930
1256
|
# @example
|
931
1257
|
# s = Polars::Series.new("a", [1, 2, 3])
|
932
|
-
# s.
|
1258
|
+
# s.cum_sum
|
933
1259
|
# # =>
|
934
1260
|
# # shape: (3,)
|
935
1261
|
# # Series: 'a' [i64]
|
@@ -938,9 +1264,10 @@ module Polars
|
|
938
1264
|
# # 3
|
939
1265
|
# # 6
|
940
1266
|
# # ]
|
941
|
-
def
|
1267
|
+
def cum_sum(reverse: false)
|
942
1268
|
super
|
943
1269
|
end
|
1270
|
+
alias_method :cumsum, :cum_sum
|
944
1271
|
|
945
1272
|
# Get an array with the cumulative min computed at every element.
|
946
1273
|
#
|
@@ -951,7 +1278,7 @@ module Polars
|
|
951
1278
|
#
|
952
1279
|
# @example
|
953
1280
|
# s = Polars::Series.new("a", [3, 5, 1])
|
954
|
-
# s.
|
1281
|
+
# s.cum_min
|
955
1282
|
# # =>
|
956
1283
|
# # shape: (3,)
|
957
1284
|
# # Series: 'a' [i64]
|
@@ -960,9 +1287,10 @@ module Polars
|
|
960
1287
|
# # 3
|
961
1288
|
# # 1
|
962
1289
|
# # ]
|
963
|
-
def
|
1290
|
+
def cum_min(reverse: false)
|
964
1291
|
super
|
965
1292
|
end
|
1293
|
+
alias_method :cummin, :cum_min
|
966
1294
|
|
967
1295
|
# Get an array with the cumulative max computed at every element.
|
968
1296
|
#
|
@@ -973,7 +1301,7 @@ module Polars
|
|
973
1301
|
#
|
974
1302
|
# @example
|
975
1303
|
# s = Polars::Series.new("a", [3, 5, 1])
|
976
|
-
# s.
|
1304
|
+
# s.cum_max
|
977
1305
|
# # =>
|
978
1306
|
# # shape: (3,)
|
979
1307
|
# # Series: 'a' [i64]
|
@@ -982,9 +1310,10 @@ module Polars
|
|
982
1310
|
# # 5
|
983
1311
|
# # 5
|
984
1312
|
# # ]
|
985
|
-
def
|
1313
|
+
def cum_max(reverse: false)
|
986
1314
|
super
|
987
1315
|
end
|
1316
|
+
alias_method :cummax, :cum_max
|
988
1317
|
|
989
1318
|
# Get an array with the cumulative product computed at every element.
|
990
1319
|
#
|
@@ -999,7 +1328,7 @@ module Polars
|
|
999
1328
|
#
|
1000
1329
|
# @example
|
1001
1330
|
# s = Polars::Series.new("a", [1, 2, 3])
|
1002
|
-
# s.
|
1331
|
+
# s.cum_prod
|
1003
1332
|
# # =>
|
1004
1333
|
# # shape: (3,)
|
1005
1334
|
# # Series: 'a' [i64]
|
@@ -1008,9 +1337,10 @@ module Polars
|
|
1008
1337
|
# # 2
|
1009
1338
|
# # 6
|
1010
1339
|
# # ]
|
1011
|
-
def
|
1340
|
+
def cum_prod(reverse: false)
|
1012
1341
|
super
|
1013
1342
|
end
|
1343
|
+
alias_method :cumprod, :cum_prod
|
1014
1344
|
|
1015
1345
|
# Get the first `n` rows.
|
1016
1346
|
#
|
@@ -1237,26 +1567,56 @@ module Polars
|
|
1237
1567
|
# # 2
|
1238
1568
|
# # 1
|
1239
1569
|
# # ]
|
1240
|
-
def sort(reverse: false, in_place: false)
|
1570
|
+
def sort(reverse: false, nulls_last: false, in_place: false)
|
1241
1571
|
if in_place
|
1242
|
-
self._s = _s.sort(reverse)
|
1572
|
+
self._s = _s.sort(reverse, nulls_last)
|
1243
1573
|
self
|
1244
1574
|
else
|
1245
|
-
Utils.wrap_s(_s.sort(reverse))
|
1575
|
+
Utils.wrap_s(_s.sort(reverse, nulls_last))
|
1246
1576
|
end
|
1247
1577
|
end
|
1248
1578
|
|
1249
1579
|
# Return the `k` largest elements.
|
1250
1580
|
#
|
1251
|
-
#
|
1581
|
+
# @param k [Integer]
|
1582
|
+
# Number of elements to return.
|
1583
|
+
#
|
1584
|
+
# @return [Series]
|
1585
|
+
#
|
1586
|
+
# @example
|
1587
|
+
# s = Polars::Series.new("a", [2, 5, 1, 4, 3])
|
1588
|
+
# s.top_k(k: 3)
|
1589
|
+
# # =>
|
1590
|
+
# # shape: (3,)
|
1591
|
+
# # Series: 'a' [i64]
|
1592
|
+
# # [
|
1593
|
+
# # 5
|
1594
|
+
# # 4
|
1595
|
+
# # 3
|
1596
|
+
# # ]
|
1597
|
+
def top_k(k: 5)
|
1598
|
+
super
|
1599
|
+
end
|
1600
|
+
|
1601
|
+
# Return the `k` smallest elements.
|
1252
1602
|
#
|
1253
1603
|
# @param k [Integer]
|
1254
1604
|
# Number of elements to return.
|
1255
|
-
# @param reverse [Boolean]
|
1256
|
-
# Return the smallest elements.
|
1257
1605
|
#
|
1258
1606
|
# @return [Series]
|
1259
|
-
|
1607
|
+
#
|
1608
|
+
# @example
|
1609
|
+
# s = Polars::Series.new("a", [2, 5, 1, 4, 3])
|
1610
|
+
# s.bottom_k(k: 3)
|
1611
|
+
# # =>
|
1612
|
+
# # shape: (3,)
|
1613
|
+
# # Series: 'a' [i64]
|
1614
|
+
# # [
|
1615
|
+
# # 1
|
1616
|
+
# # 2
|
1617
|
+
# # 3
|
1618
|
+
# # ]
|
1619
|
+
def bottom_k(k: 5)
|
1260
1620
|
super
|
1261
1621
|
end
|
1262
1622
|
|
@@ -1705,26 +2065,38 @@ module Polars
|
|
1705
2065
|
# @example
|
1706
2066
|
# s = Polars::Series.new("a", [1, 2, 3])
|
1707
2067
|
# s2 = Polars::Series.new("b", [4, 5, 6])
|
1708
|
-
# s.
|
2068
|
+
# s.equals(s)
|
1709
2069
|
# # => true
|
1710
|
-
# s.
|
2070
|
+
# s.equals(s2)
|
1711
2071
|
# # => false
|
1712
|
-
def
|
1713
|
-
_s.
|
2072
|
+
def equals(other, null_equal: false, strict: false)
|
2073
|
+
_s.equals(other._s, null_equal, strict)
|
1714
2074
|
end
|
2075
|
+
alias_method :series_equal, :equals
|
1715
2076
|
|
1716
|
-
#
|
2077
|
+
# Return the number of non-null elements in the Series.
|
1717
2078
|
#
|
1718
2079
|
# @return [Integer]
|
1719
2080
|
#
|
1720
2081
|
# @example
|
1721
|
-
# s = Polars::Series.new("a", [1, 2,
|
2082
|
+
# s = Polars::Series.new("a", [1, 2, nil])
|
2083
|
+
# s.count
|
2084
|
+
# # => 2
|
2085
|
+
def count
|
2086
|
+
len - null_count
|
2087
|
+
end
|
2088
|
+
|
2089
|
+
# Return the number of elements in the Series.
|
2090
|
+
#
|
2091
|
+
# @return [Integer]
|
2092
|
+
#
|
2093
|
+
# @example
|
2094
|
+
# s = Polars::Series.new("a", [1, 2, nil])
|
1722
2095
|
# s.len
|
1723
2096
|
# # => 3
|
1724
2097
|
def len
|
1725
2098
|
_s.len
|
1726
2099
|
end
|
1727
|
-
alias_method :count, :len
|
1728
2100
|
alias_method :length, :len
|
1729
2101
|
alias_method :size, :len
|
1730
2102
|
|
@@ -1886,7 +2258,7 @@ module Polars
|
|
1886
2258
|
# s.is_utf8
|
1887
2259
|
# # => true
|
1888
2260
|
def is_utf8
|
1889
|
-
dtype ==
|
2261
|
+
dtype == String
|
1890
2262
|
end
|
1891
2263
|
alias_method :utf8?, :is_utf8
|
1892
2264
|
|
@@ -1920,7 +2292,7 @@ module Polars
|
|
1920
2292
|
Int64 => Numo::Int64,
|
1921
2293
|
Float32 => Numo::SFloat,
|
1922
2294
|
Float64 => Numo::DFloat
|
1923
|
-
}.fetch(dtype).cast(to_a)
|
2295
|
+
}.fetch(dtype.class).cast(to_a)
|
1924
2296
|
elsif is_boolean
|
1925
2297
|
Numo::Bit.cast(to_a)
|
1926
2298
|
else
|
@@ -1959,7 +2331,7 @@ module Polars
|
|
1959
2331
|
# # 3
|
1960
2332
|
# # ]
|
1961
2333
|
def set(filter, value)
|
1962
|
-
Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype)}", filter._s, value))
|
2334
|
+
Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype.class)}", filter._s, value))
|
1963
2335
|
end
|
1964
2336
|
|
1965
2337
|
# Set values at the index locations.
|
@@ -1982,7 +2354,7 @@ module Polars
|
|
1982
2354
|
# # 10
|
1983
2355
|
# # 3
|
1984
2356
|
# # ]
|
1985
|
-
def
|
2357
|
+
def scatter(idx, value)
|
1986
2358
|
if idx.is_a?(Integer)
|
1987
2359
|
idx = [idx]
|
1988
2360
|
end
|
@@ -1991,7 +2363,7 @@ module Polars
|
|
1991
2363
|
end
|
1992
2364
|
|
1993
2365
|
idx = Series.new("", idx)
|
1994
|
-
if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
|
2366
|
+
if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
|
1995
2367
|
value = Series.new("", [value])
|
1996
2368
|
|
1997
2369
|
# if we need to set more than a single value, we extend it
|
@@ -2001,9 +2373,10 @@ module Polars
|
|
2001
2373
|
elsif !value.is_a?(Series)
|
2002
2374
|
value = Series.new("", value)
|
2003
2375
|
end
|
2004
|
-
_s.
|
2376
|
+
_s.scatter(idx._s, value._s)
|
2005
2377
|
self
|
2006
2378
|
end
|
2379
|
+
alias_method :set_at_idx, :scatter
|
2007
2380
|
|
2008
2381
|
# Create an empty copy of the current Series.
|
2009
2382
|
#
|
@@ -2484,7 +2857,7 @@ module Polars
|
|
2484
2857
|
#
|
2485
2858
|
# @example
|
2486
2859
|
# s = Polars::Series.new("a", [1, 2, 3])
|
2487
|
-
# s.
|
2860
|
+
# s.map_elements { |x| x + 10 }
|
2488
2861
|
# # =>
|
2489
2862
|
# # shape: (3,)
|
2490
2863
|
# # Series: 'a' [i64]
|
@@ -2493,7 +2866,7 @@ module Polars
|
|
2493
2866
|
# # 12
|
2494
2867
|
# # 13
|
2495
2868
|
# # ]
|
2496
|
-
def
|
2869
|
+
def map_elements(return_dtype: nil, skip_nulls: true, &func)
|
2497
2870
|
if return_dtype.nil?
|
2498
2871
|
pl_return_dtype = nil
|
2499
2872
|
else
|
@@ -2501,7 +2874,8 @@ module Polars
|
|
2501
2874
|
end
|
2502
2875
|
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2503
2876
|
end
|
2504
|
-
alias_method :map, :
|
2877
|
+
alias_method :map, :map_elements
|
2878
|
+
alias_method :apply, :map_elements
|
2505
2879
|
|
2506
2880
|
# Shift the values by a given period.
|
2507
2881
|
#
|
@@ -2830,7 +3204,8 @@ module Polars
|
|
2830
3204
|
weights: nil,
|
2831
3205
|
min_periods: nil,
|
2832
3206
|
center: false,
|
2833
|
-
ddof: 1
|
3207
|
+
ddof: 1,
|
3208
|
+
warn_if_unsorted: true
|
2834
3209
|
)
|
2835
3210
|
to_frame
|
2836
3211
|
.select(
|
@@ -2839,7 +3214,8 @@ module Polars
|
|
2839
3214
|
weights: weights,
|
2840
3215
|
min_periods: min_periods,
|
2841
3216
|
center: center,
|
2842
|
-
ddof: ddof
|
3217
|
+
ddof: ddof,
|
3218
|
+
warn_if_unsorted: warn_if_unsorted
|
2843
3219
|
)
|
2844
3220
|
)
|
2845
3221
|
.to_series
|
@@ -2883,7 +3259,8 @@ module Polars
|
|
2883
3259
|
weights: nil,
|
2884
3260
|
min_periods: nil,
|
2885
3261
|
center: false,
|
2886
|
-
ddof: 1
|
3262
|
+
ddof: 1,
|
3263
|
+
warn_if_unsorted: true
|
2887
3264
|
)
|
2888
3265
|
to_frame
|
2889
3266
|
.select(
|
@@ -2892,7 +3269,8 @@ module Polars
|
|
2892
3269
|
weights: weights,
|
2893
3270
|
min_periods: min_periods,
|
2894
3271
|
center: center,
|
2895
|
-
ddof: ddof
|
3272
|
+
ddof: ddof,
|
3273
|
+
warn_if_unsorted: warn_if_unsorted
|
2896
3274
|
)
|
2897
3275
|
)
|
2898
3276
|
.to_series
|
@@ -2934,7 +3312,8 @@ module Polars
|
|
2934
3312
|
window_size,
|
2935
3313
|
weights: nil,
|
2936
3314
|
min_periods: nil,
|
2937
|
-
center: false
|
3315
|
+
center: false,
|
3316
|
+
warn_if_unsorted: true
|
2938
3317
|
)
|
2939
3318
|
if min_periods.nil?
|
2940
3319
|
min_periods = window_size
|
@@ -2946,7 +3325,8 @@ module Polars
|
|
2946
3325
|
window_size,
|
2947
3326
|
weights: weights,
|
2948
3327
|
min_periods: min_periods,
|
2949
|
-
center: center
|
3328
|
+
center: center,
|
3329
|
+
warn_if_unsorted: warn_if_unsorted
|
2950
3330
|
)
|
2951
3331
|
)
|
2952
3332
|
.to_series
|
@@ -3005,7 +3385,8 @@ module Polars
|
|
3005
3385
|
window_size: 2,
|
3006
3386
|
weights: nil,
|
3007
3387
|
min_periods: nil,
|
3008
|
-
center: false
|
3388
|
+
center: false,
|
3389
|
+
warn_if_unsorted: true
|
3009
3390
|
)
|
3010
3391
|
if min_periods.nil?
|
3011
3392
|
min_periods = window_size
|
@@ -3019,7 +3400,8 @@ module Polars
|
|
3019
3400
|
window_size: window_size,
|
3020
3401
|
weights: weights,
|
3021
3402
|
min_periods: min_periods,
|
3022
|
-
center: center
|
3403
|
+
center: center,
|
3404
|
+
warn_if_unsorted: warn_if_unsorted
|
3023
3405
|
)
|
3024
3406
|
)
|
3025
3407
|
.to_series
|
@@ -3076,8 +3458,8 @@ module Polars
|
|
3076
3458
|
# # shape: (2,)
|
3077
3459
|
# # Series: 'a' [i64]
|
3078
3460
|
# # [
|
3079
|
-
# #
|
3080
|
-
# #
|
3461
|
+
# # 5
|
3462
|
+
# # 3
|
3081
3463
|
# # ]
|
3082
3464
|
def sample(
|
3083
3465
|
n: nil,
|
@@ -3456,6 +3838,113 @@ module Polars
|
|
3456
3838
|
super
|
3457
3839
|
end
|
3458
3840
|
|
3841
|
+
# Replace values by different values.
|
3842
|
+
#
|
3843
|
+
# @param old [Object]
|
3844
|
+
# Value or sequence of values to replace.
|
3845
|
+
# Also accepts a mapping of values to their replacement.
|
3846
|
+
# @param new [Object]
|
3847
|
+
# Value or sequence of values to replace by.
|
3848
|
+
# Length must match the length of `old` or have length 1.
|
3849
|
+
# @param default [Object]
|
3850
|
+
# Set values that were not replaced to this value.
|
3851
|
+
# Defaults to keeping the original value.
|
3852
|
+
# Accepts expression input. Non-expression inputs are parsed as literals.
|
3853
|
+
# @param return_dtype [Object]
|
3854
|
+
# The data type of the resulting Series. If set to `nil` (default),
|
3855
|
+
# the data type is determined automatically based on the other inputs.
|
3856
|
+
#
|
3857
|
+
# @return [Series]
|
3858
|
+
#
|
3859
|
+
# @example Replace a single value by another value. Values that were not replaced remain unchanged.
|
3860
|
+
# s = Polars::Series.new([1, 2, 2, 3])
|
3861
|
+
# s.replace(2, 100)
|
3862
|
+
# # =>
|
3863
|
+
# # shape: (4,)
|
3864
|
+
# # Series: '' [i64]
|
3865
|
+
# # [
|
3866
|
+
# # 1
|
3867
|
+
# # 100
|
3868
|
+
# # 100
|
3869
|
+
# # 3
|
3870
|
+
# # ]
|
3871
|
+
#
|
3872
|
+
# @example Replace multiple values by passing sequences to the `old` and `new` parameters.
|
3873
|
+
# s.replace([2, 3], [100, 200])
|
3874
|
+
# # =>
|
3875
|
+
# # shape: (4,)
|
3876
|
+
# # Series: '' [i64]
|
3877
|
+
# # [
|
3878
|
+
# # 1
|
3879
|
+
# # 100
|
3880
|
+
# # 100
|
3881
|
+
# # 200
|
3882
|
+
# # ]
|
3883
|
+
#
|
3884
|
+
# @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
|
3885
|
+
# mapping = {2 => 100, 3 => 200}
|
3886
|
+
# s.replace(mapping, default: -1)
|
3887
|
+
# # =>
|
3888
|
+
# # shape: (4,)
|
3889
|
+
# # Series: '' [i64]
|
3890
|
+
# # [
|
3891
|
+
# # -1
|
3892
|
+
# # 100
|
3893
|
+
# # 100
|
3894
|
+
# # 200
|
3895
|
+
# # ]
|
3896
|
+
#
|
3897
|
+
# @example The default can be another Series.
|
3898
|
+
# default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
|
3899
|
+
# s.replace(2, 100, default: default)
|
3900
|
+
# # =>
|
3901
|
+
# # shape: (4,)
|
3902
|
+
# # Series: '' [f64]
|
3903
|
+
# # [
|
3904
|
+
# # 2.5
|
3905
|
+
# # 100.0
|
3906
|
+
# # 100.0
|
3907
|
+
# # 10.0
|
3908
|
+
# # ]
|
3909
|
+
#
|
3910
|
+
# @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
|
3911
|
+
# s = Polars::Series.new(["x", "y", "z"])
|
3912
|
+
# mapping = {"x" => 1, "y" => 2, "z" => 3}
|
3913
|
+
# s.replace(mapping)
|
3914
|
+
# # =>
|
3915
|
+
# # shape: (3,)
|
3916
|
+
# # Series: '' [str]
|
3917
|
+
# # [
|
3918
|
+
# # "1"
|
3919
|
+
# # "2"
|
3920
|
+
# # "3"
|
3921
|
+
# # ]
|
3922
|
+
#
|
3923
|
+
# @example
|
3924
|
+
# s.replace(mapping, default: nil)
|
3925
|
+
# # =>
|
3926
|
+
# # shape: (3,)
|
3927
|
+
# # Series: '' [i64]
|
3928
|
+
# # [
|
3929
|
+
# # 1
|
3930
|
+
# # 2
|
3931
|
+
# # 3
|
3932
|
+
# # ]
|
3933
|
+
#
|
3934
|
+
# @example Set the `return_dtype` parameter to control the resulting data type directly.
|
3935
|
+
# s.replace(mapping, return_dtype: Polars::UInt8)
|
3936
|
+
# # =>
|
3937
|
+
# # shape: (3,)
|
3938
|
+
# # Series: '' [u8]
|
3939
|
+
# # [
|
3940
|
+
# # 1
|
3941
|
+
# # 2
|
3942
|
+
# # 3
|
3943
|
+
# # ]
|
3944
|
+
def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
|
3945
|
+
super
|
3946
|
+
end
|
3947
|
+
|
3459
3948
|
# Reshape this Series to a flat Series or a Series of Lists.
|
3460
3949
|
#
|
3461
3950
|
# @param dims [Array]
|
@@ -3665,7 +4154,7 @@ module Polars
|
|
3665
4154
|
end
|
3666
4155
|
|
3667
4156
|
def _pos_idxs(idxs)
|
3668
|
-
idx_type =
|
4157
|
+
idx_type = Plr.get_index_type
|
3669
4158
|
|
3670
4159
|
if idxs.is_a?(Series)
|
3671
4160
|
if idxs.dtype == idx_type
|
@@ -3750,7 +4239,7 @@ module Polars
|
|
3750
4239
|
end
|
3751
4240
|
|
3752
4241
|
def ffi_func(name, dtype, _s)
|
3753
|
-
_s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
|
4242
|
+
_s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype.class))) if DTYPE_TO_FFINAME.key?(dtype.class)
|
3754
4243
|
end
|
3755
4244
|
|
3756
4245
|
def _arithmetic(other, op)
|
@@ -3761,7 +4250,7 @@ module Polars
|
|
3761
4250
|
return Utils.wrap_s(_s.send(op, other._s))
|
3762
4251
|
end
|
3763
4252
|
|
3764
|
-
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
|
4253
|
+
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !is_float
|
3765
4254
|
_s2 = sequence_to_rbseries(name, [other])
|
3766
4255
|
return Utils.wrap_s(_s.send(op, _s2))
|
3767
4256
|
end
|
@@ -3865,21 +4354,29 @@ module Polars
|
|
3865
4354
|
end
|
3866
4355
|
end
|
3867
4356
|
|
3868
|
-
if !dtype.nil? && ![List, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
4357
|
+
if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
3869
4358
|
if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
|
3870
|
-
dtype = Array.new(value.size)
|
4359
|
+
dtype = Array.new(nil, value.size)
|
3871
4360
|
end
|
3872
4361
|
|
3873
4362
|
constructor = polars_type_to_constructor(dtype)
|
3874
4363
|
rbseries = constructor.call(name, values, strict)
|
3875
4364
|
|
3876
4365
|
base_type = dtype.is_a?(DataType) ? dtype.class : dtype
|
3877
|
-
if [Date, Datetime, Duration, Time, Categorical, Boolean].include?(base_type)
|
4366
|
+
if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type)
|
3878
4367
|
if rbseries.dtype != dtype
|
3879
4368
|
rbseries = rbseries.cast(dtype, true)
|
3880
4369
|
end
|
3881
4370
|
end
|
3882
4371
|
return rbseries
|
4372
|
+
elsif dtype == Struct
|
4373
|
+
struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
|
4374
|
+
empty = {}
|
4375
|
+
return DataFrame.sequence_to_rbdf(
|
4376
|
+
values.map { |v| v.nil? ? empty : v },
|
4377
|
+
schema: struct_schema,
|
4378
|
+
orient: "row",
|
4379
|
+
).to_struct(name)
|
3883
4380
|
else
|
3884
4381
|
if ruby_dtype.nil?
|
3885
4382
|
if value.nil?
|
@@ -3927,7 +4424,7 @@ module Polars
|
|
3927
4424
|
return RbSeries.new_series_list(name, values, strict)
|
3928
4425
|
else
|
3929
4426
|
constructor =
|
3930
|
-
if value.is_a?(String)
|
4427
|
+
if value.is_a?(::String)
|
3931
4428
|
if value.encoding == Encoding::UTF_8
|
3932
4429
|
RbSeries.method(:new_str)
|
3933
4430
|
else
|
@@ -3970,6 +4467,7 @@ module Polars
|
|
3970
4467
|
Utf8 => RbSeries.method(:new_str),
|
3971
4468
|
Object => RbSeries.method(:new_object),
|
3972
4469
|
Categorical => RbSeries.method(:new_str),
|
4470
|
+
Enum => RbSeries.method(:new_str),
|
3973
4471
|
Binary => RbSeries.method(:new_binary),
|
3974
4472
|
Null => RbSeries.method(:new_null)
|
3975
4473
|
}
|