polars-df 0.5.0-arm64-darwin → 0.7.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4572 -5214
- data/README.md +11 -9
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
@@ -14,7 +14,7 @@ module Polars
|
|
14
14
|
|
15
15
|
if name.is_a?(DataType)
|
16
16
|
Utils.wrap_expr(_dtype_cols([name]))
|
17
|
-
elsif name.is_a?(Array)
|
17
|
+
elsif name.is_a?(::Array)
|
18
18
|
if name.length == 0 || Utils.strlike?(name[0])
|
19
19
|
name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
|
20
20
|
Utils.wrap_expr(RbExpr.cols(name))
|
@@ -36,14 +36,14 @@ module Polars
|
|
36
36
|
# @example A horizontal rank computation by taking the elements of a list
|
37
37
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
38
38
|
# df.with_column(
|
39
|
-
# Polars.concat_list(["a", "b"]).
|
39
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
40
40
|
# )
|
41
41
|
# # =>
|
42
42
|
# # shape: (3, 3)
|
43
43
|
# # ┌─────┬─────┬────────────┐
|
44
44
|
# # │ a ┆ b ┆ rank │
|
45
45
|
# # │ --- ┆ --- ┆ --- │
|
46
|
-
# # │ i64 ┆ i64 ┆ list[
|
46
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
47
47
|
# # ╞═════╪═════╪════════════╡
|
48
48
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
49
49
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
@@ -156,10 +156,9 @@ module Polars
|
|
156
156
|
column.sum
|
157
157
|
elsif Utils.strlike?(column)
|
158
158
|
col(column.to_s).sum
|
159
|
-
elsif column.is_a?(Array)
|
159
|
+
elsif column.is_a?(::Array)
|
160
160
|
exprs = Utils.selection_to_rbexpr_list(column)
|
161
|
-
|
162
|
-
Utils.wrap_expr(_sum_exprs(exprs))
|
161
|
+
Utils.wrap_expr(_sum_horizontal(exprs))
|
163
162
|
else
|
164
163
|
fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
|
165
164
|
end
|
@@ -283,18 +282,33 @@ module Polars
|
|
283
282
|
# Return an expression representing a literal value.
|
284
283
|
#
|
285
284
|
# @return [Expr]
|
286
|
-
def lit(value)
|
287
|
-
if value.is_a?(
|
285
|
+
def lit(value, dtype: nil, allow_object: nil)
|
286
|
+
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
287
|
+
time_unit = dtype&.time_unit || "ns"
|
288
|
+
# Safe navigation: read the dtype's time zone only when a dtype was given (nil otherwise).
time_zone = dtype&.time_zone
|
289
|
+
e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
|
290
|
+
if time_zone
|
291
|
+
return e.dt.replace_time_zone(time_zone.to_s)
|
292
|
+
else
|
293
|
+
return e
|
294
|
+
end
|
295
|
+
elsif value.is_a?(::Date)
|
296
|
+
return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
|
297
|
+
elsif value.is_a?(Polars::Series)
|
288
298
|
name = value.name
|
289
299
|
value = value._s
|
290
|
-
e = Utils.wrap_expr(RbExpr.lit(value))
|
300
|
+
e = Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
291
301
|
if name == ""
|
292
302
|
return e
|
293
303
|
end
|
294
304
|
return e.alias(name)
|
305
|
+
elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
|
306
|
+
return lit(Series.new("", value))
|
307
|
+
elsif dtype
|
308
|
+
return Utils.wrap_expr(RbExpr.lit(value, allow_object)).cast(dtype)
|
295
309
|
end
|
296
310
|
|
297
|
-
Utils.wrap_expr(RbExpr.lit(value))
|
311
|
+
Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
298
312
|
end
|
299
313
|
|
300
314
|
# Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
|
@@ -611,39 +625,43 @@ module Polars
|
|
611
625
|
# This can be used in a `select`, `with_column`, etc. Be sure that the resulting
|
612
626
|
# range size is equal to the length of the DataFrame you are collecting.
|
613
627
|
#
|
614
|
-
# @param
|
628
|
+
# @param start [Integer, Expr, Series]
|
615
629
|
# Lower bound of range.
|
616
|
-
# @param
|
630
|
+
# @param stop [Integer, Expr, Series]
|
617
631
|
# Upper bound of range.
|
618
632
|
# @param step [Integer]
|
619
633
|
# Step size of the range.
|
620
634
|
# @param eager [Boolean]
|
621
635
|
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
622
636
|
# @param dtype [Symbol]
|
623
|
-
# Apply an explicit integer dtype to the resulting expression (default is
|
637
|
+
# Apply an explicit integer dtype to the resulting expression (default is `Int64`).
|
624
638
|
#
|
625
639
|
# @return [Expr, Series]
|
626
640
|
#
|
627
641
|
# @example
|
628
|
-
#
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
642
|
+
# Polars.arange(0, 3, eager: true)
|
643
|
+
# # =>
|
644
|
+
# # shape: (3,)
|
645
|
+
# # Series: 'arange' [i64]
|
646
|
+
# # [
|
647
|
+
# # 0
|
648
|
+
# # 1
|
649
|
+
# # 2
|
650
|
+
# # ]
|
651
|
+
def int_range(start, stop, step: 1, eager: false, dtype: nil)
|
652
|
+
start = Utils.parse_as_expression(start)
|
653
|
+
stop = Utils.parse_as_expression(stop)
|
654
|
+
dtype ||= Int64
|
655
|
+
dtype = dtype.to_s if dtype.is_a?(Symbol)
|
656
|
+
result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
|
637
657
|
|
638
|
-
if
|
639
|
-
|
640
|
-
else
|
641
|
-
DataFrame.new
|
642
|
-
.select(range_expr)
|
643
|
-
.to_series
|
644
|
-
.rename("arange", in_place: true)
|
658
|
+
if eager
|
659
|
+
return select(result).to_series
|
645
660
|
end
|
661
|
+
|
662
|
+
result
|
646
663
|
end
|
664
|
+
alias_method :arange, :int_range
|
647
665
|
|
648
666
|
# Find the indexes that would sort the columns.
|
649
667
|
#
|
@@ -658,7 +676,7 @@ module Polars
|
|
658
676
|
#
|
659
677
|
# @return [Expr]
|
660
678
|
def arg_sort_by(exprs, reverse: false)
|
661
|
-
if !exprs.is_a?(Array)
|
679
|
+
if !exprs.is_a?(::Array)
|
662
680
|
exprs = [exprs]
|
663
681
|
end
|
664
682
|
if reverse == true || reverse == false
|
@@ -702,15 +720,22 @@ module Polars
|
|
702
720
|
# # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
|
703
721
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
|
704
722
|
def duration(
|
723
|
+
weeks: nil,
|
705
724
|
days: nil,
|
725
|
+
hours: nil,
|
726
|
+
minutes: nil,
|
706
727
|
seconds: nil,
|
707
|
-
nanoseconds: nil,
|
708
|
-
microseconds: nil,
|
709
728
|
milliseconds: nil,
|
710
|
-
|
711
|
-
|
712
|
-
|
729
|
+
microseconds: nil,
|
730
|
+
nanoseconds: nil,
|
731
|
+
time_unit: "us"
|
713
732
|
)
|
733
|
+
if !weeks.nil?
|
734
|
+
weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
|
735
|
+
end
|
736
|
+
if !days.nil?
|
737
|
+
days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
|
738
|
+
end
|
714
739
|
if !hours.nil?
|
715
740
|
hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
|
716
741
|
end
|
@@ -729,23 +754,18 @@ module Polars
|
|
729
754
|
if !nanoseconds.nil?
|
730
755
|
nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
|
731
756
|
end
|
732
|
-
if !days.nil?
|
733
|
-
days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
|
734
|
-
end
|
735
|
-
if !weeks.nil?
|
736
|
-
weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
|
737
|
-
end
|
738
757
|
|
739
758
|
Utils.wrap_expr(
|
740
759
|
_rb_duration(
|
760
|
+
weeks,
|
741
761
|
days,
|
762
|
+
hours,
|
763
|
+
minutes,
|
742
764
|
seconds,
|
743
|
-
nanoseconds,
|
744
|
-
microseconds,
|
745
765
|
milliseconds,
|
746
|
-
|
747
|
-
|
748
|
-
|
766
|
+
microseconds,
|
767
|
+
nanoseconds,
|
768
|
+
time_unit
|
749
769
|
)
|
750
770
|
)
|
751
771
|
end
|
@@ -911,7 +931,8 @@ module Polars
|
|
911
931
|
simplify_expression,
|
912
932
|
slice_pushdown,
|
913
933
|
common_subplan_elimination,
|
914
|
-
allow_streaming
|
934
|
+
allow_streaming,
|
935
|
+
false
|
915
936
|
)
|
916
937
|
prepared << ldf
|
917
938
|
end
|
@@ -997,19 +1018,24 @@ module Polars
|
|
997
1018
|
# Only used in `eager` mode. As expression, use `alias`.
|
998
1019
|
#
|
999
1020
|
# @return [Expr]
|
1000
|
-
def repeat(value, n, eager: false, name: nil)
|
1021
|
+
def repeat(value, n, dtype: nil, eager: false, name: nil)
|
1022
|
+
if !name.nil?
|
1023
|
+
warn "the `name` argument is deprecated. Use the `alias` method instead."
|
1024
|
+
end
|
1025
|
+
|
1026
|
+
if n.is_a?(Integer)
|
1027
|
+
n = lit(n)
|
1028
|
+
end
|
1029
|
+
|
1030
|
+
value = Utils.parse_as_expression(value, str_as_lit: true)
|
1031
|
+
expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr, dtype))
|
1032
|
+
if !name.nil?
|
1033
|
+
expr = expr.alias(name)
|
1034
|
+
end
|
1001
1035
|
if eager
|
1002
|
-
|
1003
|
-
name = ""
|
1004
|
-
end
|
1005
|
-
dtype = py_type_to_dtype(type(value))
|
1006
|
-
Series._repeat(name, value, n, dtype)
|
1007
|
-
else
|
1008
|
-
if n.is_a?(Integer)
|
1009
|
-
n = lit(n)
|
1010
|
-
end
|
1011
|
-
Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
|
1036
|
+
return select(expr).to_series
|
1012
1037
|
end
|
1038
|
+
expr
|
1013
1039
|
end
|
1014
1040
|
|
1015
1041
|
# Return indices where `condition` evaluates `true`.
|
@@ -1124,13 +1150,11 @@ module Polars
|
|
1124
1150
|
end
|
1125
1151
|
|
1126
1152
|
if unit == "d"
|
1127
|
-
expr = column.cast(
|
1153
|
+
expr = column.cast(Date)
|
1128
1154
|
elsif unit == "s"
|
1129
|
-
|
1130
|
-
# expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
|
1155
|
+
expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
|
1131
1156
|
elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
|
1132
|
-
|
1133
|
-
# expr = column.cast(Datetime(unit))
|
1157
|
+
expr = column.cast(Datetime.new(unit))
|
1134
1158
|
else
|
1135
1159
|
raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
|
1136
1160
|
end
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
module Polars
|
2
|
-
# Created by `df.lazy.
|
2
|
+
# Created by `df.lazy.group_by("foo")`.
|
3
3
|
class LazyGroupBy
|
4
4
|
# @private
|
5
|
-
def initialize(lgb
|
5
|
+
def initialize(lgb)
|
6
6
|
@lgb = lgb
|
7
|
-
@lazyframe_class = lazyframe_class
|
8
7
|
end
|
9
8
|
|
10
9
|
# Describe the aggregation that needs to be done on a group.
|
@@ -12,7 +11,7 @@ module Polars
|
|
12
11
|
# @return [LazyFrame]
|
13
12
|
def agg(aggs)
|
14
13
|
rbexprs = Utils.selection_to_rbexpr_list(aggs)
|
15
|
-
|
14
|
+
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
16
15
|
end
|
17
16
|
|
18
17
|
# Get the first `n` rows of each group.
|
@@ -29,7 +28,7 @@ module Polars
|
|
29
28
|
# "nrs" => [1, 2, 3, 4, 5, 6]
|
30
29
|
# }
|
31
30
|
# )
|
32
|
-
# df.
|
31
|
+
# df.group_by("letters").head(2).sort("letters")
|
33
32
|
# # =>
|
34
33
|
# # shape: (5, 2)
|
35
34
|
# # ┌─────────┬─────┐
|
@@ -44,7 +43,7 @@ module Polars
|
|
44
43
|
# # │ c ┆ 2 │
|
45
44
|
# # └─────────┴─────┘
|
46
45
|
def head(n = 5)
|
47
|
-
|
46
|
+
Utils.wrap_ldf(@lgb.head(n))
|
48
47
|
end
|
49
48
|
|
50
49
|
# Get the last `n` rows of each group.
|
@@ -61,7 +60,7 @@ module Polars
|
|
61
60
|
# "nrs" => [1, 2, 3, 4, 5, 6]
|
62
61
|
# }
|
63
62
|
# )
|
64
|
-
# df.
|
63
|
+
# df.group_by("letters").tail(2).sort("letters")
|
65
64
|
# # =>
|
66
65
|
# # shape: (5, 2)
|
67
66
|
# # ┌─────────┬─────┐
|
@@ -76,7 +75,7 @@ module Polars
|
|
76
75
|
# # │ c ┆ 4 │
|
77
76
|
# # └─────────┴─────┘
|
78
77
|
def tail(n = 5)
|
79
|
-
|
78
|
+
Utils.wrap_ldf(@lgb.tail(n))
|
80
79
|
end
|
81
80
|
|
82
81
|
# def apply
|
data/lib/polars/list_expr.rb
CHANGED
@@ -15,7 +15,7 @@ module Polars
|
|
15
15
|
#
|
16
16
|
# @example
|
17
17
|
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
18
|
-
# df.select(Polars.col("bar").
|
18
|
+
# df.select(Polars.col("bar").list.lengths)
|
19
19
|
# # =>
|
20
20
|
# # shape: (2, 1)
|
21
21
|
# # ┌─────┐
|
@@ -27,8 +27,9 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
|
30
|
-
Utils.wrap_expr(_rbexpr.
|
30
|
+
Utils.wrap_expr(_rbexpr.list_len)
|
31
31
|
end
|
32
|
+
alias_method :len, :lengths
|
32
33
|
|
33
34
|
# Sum all the lists in the array.
|
34
35
|
#
|
@@ -36,7 +37,7 @@ module Polars
|
|
36
37
|
#
|
37
38
|
# @example
|
38
39
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
39
|
-
# df.select(Polars.col("values").
|
40
|
+
# df.select(Polars.col("values").list.sum)
|
40
41
|
# # =>
|
41
42
|
# # shape: (2, 1)
|
42
43
|
# # ┌────────┐
|
@@ -57,7 +58,7 @@ module Polars
|
|
57
58
|
#
|
58
59
|
# @example
|
59
60
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
60
|
-
# df.select(Polars.col("values").
|
61
|
+
# df.select(Polars.col("values").list.max)
|
61
62
|
# # =>
|
62
63
|
# # shape: (2, 1)
|
63
64
|
# # ┌────────┐
|
@@ -78,7 +79,7 @@ module Polars
|
|
78
79
|
#
|
79
80
|
# @example
|
80
81
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
81
|
-
# df.select(Polars.col("values").
|
82
|
+
# df.select(Polars.col("values").list.min)
|
82
83
|
# # =>
|
83
84
|
# # shape: (2, 1)
|
84
85
|
# # ┌────────┐
|
@@ -99,7 +100,7 @@ module Polars
|
|
99
100
|
#
|
100
101
|
# @example
|
101
102
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
102
|
-
# df.select(Polars.col("values").
|
103
|
+
# df.select(Polars.col("values").list.mean)
|
103
104
|
# # =>
|
104
105
|
# # shape: (2, 1)
|
105
106
|
# # ┌────────┐
|
@@ -124,7 +125,7 @@ module Polars
|
|
124
125
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
125
126
|
# }
|
126
127
|
# )
|
127
|
-
# df.select(Polars.col("a").
|
128
|
+
# df.select(Polars.col("a").list.sort)
|
128
129
|
# # =>
|
129
130
|
# # shape: (2, 1)
|
130
131
|
# # ┌───────────┐
|
@@ -149,7 +150,7 @@ module Polars
|
|
149
150
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
150
151
|
# }
|
151
152
|
# )
|
152
|
-
# df.select(Polars.col("a").
|
153
|
+
# df.select(Polars.col("a").list.reverse)
|
153
154
|
# # =>
|
154
155
|
# # shape: (2, 1)
|
155
156
|
# # ┌───────────┐
|
@@ -174,7 +175,7 @@ module Polars
|
|
174
175
|
# "a" => [[1, 1, 2]]
|
175
176
|
# }
|
176
177
|
# )
|
177
|
-
# df.select(Polars.col("a").
|
178
|
+
# df.select(Polars.col("a").list.unique)
|
178
179
|
# # =>
|
179
180
|
# # shape: (1, 1)
|
180
181
|
# # ┌───────────┐
|
@@ -202,7 +203,7 @@ module Polars
|
|
202
203
|
# "b" => [["b", "c"], ["y", "z"]]
|
203
204
|
# }
|
204
205
|
# )
|
205
|
-
# df.select(Polars.col("a").
|
206
|
+
# df.select(Polars.col("a").list.concat("b"))
|
206
207
|
# # =>
|
207
208
|
# # shape: (2, 1)
|
208
209
|
# # ┌─────────────────┐
|
@@ -214,11 +215,11 @@ module Polars
|
|
214
215
|
# # │ ["x", "y", "z"] │
|
215
216
|
# # └─────────────────┘
|
216
217
|
def concat(other)
|
217
|
-
if other.is_a?(Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
218
|
+
if other.is_a?(::Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
218
219
|
return concat(Series.new([other]))
|
219
220
|
end
|
220
221
|
|
221
|
-
if !other.is_a?(Array)
|
222
|
+
if !other.is_a?(::Array)
|
222
223
|
other_list = [other]
|
223
224
|
else
|
224
225
|
other_list = other.dup
|
@@ -241,7 +242,7 @@ module Polars
|
|
241
242
|
#
|
242
243
|
# @example
|
243
244
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
244
|
-
# df.select(Polars.col("foo").
|
245
|
+
# df.select(Polars.col("foo").list.get(0))
|
245
246
|
# # =>
|
246
247
|
# # shape: (3, 1)
|
247
248
|
# # ┌──────┐
|
@@ -254,7 +255,7 @@ module Polars
|
|
254
255
|
# # │ 1 │
|
255
256
|
# # └──────┘
|
256
257
|
def get(index)
|
257
|
-
index = Utils.
|
258
|
+
index = Utils.parse_as_expression(index)
|
258
259
|
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
260
|
end
|
260
261
|
|
@@ -280,7 +281,7 @@ module Polars
|
|
280
281
|
#
|
281
282
|
# @return [Expr]
|
282
283
|
def take(index, null_on_oob: false)
|
283
|
-
if index.is_a?(Array)
|
284
|
+
if index.is_a?(::Array)
|
284
285
|
index = Series.new(index)
|
285
286
|
end
|
286
287
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
@@ -293,7 +294,7 @@ module Polars
|
|
293
294
|
#
|
294
295
|
# @example
|
295
296
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
296
|
-
# df.select(Polars.col("foo").
|
297
|
+
# df.select(Polars.col("foo").list.first)
|
297
298
|
# # =>
|
298
299
|
# # shape: (3, 1)
|
299
300
|
# # ┌──────┐
|
@@ -315,7 +316,7 @@ module Polars
|
|
315
316
|
#
|
316
317
|
# @example
|
317
318
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
318
|
-
# df.select(Polars.col("foo").
|
319
|
+
# df.select(Polars.col("foo").list.last)
|
319
320
|
# # =>
|
320
321
|
# # shape: (3, 1)
|
321
322
|
# # ┌──────┐
|
@@ -340,7 +341,7 @@ module Polars
|
|
340
341
|
#
|
341
342
|
# @example
|
342
343
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
343
|
-
# df.select(Polars.col("foo").
|
344
|
+
# df.select(Polars.col("foo").list.contains(1))
|
344
345
|
# # =>
|
345
346
|
# # shape: (3, 1)
|
346
347
|
# # ┌───────┐
|
@@ -367,7 +368,7 @@ module Polars
|
|
367
368
|
#
|
368
369
|
# @example
|
369
370
|
# df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
|
370
|
-
# df.select(Polars.col("s").
|
371
|
+
# df.select(Polars.col("s").list.join(" "))
|
371
372
|
# # =>
|
372
373
|
# # shape: (2, 1)
|
373
374
|
# # ┌───────┐
|
@@ -379,6 +380,7 @@ module Polars
|
|
379
380
|
# # │ x y │
|
380
381
|
# # └───────┘
|
381
382
|
def join(separator)
|
383
|
+
separator = Utils.parse_as_expression(separator, str_as_lit: true)
|
382
384
|
Utils.wrap_expr(_rbexpr.list_join(separator))
|
383
385
|
end
|
384
386
|
|
@@ -392,7 +394,7 @@ module Polars
|
|
392
394
|
# "a" => [[1, 2], [2, 1]]
|
393
395
|
# }
|
394
396
|
# )
|
395
|
-
# df.select(Polars.col("a").
|
397
|
+
# df.select(Polars.col("a").list.arg_min)
|
396
398
|
# # =>
|
397
399
|
# # shape: (2, 1)
|
398
400
|
# # ┌─────┐
|
@@ -417,7 +419,7 @@ module Polars
|
|
417
419
|
# "a" => [[1, 2], [2, 1]]
|
418
420
|
# }
|
419
421
|
# )
|
420
|
-
# df.select(Polars.col("a").
|
422
|
+
# df.select(Polars.col("a").list.arg_max)
|
421
423
|
# # =>
|
422
424
|
# # shape: (2, 1)
|
423
425
|
# # ┌─────┐
|
@@ -443,7 +445,7 @@ module Polars
|
|
443
445
|
#
|
444
446
|
# @example
|
445
447
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
446
|
-
# s.
|
448
|
+
# s.list.diff
|
447
449
|
# # =>
|
448
450
|
# # shape: (2,)
|
449
451
|
# # Series: 'a' [list[i64]]
|
@@ -457,14 +459,14 @@ module Polars
|
|
457
459
|
|
458
460
|
# Shift values by the given period.
|
459
461
|
#
|
460
|
-
# @param
|
462
|
+
# @param n [Integer]
|
461
463
|
# Number of places to shift (may be negative).
|
462
464
|
#
|
463
465
|
# @return [Expr]
|
464
466
|
#
|
465
467
|
# @example
|
466
468
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
467
|
-
# s.
|
469
|
+
# s.list.shift
|
468
470
|
# # =>
|
469
471
|
# # shape: (2,)
|
470
472
|
# # Series: 'a' [list[i64]]
|
@@ -472,8 +474,9 @@ module Polars
|
|
472
474
|
# # [null, 1, … 3]
|
473
475
|
# # [null, 10, 2]
|
474
476
|
# # ]
|
475
|
-
def shift(
|
476
|
-
Utils.
|
477
|
+
def shift(n = 1)
|
478
|
+
n = Utils.parse_as_expression(n)
|
479
|
+
Utils.wrap_expr(_rbexpr.list_shift(n))
|
477
480
|
end
|
478
481
|
|
479
482
|
# Slice every sublist.
|
@@ -488,7 +491,7 @@ module Polars
|
|
488
491
|
#
|
489
492
|
# @example
|
490
493
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
491
|
-
# s.
|
494
|
+
# s.list.slice(1, 2)
|
492
495
|
# # =>
|
493
496
|
# # shape: (2,)
|
494
497
|
# # Series: 'a' [list[i64]]
|
@@ -511,7 +514,7 @@ module Polars
|
|
511
514
|
#
|
512
515
|
# @example
|
513
516
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
514
|
-
# s.
|
517
|
+
# s.list.head(2)
|
515
518
|
# # =>
|
516
519
|
# # shape: (2,)
|
517
520
|
# # Series: 'a' [list[i64]]
|
@@ -532,7 +535,7 @@ module Polars
|
|
532
535
|
#
|
533
536
|
# @example
|
534
537
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
535
|
-
# s.
|
538
|
+
# s.list.tail(2)
|
536
539
|
# # =>
|
537
540
|
# # shape: (2,)
|
538
541
|
# # Series: 'a' [list[i64]]
|
@@ -554,7 +557,7 @@ module Polars
|
|
554
557
|
#
|
555
558
|
# @example
|
556
559
|
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
-
# df.select(Polars.col("listcol").
|
560
|
+
# df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
|
558
561
|
# # =>
|
559
562
|
# # shape: (5, 1)
|
560
563
|
# # ┌────────────────┐
|
@@ -568,9 +571,10 @@ module Polars
|
|
568
571
|
# # │ 1 │
|
569
572
|
# # │ 0 │
|
570
573
|
# # └────────────────┘
|
571
|
-
def
|
572
|
-
Utils.wrap_expr(_rbexpr.
|
574
|
+
def count_matches(element)
|
575
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
|
573
576
|
end
|
577
|
+
alias_method :count_match, :count_matches
|
574
578
|
|
575
579
|
# Convert the series of type `List` to a series of type `Struct`.
|
576
580
|
#
|
@@ -584,7 +588,7 @@ module Polars
|
|
584
588
|
#
|
585
589
|
# @example
|
586
590
|
# df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
|
587
|
-
# df.select([Polars.col("a").
|
591
|
+
# df.select([Polars.col("a").list.to_struct])
|
588
592
|
# # =>
|
589
593
|
# # shape: (2, 1)
|
590
594
|
# # ┌────────────┐
|
@@ -609,7 +613,7 @@ module Polars
|
|
609
613
|
# Run all expressions in parallel. Don't activate this blindly.
|
610
614
|
# Parallelism is worth it if there is enough work to do per thread.
|
611
615
|
#
|
612
|
-
# This likely should not be use in the
|
616
|
+
# This likely should not be used in the group by context, because we already
|
613
617
|
# have parallel execution per group
|
614
618
|
#
|
615
619
|
# @return [Expr]
|
@@ -617,14 +621,14 @@ module Polars
|
|
617
621
|
# @example
|
618
622
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
619
623
|
# df.with_column(
|
620
|
-
# Polars.concat_list(["a", "b"]).
|
624
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
621
625
|
# )
|
622
626
|
# # =>
|
623
627
|
# # shape: (3, 3)
|
624
628
|
# # ┌─────┬─────┬────────────┐
|
625
629
|
# # │ a ┆ b ┆ rank │
|
626
630
|
# # │ --- ┆ --- ┆ --- │
|
627
|
-
# # │ i64 ┆ i64 ┆ list[
|
631
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
628
632
|
# # ╞═════╪═════╪════════════╡
|
629
633
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
630
634
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|