polars-df 0.5.0-x86_64-darwin → 0.7.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4572 -5214
- data/README.md +11 -9
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
@@ -14,7 +14,7 @@ module Polars
|
|
14
14
|
|
15
15
|
if name.is_a?(DataType)
|
16
16
|
Utils.wrap_expr(_dtype_cols([name]))
|
17
|
-
elsif name.is_a?(Array)
|
17
|
+
elsif name.is_a?(::Array)
|
18
18
|
if name.length == 0 || Utils.strlike?(name[0])
|
19
19
|
name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
|
20
20
|
Utils.wrap_expr(RbExpr.cols(name))
|
@@ -36,14 +36,14 @@ module Polars
|
|
36
36
|
# @example A horizontal rank computation by taking the elements of a list
|
37
37
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
38
38
|
# df.with_column(
|
39
|
-
# Polars.concat_list(["a", "b"]).
|
39
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
40
40
|
# )
|
41
41
|
# # =>
|
42
42
|
# # shape: (3, 3)
|
43
43
|
# # ┌─────┬─────┬────────────┐
|
44
44
|
# # │ a ┆ b ┆ rank │
|
45
45
|
# # │ --- ┆ --- ┆ --- │
|
46
|
-
# # │ i64 ┆ i64 ┆ list[
|
46
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
47
47
|
# # ╞═════╪═════╪════════════╡
|
48
48
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
49
49
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
@@ -156,10 +156,9 @@ module Polars
|
|
156
156
|
column.sum
|
157
157
|
elsif Utils.strlike?(column)
|
158
158
|
col(column.to_s).sum
|
159
|
-
elsif column.is_a?(Array)
|
159
|
+
elsif column.is_a?(::Array)
|
160
160
|
exprs = Utils.selection_to_rbexpr_list(column)
|
161
|
-
|
162
|
-
Utils.wrap_expr(_sum_exprs(exprs))
|
161
|
+
Utils.wrap_expr(_sum_horizontal(exprs))
|
163
162
|
else
|
164
163
|
fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
|
165
164
|
end
|
@@ -283,18 +282,33 @@ module Polars
|
|
283
282
|
# Return an expression representing a literal value.
|
284
283
|
#
|
285
284
|
# @return [Expr]
|
286
|
-
def lit(value)
|
287
|
-
if value.is_a?(
|
285
|
+
def lit(value, dtype: nil, allow_object: nil)
|
286
|
+
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
287
|
+
time_unit = dtype&.time_unit || "ns"
|
288
|
+
time_zone = dtype.&time_zone
|
289
|
+
e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
|
290
|
+
if time_zone
|
291
|
+
return e.dt.replace_time_zone(time_zone.to_s)
|
292
|
+
else
|
293
|
+
return e
|
294
|
+
end
|
295
|
+
elsif value.is_a?(::Date)
|
296
|
+
return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
|
297
|
+
elsif value.is_a?(Polars::Series)
|
288
298
|
name = value.name
|
289
299
|
value = value._s
|
290
|
-
e = Utils.wrap_expr(RbExpr.lit(value))
|
300
|
+
e = Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
291
301
|
if name == ""
|
292
302
|
return e
|
293
303
|
end
|
294
304
|
return e.alias(name)
|
305
|
+
elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
|
306
|
+
return lit(Series.new("", value))
|
307
|
+
elsif dtype
|
308
|
+
return Utils.wrap_expr(RbExpr.lit(value, allow_object)).cast(dtype)
|
295
309
|
end
|
296
310
|
|
297
|
-
Utils.wrap_expr(RbExpr.lit(value))
|
311
|
+
Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
298
312
|
end
|
299
313
|
|
300
314
|
# Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
|
@@ -611,39 +625,43 @@ module Polars
|
|
611
625
|
# This can be used in a `select`, `with_column`, etc. Be sure that the resulting
|
612
626
|
# range size is equal to the length of the DataFrame you are collecting.
|
613
627
|
#
|
614
|
-
# @param
|
628
|
+
# @param start [Integer, Expr, Series]
|
615
629
|
# Lower bound of range.
|
616
|
-
# @param
|
630
|
+
# @param stop [Integer, Expr, Series]
|
617
631
|
# Upper bound of range.
|
618
632
|
# @param step [Integer]
|
619
633
|
# Step size of the range.
|
620
634
|
# @param eager [Boolean]
|
621
635
|
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
622
636
|
# @param dtype [Symbol]
|
623
|
-
# Apply an explicit integer dtype to the resulting expression (default is
|
637
|
+
# Apply an explicit integer dtype to the resulting expression (default is `Int64`).
|
624
638
|
#
|
625
639
|
# @return [Expr, Series]
|
626
640
|
#
|
627
641
|
# @example
|
628
|
-
#
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
642
|
+
# Polars.arange(0, 3, eager: true)
|
643
|
+
# # =>
|
644
|
+
# # shape: (3,)
|
645
|
+
# # Series: 'arange' [i64]
|
646
|
+
# # [
|
647
|
+
# # 0
|
648
|
+
# # 1
|
649
|
+
# # 2
|
650
|
+
# # ]
|
651
|
+
def int_range(start, stop, step: 1, eager: false, dtype: nil)
|
652
|
+
start = Utils.parse_as_expression(start)
|
653
|
+
stop = Utils.parse_as_expression(stop)
|
654
|
+
dtype ||= Int64
|
655
|
+
dtype = dtype.to_s if dtype.is_a?(Symbol)
|
656
|
+
result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
|
637
657
|
|
638
|
-
if
|
639
|
-
|
640
|
-
else
|
641
|
-
DataFrame.new
|
642
|
-
.select(range_expr)
|
643
|
-
.to_series
|
644
|
-
.rename("arange", in_place: true)
|
658
|
+
if eager
|
659
|
+
return select(result).to_series
|
645
660
|
end
|
661
|
+
|
662
|
+
result
|
646
663
|
end
|
664
|
+
alias_method :arange, :int_range
|
647
665
|
|
648
666
|
# Find the indexes that would sort the columns.
|
649
667
|
#
|
@@ -658,7 +676,7 @@ module Polars
|
|
658
676
|
#
|
659
677
|
# @return [Expr]
|
660
678
|
def arg_sort_by(exprs, reverse: false)
|
661
|
-
if !exprs.is_a?(Array)
|
679
|
+
if !exprs.is_a?(::Array)
|
662
680
|
exprs = [exprs]
|
663
681
|
end
|
664
682
|
if reverse == true || reverse == false
|
@@ -702,15 +720,22 @@ module Polars
|
|
702
720
|
# # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
|
703
721
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
|
704
722
|
def duration(
|
723
|
+
weeks: nil,
|
705
724
|
days: nil,
|
725
|
+
hours: nil,
|
726
|
+
minutes: nil,
|
706
727
|
seconds: nil,
|
707
|
-
nanoseconds: nil,
|
708
|
-
microseconds: nil,
|
709
728
|
milliseconds: nil,
|
710
|
-
|
711
|
-
|
712
|
-
|
729
|
+
microseconds: nil,
|
730
|
+
nanoseconds: nil,
|
731
|
+
time_unit: "us"
|
713
732
|
)
|
733
|
+
if !weeks.nil?
|
734
|
+
weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
|
735
|
+
end
|
736
|
+
if !days.nil?
|
737
|
+
days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
|
738
|
+
end
|
714
739
|
if !hours.nil?
|
715
740
|
hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
|
716
741
|
end
|
@@ -729,23 +754,18 @@ module Polars
|
|
729
754
|
if !nanoseconds.nil?
|
730
755
|
nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
|
731
756
|
end
|
732
|
-
if !days.nil?
|
733
|
-
days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
|
734
|
-
end
|
735
|
-
if !weeks.nil?
|
736
|
-
weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
|
737
|
-
end
|
738
757
|
|
739
758
|
Utils.wrap_expr(
|
740
759
|
_rb_duration(
|
760
|
+
weeks,
|
741
761
|
days,
|
762
|
+
hours,
|
763
|
+
minutes,
|
742
764
|
seconds,
|
743
|
-
nanoseconds,
|
744
|
-
microseconds,
|
745
765
|
milliseconds,
|
746
|
-
|
747
|
-
|
748
|
-
|
766
|
+
microseconds,
|
767
|
+
nanoseconds,
|
768
|
+
time_unit
|
749
769
|
)
|
750
770
|
)
|
751
771
|
end
|
@@ -911,7 +931,8 @@ module Polars
|
|
911
931
|
simplify_expression,
|
912
932
|
slice_pushdown,
|
913
933
|
common_subplan_elimination,
|
914
|
-
allow_streaming
|
934
|
+
allow_streaming,
|
935
|
+
false
|
915
936
|
)
|
916
937
|
prepared << ldf
|
917
938
|
end
|
@@ -997,19 +1018,24 @@ module Polars
|
|
997
1018
|
# Only used in `eager` mode. As expression, use `alias`.
|
998
1019
|
#
|
999
1020
|
# @return [Expr]
|
1000
|
-
def repeat(value, n, eager: false, name: nil)
|
1021
|
+
def repeat(value, n, dtype: nil, eager: false, name: nil)
|
1022
|
+
if !name.nil?
|
1023
|
+
warn "the `name` argument is deprecated. Use the `alias` method instead."
|
1024
|
+
end
|
1025
|
+
|
1026
|
+
if n.is_a?(Integer)
|
1027
|
+
n = lit(n)
|
1028
|
+
end
|
1029
|
+
|
1030
|
+
value = Utils.parse_as_expression(value, str_as_lit: true)
|
1031
|
+
expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr, dtype))
|
1032
|
+
if !name.nil?
|
1033
|
+
expr = expr.alias(name)
|
1034
|
+
end
|
1001
1035
|
if eager
|
1002
|
-
|
1003
|
-
name = ""
|
1004
|
-
end
|
1005
|
-
dtype = py_type_to_dtype(type(value))
|
1006
|
-
Series._repeat(name, value, n, dtype)
|
1007
|
-
else
|
1008
|
-
if n.is_a?(Integer)
|
1009
|
-
n = lit(n)
|
1010
|
-
end
|
1011
|
-
Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
|
1036
|
+
return select(expr).to_series
|
1012
1037
|
end
|
1038
|
+
expr
|
1013
1039
|
end
|
1014
1040
|
|
1015
1041
|
# Return indices where `condition` evaluates `true`.
|
@@ -1124,13 +1150,11 @@ module Polars
|
|
1124
1150
|
end
|
1125
1151
|
|
1126
1152
|
if unit == "d"
|
1127
|
-
expr = column.cast(
|
1153
|
+
expr = column.cast(Date)
|
1128
1154
|
elsif unit == "s"
|
1129
|
-
|
1130
|
-
# expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
|
1155
|
+
expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
|
1131
1156
|
elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
|
1132
|
-
|
1133
|
-
# expr = column.cast(Datetime(unit))
|
1157
|
+
expr = column.cast(Datetime.new(unit))
|
1134
1158
|
else
|
1135
1159
|
raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
|
1136
1160
|
end
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
module Polars
|
2
|
-
# Created by `df.lazy.
|
2
|
+
# Created by `df.lazy.group_by("foo")`.
|
3
3
|
class LazyGroupBy
|
4
4
|
# @private
|
5
|
-
def initialize(lgb
|
5
|
+
def initialize(lgb)
|
6
6
|
@lgb = lgb
|
7
|
-
@lazyframe_class = lazyframe_class
|
8
7
|
end
|
9
8
|
|
10
9
|
# Describe the aggregations that need to be performed on a group.
|
@@ -12,7 +11,7 @@ module Polars
|
|
12
11
|
# @return [LazyFrame]
|
13
12
|
def agg(aggs)
|
14
13
|
rbexprs = Utils.selection_to_rbexpr_list(aggs)
|
15
|
-
|
14
|
+
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
16
15
|
end
|
17
16
|
|
18
17
|
# Get the first `n` rows of each group.
|
@@ -29,7 +28,7 @@ module Polars
|
|
29
28
|
# "nrs" => [1, 2, 3, 4, 5, 6]
|
30
29
|
# }
|
31
30
|
# )
|
32
|
-
# df.
|
31
|
+
# df.group_by("letters").head(2).sort("letters")
|
33
32
|
# # =>
|
34
33
|
# # shape: (5, 2)
|
35
34
|
# # ┌─────────┬─────┐
|
@@ -44,7 +43,7 @@ module Polars
|
|
44
43
|
# # │ c ┆ 2 │
|
45
44
|
# # └─────────┴─────┘
|
46
45
|
def head(n = 5)
|
47
|
-
|
46
|
+
Utils.wrap_ldf(@lgb.head(n))
|
48
47
|
end
|
49
48
|
|
50
49
|
# Get the last `n` rows of each group.
|
@@ -61,7 +60,7 @@ module Polars
|
|
61
60
|
# "nrs" => [1, 2, 3, 4, 5, 6]
|
62
61
|
# }
|
63
62
|
# )
|
64
|
-
# df.
|
63
|
+
# df.group_by("letters").tail(2).sort("letters")
|
65
64
|
# # =>
|
66
65
|
# # shape: (5, 2)
|
67
66
|
# # ┌─────────┬─────┐
|
@@ -76,7 +75,7 @@ module Polars
|
|
76
75
|
# # │ c ┆ 4 │
|
77
76
|
# # └─────────┴─────┘
|
78
77
|
def tail(n = 5)
|
79
|
-
|
78
|
+
Utils.wrap_ldf(@lgb.tail(n))
|
80
79
|
end
|
81
80
|
|
82
81
|
# def apply
|
data/lib/polars/list_expr.rb
CHANGED
@@ -15,7 +15,7 @@ module Polars
|
|
15
15
|
#
|
16
16
|
# @example
|
17
17
|
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
18
|
-
# df.select(Polars.col("bar").
|
18
|
+
# df.select(Polars.col("bar").list.lengths)
|
19
19
|
# # =>
|
20
20
|
# # shape: (2, 1)
|
21
21
|
# # ┌─────┐
|
@@ -27,8 +27,9 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
|
30
|
-
Utils.wrap_expr(_rbexpr.
|
30
|
+
Utils.wrap_expr(_rbexpr.list_len)
|
31
31
|
end
|
32
|
+
alias_method :len, :lengths
|
32
33
|
|
33
34
|
# Sum all the lists in the array.
|
34
35
|
#
|
@@ -36,7 +37,7 @@ module Polars
|
|
36
37
|
#
|
37
38
|
# @example
|
38
39
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
39
|
-
# df.select(Polars.col("values").
|
40
|
+
# df.select(Polars.col("values").list.sum)
|
40
41
|
# # =>
|
41
42
|
# # shape: (2, 1)
|
42
43
|
# # ┌────────┐
|
@@ -57,7 +58,7 @@ module Polars
|
|
57
58
|
#
|
58
59
|
# @example
|
59
60
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
60
|
-
# df.select(Polars.col("values").
|
61
|
+
# df.select(Polars.col("values").list.max)
|
61
62
|
# # =>
|
62
63
|
# # shape: (2, 1)
|
63
64
|
# # ┌────────┐
|
@@ -78,7 +79,7 @@ module Polars
|
|
78
79
|
#
|
79
80
|
# @example
|
80
81
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
81
|
-
# df.select(Polars.col("values").
|
82
|
+
# df.select(Polars.col("values").list.min)
|
82
83
|
# # =>
|
83
84
|
# # shape: (2, 1)
|
84
85
|
# # ┌────────┐
|
@@ -99,7 +100,7 @@ module Polars
|
|
99
100
|
#
|
100
101
|
# @example
|
101
102
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
102
|
-
# df.select(Polars.col("values").
|
103
|
+
# df.select(Polars.col("values").list.mean)
|
103
104
|
# # =>
|
104
105
|
# # shape: (2, 1)
|
105
106
|
# # ┌────────┐
|
@@ -124,7 +125,7 @@ module Polars
|
|
124
125
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
125
126
|
# }
|
126
127
|
# )
|
127
|
-
# df.select(Polars.col("a").
|
128
|
+
# df.select(Polars.col("a").list.sort)
|
128
129
|
# # =>
|
129
130
|
# # shape: (2, 1)
|
130
131
|
# # ┌───────────┐
|
@@ -149,7 +150,7 @@ module Polars
|
|
149
150
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
150
151
|
# }
|
151
152
|
# )
|
152
|
-
# df.select(Polars.col("a").
|
153
|
+
# df.select(Polars.col("a").list.reverse)
|
153
154
|
# # =>
|
154
155
|
# # shape: (2, 1)
|
155
156
|
# # ┌───────────┐
|
@@ -174,7 +175,7 @@ module Polars
|
|
174
175
|
# "a" => [[1, 1, 2]]
|
175
176
|
# }
|
176
177
|
# )
|
177
|
-
# df.select(Polars.col("a").
|
178
|
+
# df.select(Polars.col("a").list.unique)
|
178
179
|
# # =>
|
179
180
|
# # shape: (1, 1)
|
180
181
|
# # ┌───────────┐
|
@@ -202,7 +203,7 @@ module Polars
|
|
202
203
|
# "b" => [["b", "c"], ["y", "z"]]
|
203
204
|
# }
|
204
205
|
# )
|
205
|
-
# df.select(Polars.col("a").
|
206
|
+
# df.select(Polars.col("a").list.concat("b"))
|
206
207
|
# # =>
|
207
208
|
# # shape: (2, 1)
|
208
209
|
# # ┌─────────────────┐
|
@@ -214,11 +215,11 @@ module Polars
|
|
214
215
|
# # │ ["x", "y", "z"] │
|
215
216
|
# # └─────────────────┘
|
216
217
|
def concat(other)
|
217
|
-
if other.is_a?(Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
218
|
+
if other.is_a?(::Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
218
219
|
return concat(Series.new([other]))
|
219
220
|
end
|
220
221
|
|
221
|
-
if !other.is_a?(Array)
|
222
|
+
if !other.is_a?(::Array)
|
222
223
|
other_list = [other]
|
223
224
|
else
|
224
225
|
other_list = other.dup
|
@@ -241,7 +242,7 @@ module Polars
|
|
241
242
|
#
|
242
243
|
# @example
|
243
244
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
244
|
-
# df.select(Polars.col("foo").
|
245
|
+
# df.select(Polars.col("foo").list.get(0))
|
245
246
|
# # =>
|
246
247
|
# # shape: (3, 1)
|
247
248
|
# # ┌──────┐
|
@@ -254,7 +255,7 @@ module Polars
|
|
254
255
|
# # │ 1 │
|
255
256
|
# # └──────┘
|
256
257
|
def get(index)
|
257
|
-
index = Utils.
|
258
|
+
index = Utils.parse_as_expression(index)
|
258
259
|
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
260
|
end
|
260
261
|
|
@@ -280,7 +281,7 @@ module Polars
|
|
280
281
|
#
|
281
282
|
# @return [Expr]
|
282
283
|
def take(index, null_on_oob: false)
|
283
|
-
if index.is_a?(Array)
|
284
|
+
if index.is_a?(::Array)
|
284
285
|
index = Series.new(index)
|
285
286
|
end
|
286
287
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
@@ -293,7 +294,7 @@ module Polars
|
|
293
294
|
#
|
294
295
|
# @example
|
295
296
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
296
|
-
# df.select(Polars.col("foo").
|
297
|
+
# df.select(Polars.col("foo").list.first)
|
297
298
|
# # =>
|
298
299
|
# # shape: (3, 1)
|
299
300
|
# # ┌──────┐
|
@@ -315,7 +316,7 @@ module Polars
|
|
315
316
|
#
|
316
317
|
# @example
|
317
318
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
318
|
-
# df.select(Polars.col("foo").
|
319
|
+
# df.select(Polars.col("foo").list.last)
|
319
320
|
# # =>
|
320
321
|
# # shape: (3, 1)
|
321
322
|
# # ┌──────┐
|
@@ -340,7 +341,7 @@ module Polars
|
|
340
341
|
#
|
341
342
|
# @example
|
342
343
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
343
|
-
# df.select(Polars.col("foo").
|
344
|
+
# df.select(Polars.col("foo").list.contains(1))
|
344
345
|
# # =>
|
345
346
|
# # shape: (3, 1)
|
346
347
|
# # ┌───────┐
|
@@ -367,7 +368,7 @@ module Polars
|
|
367
368
|
#
|
368
369
|
# @example
|
369
370
|
# df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
|
370
|
-
# df.select(Polars.col("s").
|
371
|
+
# df.select(Polars.col("s").list.join(" "))
|
371
372
|
# # =>
|
372
373
|
# # shape: (2, 1)
|
373
374
|
# # ┌───────┐
|
@@ -379,6 +380,7 @@ module Polars
|
|
379
380
|
# # │ x y │
|
380
381
|
# # └───────┘
|
381
382
|
def join(separator)
|
383
|
+
separator = Utils.parse_as_expression(separator, str_as_lit: true)
|
382
384
|
Utils.wrap_expr(_rbexpr.list_join(separator))
|
383
385
|
end
|
384
386
|
|
@@ -392,7 +394,7 @@ module Polars
|
|
392
394
|
# "a" => [[1, 2], [2, 1]]
|
393
395
|
# }
|
394
396
|
# )
|
395
|
-
# df.select(Polars.col("a").
|
397
|
+
# df.select(Polars.col("a").list.arg_min)
|
396
398
|
# # =>
|
397
399
|
# # shape: (2, 1)
|
398
400
|
# # ┌─────┐
|
@@ -417,7 +419,7 @@ module Polars
|
|
417
419
|
# "a" => [[1, 2], [2, 1]]
|
418
420
|
# }
|
419
421
|
# )
|
420
|
-
# df.select(Polars.col("a").
|
422
|
+
# df.select(Polars.col("a").list.arg_max)
|
421
423
|
# # =>
|
422
424
|
# # shape: (2, 1)
|
423
425
|
# # ┌─────┐
|
@@ -443,7 +445,7 @@ module Polars
|
|
443
445
|
#
|
444
446
|
# @example
|
445
447
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
446
|
-
# s.
|
448
|
+
# s.list.diff
|
447
449
|
# # =>
|
448
450
|
# # shape: (2,)
|
449
451
|
# # Series: 'a' [list[i64]]
|
@@ -457,14 +459,14 @@ module Polars
|
|
457
459
|
|
458
460
|
# Shift values by the given period.
|
459
461
|
#
|
460
|
-
# @param
|
462
|
+
# @param n [Integer]
|
461
463
|
# Number of places to shift (may be negative).
|
462
464
|
#
|
463
465
|
# @return [Expr]
|
464
466
|
#
|
465
467
|
# @example
|
466
468
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
467
|
-
# s.
|
469
|
+
# s.list.shift
|
468
470
|
# # =>
|
469
471
|
# # shape: (2,)
|
470
472
|
# # Series: 'a' [list[i64]]
|
@@ -472,8 +474,9 @@ module Polars
|
|
472
474
|
# # [null, 1, … 3]
|
473
475
|
# # [null, 10, 2]
|
474
476
|
# # ]
|
475
|
-
def shift(
|
476
|
-
Utils.
|
477
|
+
def shift(n = 1)
|
478
|
+
n = Utils.parse_as_expression(n)
|
479
|
+
Utils.wrap_expr(_rbexpr.list_shift(n))
|
477
480
|
end
|
478
481
|
|
479
482
|
# Slice every sublist.
|
@@ -488,7 +491,7 @@ module Polars
|
|
488
491
|
#
|
489
492
|
# @example
|
490
493
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
491
|
-
# s.
|
494
|
+
# s.list.slice(1, 2)
|
492
495
|
# # =>
|
493
496
|
# # shape: (2,)
|
494
497
|
# # Series: 'a' [list[i64]]
|
@@ -511,7 +514,7 @@ module Polars
|
|
511
514
|
#
|
512
515
|
# @example
|
513
516
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
514
|
-
# s.
|
517
|
+
# s.list.head(2)
|
515
518
|
# # =>
|
516
519
|
# # shape: (2,)
|
517
520
|
# # Series: 'a' [list[i64]]
|
@@ -532,7 +535,7 @@ module Polars
|
|
532
535
|
#
|
533
536
|
# @example
|
534
537
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
535
|
-
# s.
|
538
|
+
# s.list.tail(2)
|
536
539
|
# # =>
|
537
540
|
# # shape: (2,)
|
538
541
|
# # Series: 'a' [list[i64]]
|
@@ -554,7 +557,7 @@ module Polars
|
|
554
557
|
#
|
555
558
|
# @example
|
556
559
|
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
-
# df.select(Polars.col("listcol").
|
560
|
+
# df.select(Polars.col("listcol").list.count_matches(2).alias("number_of_twos"))
|
558
561
|
# # =>
|
559
562
|
# # shape: (5, 1)
|
560
563
|
# # ┌────────────────┐
|
@@ -568,9 +571,10 @@ module Polars
|
|
568
571
|
# # │ 1 │
|
569
572
|
# # │ 0 │
|
570
573
|
# # └────────────────┘
|
571
|
-
def
|
572
|
-
Utils.wrap_expr(_rbexpr.
|
574
|
+
def count_matches(element)
|
575
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
|
573
576
|
end
|
577
|
+
alias_method :count_match, :count_matches
|
574
578
|
|
575
579
|
# Convert the series of type `List` to a series of type `Struct`.
|
576
580
|
#
|
@@ -584,7 +588,7 @@ module Polars
|
|
584
588
|
#
|
585
589
|
# @example
|
586
590
|
# df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
|
587
|
-
# df.select([Polars.col("a").
|
591
|
+
# df.select([Polars.col("a").list.to_struct])
|
588
592
|
# # =>
|
589
593
|
# # shape: (2, 1)
|
590
594
|
# # ┌────────────┐
|
@@ -609,7 +613,7 @@ module Polars
|
|
609
613
|
# Run all expressions in parallel. Don't activate this blindly.
|
610
614
|
# Parallelism is worth it if there is enough work to do per thread.
|
611
615
|
#
|
612
|
-
# This likely should not be use in the
|
616
|
+
# This likely should not be used in the group by context, because we already
|
613
617
|
# have parallel execution per group.
|
614
618
|
#
|
615
619
|
# @return [Expr]
|
@@ -617,14 +621,14 @@ module Polars
|
|
617
621
|
# @example
|
618
622
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
619
623
|
# df.with_column(
|
620
|
-
# Polars.concat_list(["a", "b"]).
|
624
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
621
625
|
# )
|
622
626
|
# # =>
|
623
627
|
# # shape: (3, 3)
|
624
628
|
# # ┌─────┬─────┬────────────┐
|
625
629
|
# # │ a ┆ b ┆ rank │
|
626
630
|
# # │ --- ┆ --- ┆ --- │
|
627
|
-
# # │ i64 ┆ i64 ┆ list[
|
631
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
628
632
|
# # ╞═════╪═════╪════════════╡
|
629
633
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
630
634
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|