polars-df 0.2.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +33 -0
- data/Cargo.lock +2230 -0
- data/Cargo.toml +10 -0
- data/LICENSE-THIRD-PARTY.txt +38828 -0
- data/LICENSE.txt +20 -0
- data/README.md +91 -0
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +52 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +4833 -0
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +1418 -0
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +5307 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +453 -0
- data/lib/polars/group_by.rb +558 -0
- data/lib/polars/io.rb +814 -0
- data/lib/polars/lazy_frame.rb +2442 -0
- data/lib/polars/lazy_functions.rb +1195 -0
- data/lib/polars/lazy_group_by.rb +93 -0
- data/lib/polars/list_expr.rb +610 -0
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +54 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +3730 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +972 -0
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +100 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +192 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/when.rb +16 -0
- data/lib/polars/when_then.rb +19 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +50 -0
- metadata +89 -0
@@ -0,0 +1,93 @@
|
|
1
|
+
module Polars
|
2
|
+
# Created by `df.lazy.groupby("foo")`.
|
3
|
+
class LazyGroupBy
|
4
|
+
# @private
|
5
|
+
def initialize(lgb, lazyframe_class)
  # Keep the wrapped group-by handle together with the class whose
  # `_from_rbldf` is used by the other methods to build their results.
  @lgb, @lazyframe_class = lgb, lazyframe_class
end
|
9
|
+
|
10
|
+
# Describe the aggregations that need to be done on a group.
|
11
|
+
#
|
12
|
+
# @return [LazyFrame]
|
13
|
+
def agg(aggs)
  # Convert `aggs` with Utils.selection_to_rbexpr_list, run the aggregation on
  # the wrapped handle, then build the result through the stored frame class.
  aggregated = @lgb.agg(Utils.selection_to_rbexpr_list(aggs))
  @lazyframe_class._from_rbldf(aggregated)
end
|
17
|
+
|
18
|
+
# Get the first `n` rows of each group.
|
19
|
+
#
|
20
|
+
# @param n [Integer]
|
21
|
+
# Number of rows to return.
|
22
|
+
#
|
23
|
+
# @return [LazyFrame]
|
24
|
+
#
|
25
|
+
# @example
|
26
|
+
# df = Polars::DataFrame.new(
|
27
|
+
# {
|
28
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
29
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
30
|
+
# }
|
31
|
+
# )
|
32
|
+
# df.groupby("letters").head(2).sort("letters")
|
33
|
+
# # =>
|
34
|
+
# # shape: (5, 2)
|
35
|
+
# # ┌─────────┬─────┐
|
36
|
+
# # │ letters ┆ nrs │
|
37
|
+
# # │ --- ┆ --- │
|
38
|
+
# # │ str ┆ i64 │
|
39
|
+
# # ╞═════════╪═════╡
|
40
|
+
# # │ a ┆ 3 │
|
41
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
42
|
+
# # │ a ┆ 5 │
|
43
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
44
|
+
# # │ b ┆ 6 │
|
45
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
46
|
+
# # │ c ┆ 1 │
|
47
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
48
|
+
# # │ c ┆ 2 │
|
49
|
+
# # └─────────┴─────┘
|
50
|
+
def head(n = 5)
  # Ask the wrapped handle for the first `n` rows per group, then build the
  # result through the stored frame class.
  leading_rows = @lgb.head(n)
  @lazyframe_class._from_rbldf(leading_rows)
end
|
53
|
+
|
54
|
+
# Get the last `n` rows of each group.
|
55
|
+
#
|
56
|
+
# @param n [Integer]
|
57
|
+
# Number of rows to return.
|
58
|
+
#
|
59
|
+
# @return [LazyFrame]
|
60
|
+
#
|
61
|
+
# @example
|
62
|
+
# df = Polars::DataFrame.new(
|
63
|
+
# {
|
64
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
65
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
66
|
+
# }
|
67
|
+
# )
|
68
|
+
# df.groupby("letters").tail(2).sort("letters")
|
69
|
+
# # =>
|
70
|
+
# # shape: (5, 2)
|
71
|
+
# # ┌─────────┬─────┐
|
72
|
+
# # │ letters ┆ nrs │
|
73
|
+
# # │ --- ┆ --- │
|
74
|
+
# # │ str ┆ i64 │
|
75
|
+
# # ╞═════════╪═════╡
|
76
|
+
# # │ a ┆ 3 │
|
77
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
78
|
+
# # │ a ┆ 5 │
|
79
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
80
|
+
# # │ b ┆ 6 │
|
81
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
82
|
+
# # │ c ┆ 2 │
|
83
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
84
|
+
# # │ c ┆ 4 │
|
85
|
+
# # └─────────┴─────┘
|
86
|
+
def tail(n = 5)
  # Ask the wrapped handle for the last `n` rows per group, then build the
  # result through the stored frame class.
  trailing_rows = @lgb.tail(n)
  @lazyframe_class._from_rbldf(trailing_rows)
end
|
89
|
+
|
90
|
+
# def apply
|
91
|
+
# end
|
92
|
+
end
|
93
|
+
end
|
@@ -0,0 +1,610 @@
|
|
1
|
+
module Polars
|
2
|
+
# Namespace for list related expressions.
|
3
|
+
class ListExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbexpr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def initialize(expr)
  # Reuse the `_rbexpr` of the expression this namespace was created from.
  rbexpr = expr._rbexpr
  self._rbexpr = rbexpr
end
|
11
|
+
|
12
|
+
# Get the length of the arrays as `:u32`.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
18
|
+
# df.select(Polars.col("bar").arr.lengths)
|
19
|
+
# # =>
|
20
|
+
# # shape: (2, 1)
|
21
|
+
# # ┌─────┐
|
22
|
+
# # │ bar │
|
23
|
+
# # │ --- │
|
24
|
+
# # │ u32 │
|
25
|
+
# # ╞═════╡
|
26
|
+
# # │ 2 │
|
27
|
+
# # ├╌╌╌╌╌┤
|
28
|
+
# # │ 1 │
|
29
|
+
# # └─────┘
|
30
|
+
def lengths
  # Call `arr_lengths` on the underlying expression and pass it to Utils.wrap_expr.
  length_expr = _rbexpr.arr_lengths
  Utils.wrap_expr(length_expr)
end
|
33
|
+
|
34
|
+
# Sum all the lists in the array.
|
35
|
+
#
|
36
|
+
# @return [Expr]
|
37
|
+
#
|
38
|
+
# @example
|
39
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
40
|
+
# df.select(Polars.col("values").arr.sum)
|
41
|
+
# # =>
|
42
|
+
# # shape: (2, 1)
|
43
|
+
# # ┌────────┐
|
44
|
+
# # │ values │
|
45
|
+
# # │ --- │
|
46
|
+
# # │ i64 │
|
47
|
+
# # ╞════════╡
|
48
|
+
# # │ 1 │
|
49
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
50
|
+
# # │ 5 │
|
51
|
+
# # └────────┘
|
52
|
+
def sum
  # Call `lst_sum` on the underlying expression and pass it to Utils.wrap_expr.
  summed = _rbexpr.lst_sum
  Utils.wrap_expr(summed)
end
|
55
|
+
|
56
|
+
# Compute the max value of the lists in the array.
|
57
|
+
#
|
58
|
+
# @return [Expr]
|
59
|
+
#
|
60
|
+
# @example
|
61
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
62
|
+
# df.select(Polars.col("values").arr.max)
|
63
|
+
# # =>
|
64
|
+
# # shape: (2, 1)
|
65
|
+
# # ┌────────┐
|
66
|
+
# # │ values │
|
67
|
+
# # │ --- │
|
68
|
+
# # │ i64 │
|
69
|
+
# # ╞════════╡
|
70
|
+
# # │ 1 │
|
71
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
72
|
+
# # │ 3 │
|
73
|
+
# # └────────┘
|
74
|
+
def max
  # Call `lst_max` on the underlying expression and pass it to Utils.wrap_expr.
  maximum = _rbexpr.lst_max
  Utils.wrap_expr(maximum)
end
|
77
|
+
|
78
|
+
# Compute the min value of the lists in the array.
|
79
|
+
#
|
80
|
+
# @return [Expr]
|
81
|
+
#
|
82
|
+
# @example
|
83
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
84
|
+
# df.select(Polars.col("values").arr.min)
|
85
|
+
# # =>
|
86
|
+
# # shape: (2, 1)
|
87
|
+
# # ┌────────┐
|
88
|
+
# # │ values │
|
89
|
+
# # │ --- │
|
90
|
+
# # │ i64 │
|
91
|
+
# # ╞════════╡
|
92
|
+
# # │ 1 │
|
93
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
94
|
+
# # │ 2 │
|
95
|
+
# # └────────┘
|
96
|
+
def min
  # Call `lst_min` on the underlying expression and pass it to Utils.wrap_expr.
  minimum = _rbexpr.lst_min
  Utils.wrap_expr(minimum)
end
|
99
|
+
|
100
|
+
# Compute the mean value of the lists in the array.
|
101
|
+
#
|
102
|
+
# @return [Expr]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
106
|
+
# df.select(Polars.col("values").arr.mean)
|
107
|
+
# # =>
|
108
|
+
# # shape: (2, 1)
|
109
|
+
# # ┌────────┐
|
110
|
+
# # │ values │
|
111
|
+
# # │ --- │
|
112
|
+
# # │ f64 │
|
113
|
+
# # ╞════════╡
|
114
|
+
# # │ 1.0 │
|
115
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
116
|
+
# # │ 2.5 │
|
117
|
+
# # └────────┘
|
118
|
+
def mean
  # Call `lst_mean` on the underlying expression and pass it to Utils.wrap_expr.
  averaged = _rbexpr.lst_mean
  Utils.wrap_expr(averaged)
end
|
121
|
+
|
122
|
+
# Sort the arrays in the list.
|
123
|
+
#
|
124
|
+
# @return [Expr]
|
125
|
+
#
|
126
|
+
# @example
|
127
|
+
# df = Polars::DataFrame.new(
|
128
|
+
# {
|
129
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
130
|
+
# }
|
131
|
+
# )
|
132
|
+
# df.select(Polars.col("a").arr.sort)
|
133
|
+
# # =>
|
134
|
+
# # shape: (2, 1)
|
135
|
+
# # ┌───────────┐
|
136
|
+
# # │ a │
|
137
|
+
# # │ --- │
|
138
|
+
# # │ list[i64] │
|
139
|
+
# # ╞═══════════╡
|
140
|
+
# # │ [1, 2, 3] │
|
141
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
142
|
+
# # │ [1, 2, 9] │
|
143
|
+
# # └───────────┘
|
144
|
+
def sort(reverse: false)
  # `reverse` is forwarded unchanged as the only argument to `lst_sort`.
  sorted = _rbexpr.lst_sort(reverse)
  Utils.wrap_expr(sorted)
end
|
147
|
+
|
148
|
+
# Reverse the arrays in the list.
|
149
|
+
#
|
150
|
+
# @return [Expr]
|
151
|
+
#
|
152
|
+
# @example
|
153
|
+
# df = Polars::DataFrame.new(
|
154
|
+
# {
|
155
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
156
|
+
# }
|
157
|
+
# )
|
158
|
+
# df.select(Polars.col("a").arr.reverse)
|
159
|
+
# # =>
|
160
|
+
# # shape: (2, 1)
|
161
|
+
# # ┌───────────┐
|
162
|
+
# # │ a │
|
163
|
+
# # │ --- │
|
164
|
+
# # │ list[i64] │
|
165
|
+
# # ╞═══════════╡
|
166
|
+
# # │ [1, 2, 3] │
|
167
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
168
|
+
# # │ [2, 1, 9] │
|
169
|
+
# # └───────────┘
|
170
|
+
def reverse
  # Call `lst_reverse` on the underlying expression and pass it to Utils.wrap_expr.
  reversed = _rbexpr.lst_reverse
  Utils.wrap_expr(reversed)
end
|
173
|
+
|
174
|
+
# Get the unique/distinct values in the list.
|
175
|
+
#
|
176
|
+
# @return [Expr]
|
177
|
+
#
|
178
|
+
# @example
|
179
|
+
# df = Polars::DataFrame.new(
|
180
|
+
# {
|
181
|
+
# "a" => [[1, 1, 2]]
|
182
|
+
# }
|
183
|
+
# )
|
184
|
+
# df.select(Polars.col("a").arr.unique)
|
185
|
+
# # =>
|
186
|
+
# # shape: (1, 1)
|
187
|
+
# # ┌───────────┐
|
188
|
+
# # │ a │
|
189
|
+
# # │ --- │
|
190
|
+
# # │ list[i64] │
|
191
|
+
# # ╞═══════════╡
|
192
|
+
# # │ [1, 2] │
|
193
|
+
# # └───────────┘
|
194
|
+
def unique
  # Call `lst_unique` on the underlying expression and pass it to Utils.wrap_expr.
  distinct = _rbexpr.lst_unique
  Utils.wrap_expr(distinct)
end
|
197
|
+
|
198
|
+
# Concat the arrays in a Series dtype List in linear time.
|
199
|
+
#
|
200
|
+
# @param other [Object]
|
201
|
+
# Columns to concat into a List Series
|
202
|
+
#
|
203
|
+
# @return [Expr]
|
204
|
+
#
|
205
|
+
# @example
|
206
|
+
# df = Polars::DataFrame.new(
|
207
|
+
# {
|
208
|
+
# "a" => [["a"], ["x"]],
|
209
|
+
# "b" => [["b", "c"], ["y", "z"]]
|
210
|
+
# }
|
211
|
+
# )
|
212
|
+
# df.select(Polars.col("a").arr.concat("b"))
|
213
|
+
# # =>
|
214
|
+
# # shape: (2, 1)
|
215
|
+
# # ┌─────────────────┐
|
216
|
+
# # │ a │
|
217
|
+
# # │ --- │
|
218
|
+
# # │ list[str] │
|
219
|
+
# # ╞═════════════════╡
|
220
|
+
# # │ ["a", "b", "c"] │
|
221
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
222
|
+
# # │ ["x", "y", "z"] │
|
223
|
+
# # └─────────────────┘
|
224
|
+
def concat(other)
  if other.is_a?(Array)
    leading = other[0]
    # An array whose first element is not an Expr, String or Series is
    # wrapped in a Series (as its single element) and concat is retried.
    return concat(Series.new([other])) if [Expr, String, Series].none? { |klass| leading.is_a?(klass) }

    # Copy so the caller's array is not mutated by the prepend below.
    other_list = other.dup
  else
    other_list = [other]
  end

  # This expression always comes first in the concatenation.
  other_list.unshift(Utils.wrap_expr(_rbexpr))
  Polars.concat_list(other_list)
end
|
238
|
+
|
239
|
+
# Get the value by index in the sublists.
|
240
|
+
#
|
241
|
+
# So index `0` would return the first item of every sublist
|
242
|
+
# and index `-1` would return the last item of every sublist
|
243
|
+
# if an index is out of bounds, it will return null.
|
244
|
+
#
|
245
|
+
# @param index [Integer]
|
246
|
+
# Index to return per sublist
|
247
|
+
#
|
248
|
+
# @return [Expr]
|
249
|
+
#
|
250
|
+
# @example
|
251
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
252
|
+
# df.select(Polars.col("foo").arr.get(0))
|
253
|
+
# # =>
|
254
|
+
# # shape: (3, 1)
|
255
|
+
# # ┌──────┐
|
256
|
+
# # │ foo │
|
257
|
+
# # │ --- │
|
258
|
+
# # │ i64 │
|
259
|
+
# # ╞══════╡
|
260
|
+
# # │ 3 │
|
261
|
+
# # ├╌╌╌╌╌╌┤
|
262
|
+
# # │ null │
|
263
|
+
# # ├╌╌╌╌╌╌┤
|
264
|
+
# # │ 1 │
|
265
|
+
# # └──────┘
|
266
|
+
def get(index)
  # Convert the index with Utils.expr_to_lit_or_expr (str_to_lit: false)
  # before handing its `_rbexpr` to `lst_get`.
  index_expr = Utils.expr_to_lit_or_expr(index, str_to_lit: false)
  Utils.wrap_expr(_rbexpr.lst_get(index_expr._rbexpr))
end
|
270
|
+
|
271
|
+
# Get the value by index in the sublists.
|
272
|
+
#
|
273
|
+
# @return [Expr]
|
274
|
+
def [](item)
  # Bracket access forwards the item unchanged to #get.
  get(item)
end
|
277
|
+
|
278
|
+
# Get the first value of the sublists.
|
279
|
+
#
|
280
|
+
# @return [Expr]
|
281
|
+
#
|
282
|
+
# @example
|
283
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
284
|
+
# df.select(Polars.col("foo").arr.first)
|
285
|
+
# # =>
|
286
|
+
# # shape: (3, 1)
|
287
|
+
# # ┌──────┐
|
288
|
+
# # │ foo │
|
289
|
+
# # │ --- │
|
290
|
+
# # │ i64 │
|
291
|
+
# # ╞══════╡
|
292
|
+
# # │ 3 │
|
293
|
+
# # ├╌╌╌╌╌╌┤
|
294
|
+
# # │ null │
|
295
|
+
# # ├╌╌╌╌╌╌┤
|
296
|
+
# # │ 1 │
|
297
|
+
# # └──────┘
|
298
|
+
def first
  # Same as #get with index 0.
  get(0)
end
|
301
|
+
|
302
|
+
# Get the last value of the sublists.
|
303
|
+
#
|
304
|
+
# @return [Expr]
|
305
|
+
#
|
306
|
+
# @example
|
307
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
308
|
+
# df.select(Polars.col("foo").arr.last)
|
309
|
+
# # =>
|
310
|
+
# # shape: (3, 1)
|
311
|
+
# # ┌──────┐
|
312
|
+
# # │ foo │
|
313
|
+
# # │ --- │
|
314
|
+
# # │ i64 │
|
315
|
+
# # ╞══════╡
|
316
|
+
# # │ 1 │
|
317
|
+
# # ├╌╌╌╌╌╌┤
|
318
|
+
# # │ null │
|
319
|
+
# # ├╌╌╌╌╌╌┤
|
320
|
+
# # │ 2 │
|
321
|
+
# # └──────┘
|
322
|
+
def last
  # Same as #get with index -1.
  get(-1)
end
|
325
|
+
|
326
|
+
# Check if sublists contain the given item.
|
327
|
+
#
|
328
|
+
# @param item [Object]
|
329
|
+
# Item that will be checked for membership
|
330
|
+
#
|
331
|
+
# @return [Expr]
|
332
|
+
#
|
333
|
+
# @example
|
334
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
335
|
+
# df.select(Polars.col("foo").arr.contains(1))
|
336
|
+
# # =>
|
337
|
+
# # shape: (3, 1)
|
338
|
+
# # ┌───────┐
|
339
|
+
# # │ foo │
|
340
|
+
# # │ --- │
|
341
|
+
# # │ bool │
|
342
|
+
# # ╞═══════╡
|
343
|
+
# # │ true │
|
344
|
+
# # ├╌╌╌╌╌╌╌┤
|
345
|
+
# # │ false │
|
346
|
+
# # ├╌╌╌╌╌╌╌┤
|
347
|
+
# # │ true │
|
348
|
+
# # └───────┘
|
349
|
+
def contains(item)
  # Convert the item with Utils.expr_to_lit_or_expr (default options)
  # before handing its `_rbexpr` to `arr_contains`.
  needle = Utils.expr_to_lit_or_expr(item)
  Utils.wrap_expr(_rbexpr.arr_contains(needle._rbexpr))
end
|
352
|
+
|
353
|
+
# Join all string items in a sublist and place a separator between them.
|
354
|
+
#
|
355
|
+
# This errors if inner type of list `!= :str`.
|
356
|
+
#
|
357
|
+
# @param separator [String]
|
358
|
+
# string to separate the items with
|
359
|
+
#
|
360
|
+
# @return [Expr]
|
361
|
+
#
|
362
|
+
# @example
|
363
|
+
# df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
|
364
|
+
# df.select(Polars.col("s").arr.join(" "))
|
365
|
+
# # =>
|
366
|
+
# # shape: (2, 1)
|
367
|
+
# # ┌───────┐
|
368
|
+
# # │ s │
|
369
|
+
# # │ --- │
|
370
|
+
# # │ str │
|
371
|
+
# # ╞═══════╡
|
372
|
+
# # │ a b c │
|
373
|
+
# # ├╌╌╌╌╌╌╌┤
|
374
|
+
# # │ x y │
|
375
|
+
# # └───────┘
|
376
|
+
def join(separator)
  # `separator` is forwarded unchanged as the only argument to `lst_join`.
  joined = _rbexpr.lst_join(separator)
  Utils.wrap_expr(joined)
end
|
379
|
+
|
380
|
+
# Retrieve the index of the minimal value in every sublist.
|
381
|
+
#
|
382
|
+
# @return [Expr]
|
383
|
+
#
|
384
|
+
# @example
|
385
|
+
# df = Polars::DataFrame.new(
|
386
|
+
# {
|
387
|
+
# "a" => [[1, 2], [2, 1]]
|
388
|
+
# }
|
389
|
+
# )
|
390
|
+
# df.select(Polars.col("a").arr.arg_min)
|
391
|
+
# # =>
|
392
|
+
# # shape: (2, 1)
|
393
|
+
# # ┌─────┐
|
394
|
+
# # │ a │
|
395
|
+
# # │ --- │
|
396
|
+
# # │ u32 │
|
397
|
+
# # ╞═════╡
|
398
|
+
# # │ 0 │
|
399
|
+
# # ├╌╌╌╌╌┤
|
400
|
+
# # │ 1 │
|
401
|
+
# # └─────┘
|
402
|
+
def arg_min
  # Call `lst_arg_min` on the underlying expression and pass it to Utils.wrap_expr.
  min_index = _rbexpr.lst_arg_min
  Utils.wrap_expr(min_index)
end
|
405
|
+
|
406
|
+
# Retrieve the index of the maximum value in every sublist.
|
407
|
+
#
|
408
|
+
# @return [Expr]
|
409
|
+
#
|
410
|
+
# @example
|
411
|
+
# df = Polars::DataFrame.new(
|
412
|
+
# {
|
413
|
+
# "a" => [[1, 2], [2, 1]]
|
414
|
+
# }
|
415
|
+
# )
|
416
|
+
# df.select(Polars.col("a").arr.arg_max)
|
417
|
+
# # =>
|
418
|
+
# # shape: (2, 1)
|
419
|
+
# # ┌─────┐
|
420
|
+
# # │ a │
|
421
|
+
# # │ --- │
|
422
|
+
# # │ u32 │
|
423
|
+
# # ╞═════╡
|
424
|
+
# # │ 1 │
|
425
|
+
# # ├╌╌╌╌╌┤
|
426
|
+
# # │ 0 │
|
427
|
+
# # └─────┘
|
428
|
+
def arg_max
  # Call `lst_arg_max` on the underlying expression and pass it to Utils.wrap_expr.
  max_index = _rbexpr.lst_arg_max
  Utils.wrap_expr(max_index)
end
|
431
|
+
|
432
|
+
# Calculate the n-th discrete difference of every sublist.
|
433
|
+
#
|
434
|
+
# @param n [Integer]
|
435
|
+
# Number of slots to shift.
|
436
|
+
# @param null_behavior ["ignore", "drop"]
|
437
|
+
# How to handle null values.
|
438
|
+
#
|
439
|
+
# @return [Expr]
|
440
|
+
#
|
441
|
+
# @example
|
442
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
443
|
+
# s.arr.diff
|
444
|
+
# # =>
|
445
|
+
# # shape: (2,)
|
446
|
+
# # Series: 'a' [list]
|
447
|
+
# # [
|
448
|
+
# # [null, 1, ... 1]
|
449
|
+
# # [null, -8, -1]
|
450
|
+
# # ]
|
451
|
+
def diff(n: 1, null_behavior: "ignore")
  # Both keyword values are forwarded positionally, in this order, to `lst_diff`.
  differenced = _rbexpr.lst_diff(n, null_behavior)
  Utils.wrap_expr(differenced)
end
|
454
|
+
|
455
|
+
# Shift values by the given period.
|
456
|
+
#
|
457
|
+
# @param periods [Integer]
|
458
|
+
# Number of places to shift (may be negative).
|
459
|
+
#
|
460
|
+
# @return [Expr]
|
461
|
+
#
|
462
|
+
# @example
|
463
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
464
|
+
# s.arr.shift
|
465
|
+
# # =>
|
466
|
+
# # shape: (2,)
|
467
|
+
# # Series: 'a' [list]
|
468
|
+
# # [
|
469
|
+
# # [null, 1, ... 3]
|
470
|
+
# # [null, 10, 2]
|
471
|
+
# # ]
|
472
|
+
def shift(periods = 1)
  # `periods` is forwarded unchanged as the only argument to `lst_shift`.
  shifted = _rbexpr.lst_shift(periods)
  Utils.wrap_expr(shifted)
end
|
475
|
+
|
476
|
+
# Slice every sublist.
|
477
|
+
#
|
478
|
+
# @param offset [Integer]
|
479
|
+
# Start index. Negative indexing is supported.
|
480
|
+
# @param length [Integer]
|
481
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
482
|
+
# end of the list.
|
483
|
+
#
|
484
|
+
# @return [Expr]
|
485
|
+
#
|
486
|
+
# @example
|
487
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
488
|
+
# s.arr.slice(1, 2)
|
489
|
+
# # =>
|
490
|
+
# # shape: (2,)
|
491
|
+
# # Series: 'a' [list]
|
492
|
+
# # [
|
493
|
+
# # [2, 3]
|
494
|
+
# # [2, 1]
|
495
|
+
# # ]
|
496
|
+
def slice(offset, length = nil)
  # Both bounds go through the same conversion (offset first, then length);
  # a nil length is converted like any other value.
  offset_rb, length_rb = [offset, length].map do |bound|
    Utils.expr_to_lit_or_expr(bound, str_to_lit: false)._rbexpr
  end
  Utils.wrap_expr(_rbexpr.lst_slice(offset_rb, length_rb))
end
|
501
|
+
|
502
|
+
# Slice the first `n` values of every sublist.
|
503
|
+
#
|
504
|
+
# @param n [Integer]
|
505
|
+
# Number of values to return for each sublist.
|
506
|
+
#
|
507
|
+
# @return [Expr]
|
508
|
+
#
|
509
|
+
# @example
|
510
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
511
|
+
# s.arr.head(2)
|
512
|
+
# # =>
|
513
|
+
# # shape: (2,)
|
514
|
+
# # Series: 'a' [list]
|
515
|
+
# # [
|
516
|
+
# # [1, 2]
|
517
|
+
# # [10, 2]
|
518
|
+
# # ]
|
519
|
+
def head(n = 5)
  # Same as #slice starting at offset 0 with length `n`.
  slice(0, n)
end
|
522
|
+
|
523
|
+
# Slice the last `n` values of every sublist.
|
524
|
+
#
|
525
|
+
# @param n [Integer]
|
526
|
+
# Number of values to return for each sublist.
|
527
|
+
#
|
528
|
+
# @return [Expr]
|
529
|
+
#
|
530
|
+
# @example
|
531
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
532
|
+
# s.arr.tail(2)
|
533
|
+
# # =>
|
534
|
+
# # shape: (2,)
|
535
|
+
# # Series: 'a' [list]
|
536
|
+
# # [
|
537
|
+
# # [3, 4]
|
538
|
+
# # [2, 1]
|
539
|
+
# # ]
|
540
|
+
def tail(n = 5)
  # The offset is the negated converted `n` (relies on unary minus being
  # defined for what Utils.expr_to_lit_or_expr returns); the length passed
  # to #slice is the original `n`.
  count = Utils.expr_to_lit_or_expr(n, str_to_lit: false)
  slice(-count, n)
end
|
544
|
+
|
545
|
+
# Convert the series of type `List` to a series of type `Struct`.
|
546
|
+
#
|
547
|
+
# @param n_field_strategy ["first_non_null", "max_width"]
|
548
|
+
# Strategy to determine the number of fields of the struct.
|
549
|
+
# @param name_generator [Object]
|
550
|
+
# A custom function that can be used to generate the field names.
|
551
|
+
# Default field names are `field_0, field_1 .. field_n`
|
552
|
+
#
|
553
|
+
# @return [Expr]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
|
557
|
+
# df.select([Polars.col("a").arr.to_struct])
|
558
|
+
# # =>
|
559
|
+
# # shape: (2, 1)
|
560
|
+
# # ┌────────────┐
|
561
|
+
# # │ a │
|
562
|
+
# # │ --- │
|
563
|
+
# # │ struct[3] │
|
564
|
+
# # ╞════════════╡
|
565
|
+
# # │ {1,2,3} │
|
566
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
567
|
+
# # │ {1,2,null} │
|
568
|
+
# # └────────────┘
|
569
|
+
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
  # Any truthy name generator is rejected here, so only nil/false reach the call below.
  raise Todo if name_generator

  struct_expr = _rbexpr.lst_to_struct(n_field_strategy, name_generator, 0)
  Utils.wrap_expr(struct_expr)
end
|
573
|
+
|
574
|
+
# Run any polars expression against the lists' elements.
|
575
|
+
#
|
576
|
+
# @param expr [Expr]
|
577
|
+
# Expression to run. Note that you can select an element with `Polars.first`, or
|
578
|
+
# `Polars.col`
|
579
|
+
# @param parallel [Boolean]
|
580
|
+
# Run all expressions in parallel. Don't activate this blindly.
|
581
|
+
# Parallelism is worth it if there is enough work to do per thread.
|
582
|
+
#
|
583
|
+
# This likely should not be used in the groupby context, because we already
|
584
|
+
# have parallel execution per group.
|
585
|
+
#
|
586
|
+
# @return [Expr]
|
587
|
+
#
|
588
|
+
# @example
|
589
|
+
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
590
|
+
# df.with_column(
|
591
|
+
# Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
|
592
|
+
# )
|
593
|
+
# # =>
|
594
|
+
# # shape: (3, 3)
|
595
|
+
# # ┌─────┬─────┬────────────┐
|
596
|
+
# # │ a ┆ b ┆ rank │
|
597
|
+
# # │ --- ┆ --- ┆ --- │
|
598
|
+
# # │ i64 ┆ i64 ┆ list[f32] │
|
599
|
+
# # ╞═════╪═════╪════════════╡
|
600
|
+
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
601
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
602
|
+
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
603
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
604
|
+
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
605
|
+
# # └─────┴─────┴────────────┘
|
606
|
+
def eval(expr, parallel: false)
  # The given expression's `_rbexpr` and the `parallel` flag are passed to `lst_eval`.
  evaluated = _rbexpr.lst_eval(expr._rbexpr, parallel)
  Utils.wrap_expr(evaluated)
end
|
609
|
+
end
|
610
|
+
end
|