polars-df 0.13.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,791 @@
|
|
1
|
+
module Polars
|
2
|
+
# Namespace for list related expressions.
|
3
|
+
class ListExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbexpr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def initialize(expr)
|
9
|
+
self._rbexpr = expr._rbexpr
|
10
|
+
end
|
11
|
+
|
12
|
+
# Evaluate whether all boolean values in a list are true.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new(
|
18
|
+
# {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
|
19
|
+
# )
|
20
|
+
# df.with_columns(all: Polars.col("a").list.all)
|
21
|
+
# # =>
|
22
|
+
# # shape: (6, 2)
|
23
|
+
# # ┌────────────────┬───────┐
|
24
|
+
# # │ a ┆ all │
|
25
|
+
# # │ --- ┆ --- │
|
26
|
+
# # │ list[bool] ┆ bool │
|
27
|
+
# # ╞════════════════╪═══════╡
|
28
|
+
# # │ [true, true] ┆ true │
|
29
|
+
# # │ [false, true] ┆ false │
|
30
|
+
# # │ [false, false] ┆ false │
|
31
|
+
# # │ [null] ┆ true │
|
32
|
+
# # │ [] ┆ true │
|
33
|
+
# # │ null ┆ null │
|
34
|
+
# # └────────────────┴───────┘
|
35
|
+
def all
|
36
|
+
Utils.wrap_expr(_rbexpr.list_all)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Evaluate whether any boolean value in a list is true.
|
40
|
+
#
|
41
|
+
# @return [Expr]
|
42
|
+
#
|
43
|
+
# @example
|
44
|
+
# df = Polars::DataFrame.new(
|
45
|
+
# {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
|
46
|
+
# )
|
47
|
+
# df.with_columns(any: Polars.col("a").list.any)
|
48
|
+
# # =>
|
49
|
+
# # shape: (6, 2)
|
50
|
+
# # ┌────────────────┬───────┐
|
51
|
+
# # │ a ┆ any │
|
52
|
+
# # │ --- ┆ --- │
|
53
|
+
# # │ list[bool] ┆ bool │
|
54
|
+
# # ╞════════════════╪═══════╡
|
55
|
+
# # │ [true, true] ┆ true │
|
56
|
+
# # │ [false, true] ┆ true │
|
57
|
+
# # │ [false, false] ┆ false │
|
58
|
+
# # │ [null] ┆ false │
|
59
|
+
# # │ [] ┆ false │
|
60
|
+
# # │ null ┆ null │
|
61
|
+
# # └────────────────┴───────┘
|
62
|
+
def any
|
63
|
+
Utils.wrap_expr(_rbexpr.list_any)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Get the length of every sublist as `:u32`.
|
67
|
+
#
|
68
|
+
# @return [Expr]
|
69
|
+
#
|
70
|
+
# @example
|
71
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
72
|
+
# df.select(Polars.col("bar").list.lengths)
|
73
|
+
# # =>
|
74
|
+
# # shape: (2, 1)
|
75
|
+
# # ┌─────┐
|
76
|
+
# # │ bar │
|
77
|
+
# # │ --- │
|
78
|
+
# # │ u32 │
|
79
|
+
# # ╞═════╡
|
80
|
+
# # │ 2 │
|
81
|
+
# # │ 1 │
|
82
|
+
# # └─────┘
|
83
|
+
def lengths
|
84
|
+
Utils.wrap_expr(_rbexpr.list_len)
|
85
|
+
end
|
86
|
+
alias_method :len, :lengths
|
87
|
+
|
88
|
+
# Drop all null values in the list.
|
89
|
+
#
|
90
|
+
# The original order of the remaining elements is preserved.
|
91
|
+
#
|
92
|
+
# @return [Expr]
|
93
|
+
#
|
94
|
+
# @example
|
95
|
+
# df = Polars::DataFrame.new({"values" => [[nil, 1, nil, 2], [nil], [3, 4]]})
|
96
|
+
# df.with_columns(drop_nulls: Polars.col("values").list.drop_nulls)
|
97
|
+
# # =>
|
98
|
+
# # shape: (3, 2)
|
99
|
+
# # ┌────────────────┬────────────┐
|
100
|
+
# # │ values ┆ drop_nulls │
|
101
|
+
# # │ --- ┆ --- │
|
102
|
+
# # │ list[i64] ┆ list[i64] │
|
103
|
+
# # ╞════════════════╪════════════╡
|
104
|
+
# # │ [null, 1, … 2] ┆ [1, 2] │
|
105
|
+
# # │ [null] ┆ [] │
|
106
|
+
# # │ [3, 4] ┆ [3, 4] │
|
107
|
+
# # └────────────────┴────────────┘
|
108
|
+
def drop_nulls
|
109
|
+
Utils.wrap_expr(_rbexpr.list_drop_nulls)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Sample from this list.
|
113
|
+
#
|
114
|
+
# @param n [Integer]
|
115
|
+
# Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
|
116
|
+
# `fraction` is nil.
|
117
|
+
# @param fraction [Float]
|
118
|
+
# Fraction of items to return. Cannot be used with `n`.
|
119
|
+
# @param with_replacement [Boolean]
|
120
|
+
# Allow values to be sampled more than once.
|
121
|
+
# @param shuffle [Boolean]
|
122
|
+
# Shuffle the order of sampled data points.
|
123
|
+
# @param seed [Integer]
|
124
|
+
# Seed for the random number generator. If set to nil (default), a
|
125
|
+
# random seed is generated for each sample operation.
|
126
|
+
#
|
127
|
+
# @return [Expr]
|
128
|
+
#
|
129
|
+
# @example
|
130
|
+
# df = Polars::DataFrame.new({"values" => [[1, 2, 3], [4, 5]], "n" => [2, 1]})
|
131
|
+
# df.with_columns(sample: Polars.col("values").list.sample(n: Polars.col("n"), seed: 1))
|
132
|
+
# # =>
|
133
|
+
# # shape: (2, 3)
|
134
|
+
# # ┌───────────┬─────┬───────────┐
|
135
|
+
# # │ values ┆ n ┆ sample │
|
136
|
+
# # │ --- ┆ --- ┆ --- │
|
137
|
+
# # │ list[i64] ┆ i64 ┆ list[i64] │
|
138
|
+
# # ╞═══════════╪═════╪═══════════╡
|
139
|
+
# # │ [1, 2, 3] ┆ 2 ┆ [2, 1] │
|
140
|
+
# # │ [4, 5] ┆ 1 ┆ [5] │
|
141
|
+
# # └───────────┴─────┴───────────┘
|
142
|
+
def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
|
143
|
+
if !n.nil? && !fraction.nil?
|
144
|
+
msg = "cannot specify both `n` and `fraction`"
|
145
|
+
raise ArgumentError, msg
|
146
|
+
end
|
147
|
+
|
148
|
+
if !fraction.nil?
|
149
|
+
fraction = Utils.parse_into_expression(fraction)
|
150
|
+
return Utils.wrap_expr(
|
151
|
+
_rbexpr.list_sample_fraction(
|
152
|
+
fraction, with_replacement, shuffle, seed
|
153
|
+
)
|
154
|
+
)
|
155
|
+
end
|
156
|
+
|
157
|
+
n = 1 if n.nil?
|
158
|
+
n = Utils.parse_into_expression(n)
|
159
|
+
Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
|
160
|
+
end
|
161
|
+
|
162
|
+
# Sum the values of every sublist.
|
163
|
+
#
|
164
|
+
# @return [Expr]
|
165
|
+
#
|
166
|
+
# @example
|
167
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
168
|
+
# df.select(Polars.col("values").list.sum)
|
169
|
+
# # =>
|
170
|
+
# # shape: (2, 1)
|
171
|
+
# # ┌────────┐
|
172
|
+
# # │ values │
|
173
|
+
# # │ --- │
|
174
|
+
# # │ i64 │
|
175
|
+
# # ╞════════╡
|
176
|
+
# # │ 1 │
|
177
|
+
# # │ 5 │
|
178
|
+
# # └────────┘
|
179
|
+
def sum
|
180
|
+
Utils.wrap_expr(_rbexpr.list_sum)
|
181
|
+
end
|
182
|
+
|
183
|
+
# Compute the max value of every sublist.
|
184
|
+
#
|
185
|
+
# @return [Expr]
|
186
|
+
#
|
187
|
+
# @example
|
188
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
189
|
+
# df.select(Polars.col("values").list.max)
|
190
|
+
# # =>
|
191
|
+
# # shape: (2, 1)
|
192
|
+
# # ┌────────┐
|
193
|
+
# # │ values │
|
194
|
+
# # │ --- │
|
195
|
+
# # │ i64 │
|
196
|
+
# # ╞════════╡
|
197
|
+
# # │ 1 │
|
198
|
+
# # │ 3 │
|
199
|
+
# # └────────┘
|
200
|
+
def max
|
201
|
+
Utils.wrap_expr(_rbexpr.list_max)
|
202
|
+
end
|
203
|
+
|
204
|
+
# Compute the min value of every sublist.
|
205
|
+
#
|
206
|
+
# @return [Expr]
|
207
|
+
#
|
208
|
+
# @example
|
209
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
210
|
+
# df.select(Polars.col("values").list.min)
|
211
|
+
# # =>
|
212
|
+
# # shape: (2, 1)
|
213
|
+
# # ┌────────┐
|
214
|
+
# # │ values │
|
215
|
+
# # │ --- │
|
216
|
+
# # │ i64 │
|
217
|
+
# # ╞════════╡
|
218
|
+
# # │ 1 │
|
219
|
+
# # │ 2 │
|
220
|
+
# # └────────┘
|
221
|
+
def min
|
222
|
+
Utils.wrap_expr(_rbexpr.list_min)
|
223
|
+
end
|
224
|
+
|
225
|
+
# Compute the mean value of every sublist.
|
226
|
+
#
|
227
|
+
# @return [Expr]
|
228
|
+
#
|
229
|
+
# @example
|
230
|
+
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
231
|
+
# df.select(Polars.col("values").list.mean)
|
232
|
+
# # =>
|
233
|
+
# # shape: (2, 1)
|
234
|
+
# # ┌────────┐
|
235
|
+
# # │ values │
|
236
|
+
# # │ --- │
|
237
|
+
# # │ f64 │
|
238
|
+
# # ╞════════╡
|
239
|
+
# # │ 1.0 │
|
240
|
+
# # │ 2.5 │
|
241
|
+
# # └────────┘
|
242
|
+
def mean
|
243
|
+
Utils.wrap_expr(_rbexpr.list_mean)
|
244
|
+
end
|
245
|
+
|
246
|
+
# Sort the values of every sublist.
|
247
|
+
#
|
248
|
+
# @return [Expr]
|
249
|
+
#
|
250
|
+
# @example
|
251
|
+
# df = Polars::DataFrame.new(
|
252
|
+
# {
|
253
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
254
|
+
# }
|
255
|
+
# )
|
256
|
+
# df.select(Polars.col("a").list.sort)
|
257
|
+
# # =>
|
258
|
+
# # shape: (2, 1)
|
259
|
+
# # ┌───────────┐
|
260
|
+
# # │ a │
|
261
|
+
# # │ --- │
|
262
|
+
# # │ list[i64] │
|
263
|
+
# # ╞═══════════╡
|
264
|
+
# # │ [1, 2, 3] │
|
265
|
+
# # │ [1, 2, 9] │
|
266
|
+
# # └───────────┘
|
267
|
+
def sort(reverse: false)
|
268
|
+
Utils.wrap_expr(_rbexpr.list_sort(reverse))
|
269
|
+
end
|
270
|
+
|
271
|
+
# Reverse the order of the values in every sublist.
|
272
|
+
#
|
273
|
+
# @return [Expr]
|
274
|
+
#
|
275
|
+
# @example
|
276
|
+
# df = Polars::DataFrame.new(
|
277
|
+
# {
|
278
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
279
|
+
# }
|
280
|
+
# )
|
281
|
+
# df.select(Polars.col("a").list.reverse)
|
282
|
+
# # =>
|
283
|
+
# # shape: (2, 1)
|
284
|
+
# # ┌───────────┐
|
285
|
+
# # │ a │
|
286
|
+
# # │ --- │
|
287
|
+
# # │ list[i64] │
|
288
|
+
# # ╞═══════════╡
|
289
|
+
# # │ [1, 2, 3] │
|
290
|
+
# # │ [2, 1, 9] │
|
291
|
+
# # └───────────┘
|
292
|
+
def reverse
|
293
|
+
Utils.wrap_expr(_rbexpr.list_reverse)
|
294
|
+
end
|
295
|
+
|
296
|
+
# Get the unique/distinct values in the list.
|
297
|
+
#
|
298
|
+
# @return [Expr]
|
299
|
+
#
|
300
|
+
# @example
|
301
|
+
# df = Polars::DataFrame.new(
|
302
|
+
# {
|
303
|
+
# "a" => [[1, 1, 2]]
|
304
|
+
# }
|
305
|
+
# )
|
306
|
+
# df.select(Polars.col("a").list.unique)
|
307
|
+
# # =>
|
308
|
+
# # shape: (1, 1)
|
309
|
+
# # ┌───────────┐
|
310
|
+
# # │ a │
|
311
|
+
# # │ --- │
|
312
|
+
# # │ list[i64] │
|
313
|
+
# # ╞═══════════╡
|
314
|
+
# # │ [1, 2] │
|
315
|
+
# # └───────────┘
|
316
|
+
def unique(maintain_order: false)
|
317
|
+
Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
|
318
|
+
end
|
319
|
+
|
320
|
+
# Concat the arrays in a Series dtype List in linear time.
|
321
|
+
#
|
322
|
+
# @param other [Object]
|
323
|
+
# Columns to concat into a List Series
|
324
|
+
#
|
325
|
+
# @return [Expr]
|
326
|
+
#
|
327
|
+
# @example
|
328
|
+
# df = Polars::DataFrame.new(
|
329
|
+
# {
|
330
|
+
# "a" => [["a"], ["x"]],
|
331
|
+
# "b" => [["b", "c"], ["y", "z"]]
|
332
|
+
# }
|
333
|
+
# )
|
334
|
+
# df.select(Polars.col("a").list.concat("b"))
|
335
|
+
# # =>
|
336
|
+
# # shape: (2, 1)
|
337
|
+
# # ┌─────────────────┐
|
338
|
+
# # │ a │
|
339
|
+
# # │ --- │
|
340
|
+
# # │ list[str] │
|
341
|
+
# # ╞═════════════════╡
|
342
|
+
# # │ ["a", "b", "c"] │
|
343
|
+
# # │ ["x", "y", "z"] │
|
344
|
+
# # └─────────────────┘
|
345
|
+
def concat(other)
|
346
|
+
if other.is_a?(::Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
347
|
+
return concat(Series.new([other]))
|
348
|
+
end
|
349
|
+
|
350
|
+
if !other.is_a?(::Array)
|
351
|
+
other_list = [other]
|
352
|
+
else
|
353
|
+
other_list = other.dup
|
354
|
+
end
|
355
|
+
|
356
|
+
other_list.insert(0, Utils.wrap_expr(_rbexpr))
|
357
|
+
Polars.concat_list(other_list)
|
358
|
+
end
|
359
|
+
|
360
|
+
# Get the value by index in the sublists.
|
361
|
+
#
|
362
|
+
# So index `0` would return the first item of every sublist
|
363
|
+
# and index `-1` would return the last item of every sublist.
|
364
|
+
# If an index is out of bounds, it returns null by default (see `null_on_oob`).
|
365
|
+
#
|
366
|
+
# @param index [Integer]
|
367
|
+
# Index to return per sublist
|
368
|
+
# @param null_on_oob [Boolean]
|
369
|
+
# Behavior if an index is out of bounds:
|
370
|
+
# true -> set as null
|
371
|
+
# false -> raise an error
|
372
|
+
#
|
373
|
+
# @return [Expr]
|
374
|
+
#
|
375
|
+
# @example
|
376
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
377
|
+
# df.select(Polars.col("foo").list.get(0))
|
378
|
+
# # =>
|
379
|
+
# # shape: (3, 1)
|
380
|
+
# # ┌──────┐
|
381
|
+
# # │ foo │
|
382
|
+
# # │ --- │
|
383
|
+
# # │ i64 │
|
384
|
+
# # ╞══════╡
|
385
|
+
# # │ 3 │
|
386
|
+
# # │ null │
|
387
|
+
# # │ 1 │
|
388
|
+
# # └──────┘
|
389
|
+
def get(index, null_on_oob: true)
|
390
|
+
index = Utils.parse_into_expression(index)
|
391
|
+
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
392
|
+
end
|
393
|
+
|
394
|
+
# Get the value by index in the sublists.
|
395
|
+
#
|
396
|
+
# @return [Expr]
|
397
|
+
def [](item)
|
398
|
+
get(item)
|
399
|
+
end
|
400
|
+
|
401
|
+
# Take sublists by multiple indices.
|
402
|
+
#
|
403
|
+
# The indices may be defined in a single column, or by sublists in another
|
404
|
+
# column of dtype `List`.
|
405
|
+
#
|
406
|
+
# @param index [Object]
|
407
|
+
# Indices to return per sublist
|
408
|
+
# @param null_on_oob [Boolean]
|
409
|
+
# Behavior if an index is out of bounds:
|
410
|
+
# true -> set as null
|
411
|
+
# false -> raise an error
|
412
|
+
# Note that defaulting to raising an error is much cheaper
|
413
|
+
#
|
414
|
+
# @return [Expr]
|
415
|
+
#
|
416
|
+
# @example
|
417
|
+
# df = Polars::DataFrame.new({"a" => [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
|
418
|
+
# df.with_columns(gather: Polars.col("a").list.gather([0, 4], null_on_oob: true))
|
419
|
+
# # =>
|
420
|
+
# # shape: (3, 2)
|
421
|
+
# # ┌─────────────┬──────────────┐
|
422
|
+
# # │ a ┆ gather │
|
423
|
+
# # │ --- ┆ --- │
|
424
|
+
# # │ list[i64] ┆ list[i64] │
|
425
|
+
# # ╞═════════════╪══════════════╡
|
426
|
+
# # │ [3, 2, 1] ┆ [3, null] │
|
427
|
+
# # │ [] ┆ [null, null] │
|
428
|
+
# # │ [1, 2, … 5] ┆ [1, 5] │
|
429
|
+
# # └─────────────┴──────────────┘
|
430
|
+
def gather(index, null_on_oob: false)
|
431
|
+
if index.is_a?(::Array)
|
432
|
+
index = Series.new(index)
|
433
|
+
end
|
434
|
+
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
|
+
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
436
|
+
end
|
437
|
+
alias_method :take, :gather
|
438
|
+
|
439
|
+
# Get the first value of the sublists.
|
440
|
+
#
|
441
|
+
# @return [Expr]
|
442
|
+
#
|
443
|
+
# @example
|
444
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
445
|
+
# df.select(Polars.col("foo").list.first)
|
446
|
+
# # =>
|
447
|
+
# # shape: (3, 1)
|
448
|
+
# # ┌──────┐
|
449
|
+
# # │ foo │
|
450
|
+
# # │ --- │
|
451
|
+
# # │ i64 │
|
452
|
+
# # ╞══════╡
|
453
|
+
# # │ 3 │
|
454
|
+
# # │ null │
|
455
|
+
# # │ 1 │
|
456
|
+
# # └──────┘
|
457
|
+
def first
|
458
|
+
get(0)
|
459
|
+
end
|
460
|
+
|
461
|
+
# Get the last value of the sublists.
|
462
|
+
#
|
463
|
+
# @return [Expr]
|
464
|
+
#
|
465
|
+
# @example
|
466
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
467
|
+
# df.select(Polars.col("foo").list.last)
|
468
|
+
# # =>
|
469
|
+
# # shape: (3, 1)
|
470
|
+
# # ┌──────┐
|
471
|
+
# # │ foo │
|
472
|
+
# # │ --- │
|
473
|
+
# # │ i64 │
|
474
|
+
# # ╞══════╡
|
475
|
+
# # │ 1 │
|
476
|
+
# # │ null │
|
477
|
+
# # │ 2 │
|
478
|
+
# # └──────┘
|
479
|
+
def last
|
480
|
+
get(-1)
|
481
|
+
end
|
482
|
+
|
483
|
+
# Check if sublists contain the given item.
|
484
|
+
#
|
485
|
+
# @param item [Object]
|
486
|
+
# Item that will be checked for membership
|
487
|
+
#
|
488
|
+
# @return [Expr]
|
489
|
+
#
|
490
|
+
# @example
|
491
|
+
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
492
|
+
# df.select(Polars.col("foo").list.contains(1))
|
493
|
+
# # =>
|
494
|
+
# # shape: (3, 1)
|
495
|
+
# # ┌───────┐
|
496
|
+
# # │ foo │
|
497
|
+
# # │ --- │
|
498
|
+
# # │ bool │
|
499
|
+
# # ╞═══════╡
|
500
|
+
# # │ true │
|
501
|
+
# # │ false │
|
502
|
+
# # │ true │
|
503
|
+
# # └───────┘
|
504
|
+
def contains(item)
|
505
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
|
506
|
+
end
|
507
|
+
|
508
|
+
# Join all string items in a sublist and place a separator between them.
|
509
|
+
#
|
510
|
+
# This errors if the inner type of the list is not `:str`.
|
511
|
+
#
|
512
|
+
# @param separator [String]
|
513
|
+
# string to separate the items with
|
514
|
+
# @param ignore_nulls [Boolean]
|
515
|
+
# Ignore null values (default).
|
516
|
+
#
|
517
|
+
# @return [Expr]
|
518
|
+
#
|
519
|
+
# @example
|
520
|
+
# df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
|
521
|
+
# df.select(Polars.col("s").list.join(" "))
|
522
|
+
# # =>
|
523
|
+
# # shape: (2, 1)
|
524
|
+
# # ┌───────┐
|
525
|
+
# # │ s │
|
526
|
+
# # │ --- │
|
527
|
+
# # │ str │
|
528
|
+
# # ╞═══════╡
|
529
|
+
# # │ a b c │
|
530
|
+
# # │ x y │
|
531
|
+
# # └───────┘
|
532
|
+
def join(separator, ignore_nulls: true)
|
533
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
534
|
+
Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
|
535
|
+
end
|
536
|
+
|
537
|
+
# Retrieve the index of the minimal value in every sublist.
|
538
|
+
#
|
539
|
+
# @return [Expr]
|
540
|
+
#
|
541
|
+
# @example
|
542
|
+
# df = Polars::DataFrame.new(
|
543
|
+
# {
|
544
|
+
# "a" => [[1, 2], [2, 1]]
|
545
|
+
# }
|
546
|
+
# )
|
547
|
+
# df.select(Polars.col("a").list.arg_min)
|
548
|
+
# # =>
|
549
|
+
# # shape: (2, 1)
|
550
|
+
# # ┌─────┐
|
551
|
+
# # │ a │
|
552
|
+
# # │ --- │
|
553
|
+
# # │ u32 │
|
554
|
+
# # ╞═════╡
|
555
|
+
# # │ 0 │
|
556
|
+
# # │ 1 │
|
557
|
+
# # └─────┘
|
558
|
+
def arg_min
|
559
|
+
Utils.wrap_expr(_rbexpr.list_arg_min)
|
560
|
+
end
|
561
|
+
|
562
|
+
# Retrieve the index of the maximum value in every sublist.
|
563
|
+
#
|
564
|
+
# @return [Expr]
|
565
|
+
#
|
566
|
+
# @example
|
567
|
+
# df = Polars::DataFrame.new(
|
568
|
+
# {
|
569
|
+
# "a" => [[1, 2], [2, 1]]
|
570
|
+
# }
|
571
|
+
# )
|
572
|
+
# df.select(Polars.col("a").list.arg_max)
|
573
|
+
# # =>
|
574
|
+
# # shape: (2, 1)
|
575
|
+
# # ┌─────┐
|
576
|
+
# # │ a │
|
577
|
+
# # │ --- │
|
578
|
+
# # │ u32 │
|
579
|
+
# # ╞═════╡
|
580
|
+
# # │ 1 │
|
581
|
+
# # │ 0 │
|
582
|
+
# # └─────┘
|
583
|
+
def arg_max
|
584
|
+
Utils.wrap_expr(_rbexpr.list_arg_max)
|
585
|
+
end
|
586
|
+
|
587
|
+
# Calculate the n-th discrete difference of every sublist.
|
588
|
+
#
|
589
|
+
# @param n [Integer]
|
590
|
+
# Number of slots to shift.
|
591
|
+
# @param null_behavior ["ignore", "drop"]
|
592
|
+
# How to handle null values.
|
593
|
+
#
|
594
|
+
# @return [Expr]
|
595
|
+
#
|
596
|
+
# @example
|
597
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
598
|
+
# s.list.diff
|
599
|
+
# # =>
|
600
|
+
# # shape: (2,)
|
601
|
+
# # Series: 'a' [list[i64]]
|
602
|
+
# # [
|
603
|
+
# # [null, 1, … 1]
|
604
|
+
# # [null, -8, -1]
|
605
|
+
# # ]
|
606
|
+
def diff(n: 1, null_behavior: "ignore")
|
607
|
+
Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
|
608
|
+
end
|
609
|
+
|
610
|
+
# Shift values by the given period.
|
611
|
+
#
|
612
|
+
# @param n [Integer]
|
613
|
+
# Number of places to shift (may be negative).
|
614
|
+
#
|
615
|
+
# @return [Expr]
|
616
|
+
#
|
617
|
+
# @example
|
618
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
619
|
+
# s.list.shift
|
620
|
+
# # =>
|
621
|
+
# # shape: (2,)
|
622
|
+
# # Series: 'a' [list[i64]]
|
623
|
+
# # [
|
624
|
+
# # [null, 1, … 3]
|
625
|
+
# # [null, 10, 2]
|
626
|
+
# # ]
|
627
|
+
def shift(n = 1)
|
628
|
+
n = Utils.parse_into_expression(n)
|
629
|
+
Utils.wrap_expr(_rbexpr.list_shift(n))
|
630
|
+
end
|
631
|
+
|
632
|
+
# Slice every sublist.
|
633
|
+
#
|
634
|
+
# @param offset [Integer]
|
635
|
+
# Start index. Negative indexing is supported.
|
636
|
+
# @param length [Integer]
|
637
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
638
|
+
# end of the list.
|
639
|
+
#
|
640
|
+
# @return [Expr]
|
641
|
+
#
|
642
|
+
# @example
|
643
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
644
|
+
# s.list.slice(1, 2)
|
645
|
+
# # =>
|
646
|
+
# # shape: (2,)
|
647
|
+
# # Series: 'a' [list[i64]]
|
648
|
+
# # [
|
649
|
+
# # [2, 3]
|
650
|
+
# # [2, 1]
|
651
|
+
# # ]
|
652
|
+
def slice(offset, length = nil)
|
653
|
+
offset = Utils.parse_into_expression(offset, str_as_lit: false)
|
654
|
+
length = Utils.parse_into_expression(length, str_as_lit: false)
|
655
|
+
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
656
|
+
end
|
657
|
+
|
658
|
+
# Slice the first `n` values of every sublist.
|
659
|
+
#
|
660
|
+
# @param n [Integer]
|
661
|
+
# Number of values to return for each sublist.
|
662
|
+
#
|
663
|
+
# @return [Expr]
|
664
|
+
#
|
665
|
+
# @example
|
666
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
667
|
+
# s.list.head(2)
|
668
|
+
# # =>
|
669
|
+
# # shape: (2,)
|
670
|
+
# # Series: 'a' [list[i64]]
|
671
|
+
# # [
|
672
|
+
# # [1, 2]
|
673
|
+
# # [10, 2]
|
674
|
+
# # ]
|
675
|
+
def head(n = 5)
|
676
|
+
slice(0, n)
|
677
|
+
end
|
678
|
+
|
679
|
+
# Slice the last `n` values of every sublist.
|
680
|
+
#
|
681
|
+
# @param n [Integer]
|
682
|
+
# Number of values to return for each sublist.
|
683
|
+
#
|
684
|
+
# @return [Expr]
|
685
|
+
#
|
686
|
+
# @example
|
687
|
+
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
688
|
+
# s.list.tail(2)
|
689
|
+
# # =>
|
690
|
+
# # shape: (2,)
|
691
|
+
# # Series: 'a' [list[i64]]
|
692
|
+
# # [
|
693
|
+
# # [3, 4]
|
694
|
+
# # [2, 1]
|
695
|
+
# # ]
|
696
|
+
def tail(n = 5)
|
697
|
+
n = Utils.parse_into_expression(n)
|
698
|
+
Utils.wrap_expr(_rbexpr.list_tail(n))
|
699
|
+
end
|
700
|
+
|
701
|
+
# Count how often the value produced by `element` occurs.
|
702
|
+
#
|
703
|
+
# @param element [Expr]
|
704
|
+
# An expression that produces a single value
|
705
|
+
#
|
706
|
+
# @return [Expr]
|
707
|
+
#
|
708
|
+
# @example
|
709
|
+
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
710
|
+
# df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
|
711
|
+
# # =>
|
712
|
+
# # shape: (5, 1)
|
713
|
+
# # ┌────────────────┐
|
714
|
+
# # │ number_of_twos │
|
715
|
+
# # │ --- │
|
716
|
+
# # │ u32 │
|
717
|
+
# # ╞════════════════╡
|
718
|
+
# # │ 0 │
|
719
|
+
# # │ 0 │
|
720
|
+
# # │ 2 │
|
721
|
+
# # │ 1 │
|
722
|
+
# # │ 0 │
|
723
|
+
# # └────────────────┘
|
724
|
+
def count_matches(element)
|
725
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
|
726
|
+
end
|
727
|
+
alias_method :count_match, :count_matches
|
728
|
+
|
729
|
+
# Convert the series of type `List` to a series of type `Struct`.
|
730
|
+
#
|
731
|
+
# @param n_field_strategy ["first_non_null", "max_width"]
|
732
|
+
# Strategy to determine the number of fields of the struct.
|
733
|
+
# @param name_generator [Object]
|
734
|
+
# A custom function that can be used to generate the field names.
|
735
|
+
# Default field names are `field_0, field_1 .. field_n`
|
736
|
+
#
|
737
|
+
# @return [Expr]
|
738
|
+
#
|
739
|
+
# @example
|
740
|
+
# df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
|
741
|
+
# df.select([Polars.col("a").list.to_struct])
|
742
|
+
# # =>
|
743
|
+
# # shape: (2, 1)
|
744
|
+
# # ┌────────────┐
|
745
|
+
# # │ a │
|
746
|
+
# # │ --- │
|
747
|
+
# # │ struct[3] │
|
748
|
+
# # ╞════════════╡
|
749
|
+
# # │ {1,2,3} │
|
750
|
+
# # │ {1,2,null} │
|
751
|
+
# # └────────────┘
|
752
|
+
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
|
753
|
+
raise Todo if name_generator
|
754
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
|
755
|
+
end
|
756
|
+
|
757
|
+
# Run any polars expression against the lists' elements.
|
758
|
+
#
|
759
|
+
# @param expr [Expr]
|
760
|
+
# Expression to run. Note that you can select an element with `Polars.first`, or
|
761
|
+
# `Polars.col`
|
762
|
+
# @param parallel [Boolean]
|
763
|
+
# Run all expressions in parallel. Don't activate this blindly.
|
764
|
+
# Parallelism is worth it if there is enough work to do per thread.
|
765
|
+
#
|
766
|
+
# This likely should not be used in the group by context, because we already
|
767
|
+
# have parallel execution per group.
|
768
|
+
#
|
769
|
+
# @return [Expr]
|
770
|
+
#
|
771
|
+
# @example
|
772
|
+
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
773
|
+
# df.with_column(
|
774
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
775
|
+
# )
|
776
|
+
# # =>
|
777
|
+
# # shape: (3, 3)
|
778
|
+
# # ┌─────┬─────┬────────────┐
|
779
|
+
# # │ a ┆ b ┆ rank │
|
780
|
+
# # │ --- ┆ --- ┆ --- │
|
781
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
782
|
+
# # ╞═════╪═════╪════════════╡
|
783
|
+
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
784
|
+
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
785
|
+
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
786
|
+
# # └─────┴─────┴────────────┘
|
787
|
+
def eval(expr, parallel: false)
|
788
|
+
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
|
789
|
+
end
|
790
|
+
end
|
791
|
+
end
|