polars-df 0.10.0-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,791 @@
1
module Polars
  # Namespace for list related expressions.
  class ListExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Evaluate whether all boolean values in a list are true.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
    #   )
    #   df.with_columns(all: Polars.col("a").list.all)
    #   # =>
    #   # shape: (6, 2)
    #   # ┌────────────────┬───────┐
    #   # │ a              ┆ all   │
    #   # │ ---            ┆ ---   │
    #   # │ list[bool]     ┆ bool  │
    #   # ╞════════════════╪═══════╡
    #   # │ [true, true]   ┆ true  │
    #   # │ [false, true]  ┆ false │
    #   # │ [false, false] ┆ false │
    #   # │ [null]         ┆ true  │
    #   # │ []             ┆ true  │
    #   # │ null           ┆ null  │
    #   # └────────────────┴───────┘
    def all
      Utils.wrap_expr(_rbexpr.list_all)
    end

    # Evaluate whether any boolean value in a list is true.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
    #   )
    #   df.with_columns(any: Polars.col("a").list.any)
    #   # =>
    #   # shape: (6, 2)
    #   # ┌────────────────┬───────┐
    #   # │ a              ┆ any   │
    #   # │ ---            ┆ ---   │
    #   # │ list[bool]     ┆ bool  │
    #   # ╞════════════════╪═══════╡
    #   # │ [true, true]   ┆ true  │
    #   # │ [false, true]  ┆ true  │
    #   # │ [false, false] ┆ false │
    #   # │ [null]         ┆ false │
    #   # │ []             ┆ false │
    #   # │ null           ┆ null  │
    #   # └────────────────┴───────┘
    def any
      Utils.wrap_expr(_rbexpr.list_any)
    end

    # Get the length of the arrays as `:u32`.
    #
    # Also available as `len`.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
    #   df.select(Polars.col("bar").list.lengths)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ bar │
    #   # │ --- │
    #   # │ u32 │
    #   # ╞═════╡
    #   # │ 2   │
    #   # │ 1   │
    #   # └─────┘
    def lengths
      Utils.wrap_expr(_rbexpr.list_len)
    end
    alias_method :len, :lengths

    # Drop all null values in the list.
    #
    # The original order of the remaining elements is preserved.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"values" => [[nil, 1, nil, 2], [nil], [3, 4]]})
    #   df.with_columns(drop_nulls: Polars.col("values").list.drop_nulls)
    #   # =>
    #   # shape: (3, 2)
    #   # ┌────────────────┬────────────┐
    #   # │ values         ┆ drop_nulls │
    #   # │ ---            ┆ ---        │
    #   # │ list[i64]      ┆ list[i64]  │
    #   # ╞════════════════╪════════════╡
    #   # │ [null, 1, … 2] ┆ [1, 2]     │
    #   # │ [null]         ┆ []         │
    #   # │ [3, 4]         ┆ [3, 4]     │
    #   # └────────────────┴────────────┘
    def drop_nulls
      Utils.wrap_expr(_rbexpr.list_drop_nulls)
    end

    # Sample from this list.
    #
    # @param n [Integer]
    #   Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
    #   `fraction` is nil.
    # @param fraction [Float]
    #   Fraction of items to return. Cannot be used with `n`.
    # @param with_replacement [Boolean]
    #   Allow values to be sampled more than once.
    # @param shuffle [Boolean]
    #   Shuffle the order of sampled data points.
    # @param seed [Integer]
    #   Seed for the random number generator. If set to nil (default), a
    #   random seed is generated for each sample operation.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If both `n` and `fraction` are given.
    #
    # @example
    #   df = Polars::DataFrame.new({"values" => [[1, 2, 3], [4, 5]], "n" => [2, 1]})
    #   df.with_columns(sample: Polars.col("values").list.sample(n: Polars.col("n"), seed: 1))
    #   # =>
    #   # shape: (2, 3)
    #   # ┌───────────┬─────┬───────────┐
    #   # │ values    ┆ n   ┆ sample    │
    #   # │ ---       ┆ --- ┆ ---       │
    #   # │ list[i64] ┆ i64 ┆ list[i64] │
    #   # ╞═══════════╪═════╪═══════════╡
    #   # │ [1, 2, 3] ┆ 2   ┆ [2, 1]    │
    #   # │ [4, 5]    ┆ 1   ┆ [5]       │
    #   # └───────────┴─────┴───────────┘
    def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
      if !n.nil? && !fraction.nil?
        raise ArgumentError, "cannot specify both `n` and `fraction`"
      end

      unless fraction.nil?
        fraction = Utils.parse_as_expression(fraction)
        return Utils.wrap_expr(
          _rbexpr.list_sample_fraction(fraction, with_replacement, shuffle, seed)
        )
      end

      # Neither argument given: fall back to sampling a single item.
      n = Utils.parse_as_expression(n.nil? ? 1 : n)
      Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
    end

    # Sum all the lists in the array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
    #   df.select(Polars.col("values").list.sum)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────┐
    #   # │ values │
    #   # │ ---    │
    #   # │ i64    │
    #   # ╞════════╡
    #   # │ 1      │
    #   # │ 5      │
    #   # └────────┘
    def sum
      Utils.wrap_expr(_rbexpr.list_sum)
    end

    # Compute the max value of the lists in the array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
    #   df.select(Polars.col("values").list.max)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────┐
    #   # │ values │
    #   # │ ---    │
    #   # │ i64    │
    #   # ╞════════╡
    #   # │ 1      │
    #   # │ 3      │
    #   # └────────┘
    def max
      Utils.wrap_expr(_rbexpr.list_max)
    end

    # Compute the min value of the lists in the array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
    #   df.select(Polars.col("values").list.min)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────┐
    #   # │ values │
    #   # │ ---    │
    #   # │ i64    │
    #   # ╞════════╡
    #   # │ 1      │
    #   # │ 2      │
    #   # └────────┘
    def min
      Utils.wrap_expr(_rbexpr.list_min)
    end

    # Compute the mean value of the lists in the array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
    #   df.select(Polars.col("values").list.mean)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────┐
    #   # │ values │
    #   # │ ---    │
    #   # │ f64    │
    #   # ╞════════╡
    #   # │ 1.0    │
    #   # │ 2.5    │
    #   # └────────┘
    def mean
      Utils.wrap_expr(_rbexpr.list_mean)
    end

    # Sort the arrays in the list.
    #
    # @param reverse [Boolean]
    #   Sort in reverse order.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [[3, 2, 1], [9, 1, 2]]
    #     }
    #   )
    #   df.select(Polars.col("a").list.sort)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌───────────┐
    #   # │ a         │
    #   # │ ---       │
    #   # │ list[i64] │
    #   # ╞═══════════╡
    #   # │ [1, 2, 3] │
    #   # │ [1, 2, 9] │
    #   # └───────────┘
    def sort(reverse: false)
      Utils.wrap_expr(_rbexpr.list_sort(reverse))
    end

    # Reverse the arrays in the list.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [[3, 2, 1], [9, 1, 2]]
    #     }
    #   )
    #   df.select(Polars.col("a").list.reverse)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌───────────┐
    #   # │ a         │
    #   # │ ---       │
    #   # │ list[i64] │
    #   # ╞═══════════╡
    #   # │ [1, 2, 3] │
    #   # │ [2, 1, 9] │
    #   # └───────────┘
    def reverse
      Utils.wrap_expr(_rbexpr.list_reverse)
    end

    # Get the unique/distinct values in the list.
    #
    # @param maintain_order [Boolean]
    #   Keep the unique values in their original order.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [[1, 1, 2]]
    #     }
    #   )
    #   df.select(Polars.col("a").list.unique)
    #   # =>
    #   # shape: (1, 1)
    #   # ┌───────────┐
    #   # │ a         │
    #   # │ ---       │
    #   # │ list[i64] │
    #   # ╞═══════════╡
    #   # │ [1, 2]    │
    #   # └───────────┘
    def unique(maintain_order: false)
      Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
    end

    # Concat the arrays in a Series dtype List in linear time.
    #
    # An array whose first element is an expression, a string or a Series is
    # treated as a list of columns to concatenate; any other array is treated
    # as a single literal list value.
    #
    # @param other [Object]
    #   Columns to concat into a List Series
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [["a"], ["x"]],
    #       "b" => [["b", "c"], ["y", "z"]]
    #     }
    #   )
    #   df.select(Polars.col("a").list.concat("b"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────────────────┐
    #   # │ a               │
    #   # │ ---             │
    #   # │ list[str]       │
    #   # ╞═════════════════╡
    #   # │ ["a", "b", "c"] │
    #   # │ ["x", "y", "z"] │
    #   # └─────────────────┘
    def concat(other)
      # `::String` (like `::Array`) must be fully qualified: an unqualified
      # `String` inside this module can resolve to a Polars constant of the
      # same name instead of Ruby's core class.
      if other.is_a?(::Array) && [Expr, ::String, Series].none? { |klass| other[0].is_a?(klass) }
        return concat(Series.new([other]))
      end

      # Work on a copy so the caller's array is never mutated.
      other_list = other.is_a?(::Array) ? other.dup : [other]
      other_list.insert(0, Utils.wrap_expr(_rbexpr))
      Polars.concat_list(other_list)
    end

    # Get the value by index in the sublists.
    #
    # So index `0` would return the first item of every sublist
    # and index `-1` would return the last item of every sublist.
    # If an index is out of bounds, the result is null when `null_on_oob`
    # is true (the default).
    #
    # @param index [Integer]
    #   Index to return per sublist
    # @param null_on_oob [Boolean]
    #   Behavior if an index is out of bounds:
    #   true -> set as null
    #   false -> raise an error
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
    #   df.select(Polars.col("foo").list.get(0))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌──────┐
    #   # │ foo  │
    #   # │ ---  │
    #   # │ i64  │
    #   # ╞══════╡
    #   # │ 3    │
    #   # │ null │
    #   # │ 1    │
    #   # └──────┘
    def get(index, null_on_oob: true)
      index = Utils.parse_as_expression(index)
      Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
    end

    # Get the value by index in the sublists.
    #
    # Shorthand for `get(item)` with its default out-of-bounds behavior.
    #
    # @param item [Integer]
    #   Index to return per sublist
    #
    # @return [Expr]
    def [](item)
      get(item)
    end

    # Take sublists by multiple indices.
    #
    # The indices may be defined in a single column, or by sublists in another
    # column of dtype `List`.
    #
    # Also available as `take`.
    #
    # @param index [Object]
    #   Indices to return per sublist
    # @param null_on_oob [Boolean]
    #   Behavior if an index is out of bounds:
    #   true -> set as null
    #   false -> raise an error
    #   Note that defaulting to raising an error is much cheaper
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
    #   df.with_columns(gather: Polars.col("a").list.gather([0, 4], null_on_oob: true))
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────────────┬──────────────┐
    #   # │ a           ┆ gather       │
    #   # │ ---         ┆ ---          │
    #   # │ list[i64]   ┆ list[i64]    │
    #   # ╞═════════════╪══════════════╡
    #   # │ [3, 2, 1]   ┆ [3, null]    │
    #   # │ []          ┆ [null, null] │
    #   # │ [1, 2, … 5] ┆ [1, 5]       │
    #   # └─────────────┴──────────────┘
    def gather(index, null_on_oob: false)
      # A plain Ruby array of indices is first turned into a Series.
      index = Series.new(index) if index.is_a?(::Array)
      index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
      Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
    end
    alias_method :take, :gather

    # Get the first value of the sublists.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
    #   df.select(Polars.col("foo").list.first)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌──────┐
    #   # │ foo  │
    #   # │ ---  │
    #   # │ i64  │
    #   # ╞══════╡
    #   # │ 3    │
    #   # │ null │
    #   # │ 1    │
    #   # └──────┘
    def first
      get(0)
    end

    # Get the last value of the sublists.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
    #   df.select(Polars.col("foo").list.last)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌──────┐
    #   # │ foo  │
    #   # │ ---  │
    #   # │ i64  │
    #   # ╞══════╡
    #   # │ 1    │
    #   # │ null │
    #   # │ 2    │
    #   # └──────┘
    def last
      get(-1)
    end

    # Check if sublists contain the given item.
    #
    # @param item [Object]
    #   Item that will be checked for membership
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
    #   df.select(Polars.col("foo").list.contains(1))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌───────┐
    #   # │ foo   │
    #   # │ ---   │
    #   # │ bool  │
    #   # ╞═══════╡
    #   # │ true  │
    #   # │ false │
    #   # │ true  │
    #   # └───────┘
    def contains(item)
      item = Utils.expr_to_lit_or_expr(item)._rbexpr
      Utils.wrap_expr(_rbexpr.list_contains(item))
    end

    # Join all string items in a sublist and place a separator between them.
    #
    # This errors if inner type of list `!= :str`.
    #
    # @param separator [String]
    #   string to separate the items with
    # @param ignore_nulls [Boolean]
    #   Ignore null values (default).
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
    #   df.select(Polars.col("s").list.join(" "))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌───────┐
    #   # │ s     │
    #   # │ ---   │
    #   # │ str   │
    #   # ╞═══════╡
    #   # │ a b c │
    #   # │ x y   │
    #   # └───────┘
    def join(separator, ignore_nulls: true)
      separator = Utils.parse_as_expression(separator, str_as_lit: true)
      Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
    end

    # Retrieve the index of the minimal value in every sublist.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [[1, 2], [2, 1]]
    #     }
    #   )
    #   df.select(Polars.col("a").list.arg_min)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ u32 │
    #   # ╞═════╡
    #   # │ 0   │
    #   # │ 1   │
    #   # └─────┘
    def arg_min
      Utils.wrap_expr(_rbexpr.list_arg_min)
    end

    # Retrieve the index of the maximum value in every sublist.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [[1, 2], [2, 1]]
    #     }
    #   )
    #   df.select(Polars.col("a").list.arg_max)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ u32 │
    #   # ╞═════╡
    #   # │ 1   │
    #   # │ 0   │
    #   # └─────┘
    def arg_max
      Utils.wrap_expr(_rbexpr.list_arg_max)
    end

    # Calculate the n-th discrete difference of every sublist.
    #
    # @param n [Integer]
    #   Number of slots to shift.
    # @param null_behavior ["ignore", "drop"]
    #   How to handle null values.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.list.diff
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list[i64]]
    #   # [
    #   #         [null, 1, … 1]
    #   #         [null, -8, -1]
    #   # ]
    def diff(n: 1, null_behavior: "ignore")
      Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
    end

    # Shift values by the given period.
    #
    # @param n [Integer]
    #   Number of places to shift (may be negative). Defaults to 1.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.list.shift
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list[i64]]
    #   # [
    #   #         [null, 1, … 3]
    #   #         [null, 10, 2]
    #   # ]
    def shift(n = 1)
      n = Utils.parse_as_expression(n)
      Utils.wrap_expr(_rbexpr.list_shift(n))
    end

    # Slice every sublist.
    #
    # @param offset [Integer]
    #   Start index. Negative indexing is supported.
    # @param length [Integer]
    #   Length of the slice. If set to `nil` (default), the slice is taken to the
    #   end of the list.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.list.slice(1, 2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list[i64]]
    #   # [
    #   #         [2, 3]
    #   #         [2, 1]
    #   # ]
    def slice(offset, length = nil)
      offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
      length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
      Utils.wrap_expr(_rbexpr.list_slice(offset, length))
    end

    # Slice the first `n` values of every sublist.
    #
    # @param n [Integer]
    #   Number of values to return for each sublist.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.list.head(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list[i64]]
    #   # [
    #   #         [1, 2]
    #   #         [10, 2]
    #   # ]
    def head(n = 5)
      slice(0, n)
    end

    # Slice the last `n` values of every sublist.
    #
    # @param n [Integer]
    #   Number of values to return for each sublist.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.list.tail(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list[i64]]
    #   # [
    #   #         [3, 4]
    #   #         [2, 1]
    #   # ]
    def tail(n = 5)
      n = Utils.parse_as_expression(n)
      Utils.wrap_expr(_rbexpr.list_tail(n))
    end

    # Count how often the value produced by `element` occurs.
    #
    # Also available as `count_match`.
    #
    # @param element [Expr]
    #   An expression that produces a single value
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
    #   df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
    #   # =>
    #   # shape: (5, 1)
    #   # ┌────────────────┐
    #   # │ number_of_twos │
    #   # │ ---            │
    #   # │ u32            │
    #   # ╞════════════════╡
    #   # │ 0              │
    #   # │ 0              │
    #   # │ 2              │
    #   # │ 1              │
    #   # │ 0              │
    #   # └────────────────┘
    def count_matches(element)
      element = Utils.expr_to_lit_or_expr(element)._rbexpr
      Utils.wrap_expr(_rbexpr.list_count_matches(element))
    end
    alias_method :count_match, :count_matches

    # Convert the series of type `List` to a series of type `Struct`.
    #
    # @param n_field_strategy ["first_non_null", "max_width"]
    #   Strategy to determine the number of fields of the struct.
    # @param name_generator [Object]
    #   A custom function that can be used to generate the field names.
    #   Default field names are `field_0, field_1 .. field_n`.
    #   Not supported yet: passing any truthy value raises `Todo`.
    #
    # @return [Expr]
    #
    # @raise [Todo]
    #   If `name_generator` is given.
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
    #   df.select([Polars.col("a").list.to_struct])
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────────┐
    #   # │ a          │
    #   # │ ---        │
    #   # │ struct[3]  │
    #   # ╞════════════╡
    #   # │ {1,2,3}    │
    #   # │ {1,2,null} │
    #   # └────────────┘
    def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
      raise Todo if name_generator

      Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
    end

    # Run any polars expression against the lists' elements.
    #
    # @param expr [Expr]
    #   Expression to run. Note that you can select an element with `Polars.first`, or
    #   `Polars.col`
    # @param parallel [Boolean]
    #   Run all expressions in parallel. Don't activate this blindly.
    #   Parallelism is worth it if there is enough work to do per thread.
    #
    #   This likely should not be used in the group by context, because
    #   execution is already parallel per group.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
    #   df.with_column(
    #     Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬────────────┐
    #   # │ a   ┆ b   ┆ rank       │
    #   # │ --- ┆ --- ┆ ---        │
    #   # │ i64 ┆ i64 ┆ list[f64]  │
    #   # ╞═════╪═════╪════════════╡
    #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
    #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │
    #   # │ 3   ┆ 2   ┆ [2.0, 1.0] │
    #   # └─────┴─────┴────────────┘
    def eval(expr, parallel: false)
      Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
    end
  end
end