polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,791 @@
1
+ module Polars
2
+ # Namespace for list related expressions.
3
+ class ListExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Evaluate whether all boolean values in a list are true.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
19
+ # )
20
+ # df.with_columns(all: Polars.col("a").list.all)
21
+ # # =>
22
+ # # shape: (6, 2)
23
+ # # ┌────────────────┬───────┐
24
+ # # │ a ┆ all │
25
+ # # │ --- ┆ --- │
26
+ # # │ list[bool] ┆ bool │
27
+ # # ╞════════════════╪═══════╡
28
+ # # │ [true, true] ┆ true │
29
+ # # │ [false, true] ┆ false │
30
+ # # │ [false, false] ┆ false │
31
+ # # │ [null] ┆ true │
32
+ # # │ [] ┆ true │
33
+ # # │ null ┆ null │
34
+ # # └────────────────┴───────┘
35
+ def all
36
+ Utils.wrap_expr(_rbexpr.list_all)
37
+ end
38
+
39
+ # Evaluate whether any boolean value in a list is true.
40
+ #
41
+ # @return [Expr]
42
+ #
43
+ # @example
44
+ # df = Polars::DataFrame.new(
45
+ # {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
46
+ # )
47
+ # df.with_columns(any: Polars.col("a").list.any)
48
+ # # =>
49
+ # # shape: (6, 2)
50
+ # # ┌────────────────┬───────┐
51
+ # # │ a ┆ any │
52
+ # # │ --- ┆ --- │
53
+ # # │ list[bool] ┆ bool │
54
+ # # ╞════════════════╪═══════╡
55
+ # # │ [true, true] ┆ true │
56
+ # # │ [false, true] ┆ true │
57
+ # # │ [false, false] ┆ false │
58
+ # # │ [null] ┆ false │
59
+ # # │ [] ┆ false │
60
+ # # │ null ┆ null │
61
+ # # └────────────────┴───────┘
62
+ def any
63
+ Utils.wrap_expr(_rbexpr.list_any)
64
+ end
65
+
66
+ # Get the number of elements in each sublist as `:u32`.
67
+ #
68
+ # @return [Expr]
69
+ #
70
+ # @example
71
+ # df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
72
+ # df.select(Polars.col("bar").list.lengths)
73
+ # # =>
74
+ # # shape: (2, 1)
75
+ # # ┌─────┐
76
+ # # │ bar │
77
+ # # │ --- │
78
+ # # │ u32 │
79
+ # # ╞═════╡
80
+ # # │ 2 │
81
+ # # │ 1 │
82
+ # # └─────┘
83
+ def lengths
84
+ Utils.wrap_expr(_rbexpr.list_len)
85
+ end
86
+ alias_method :len, :lengths
87
+
88
+ # Drop all null values in the list.
89
+ #
90
+ # The original order of the remaining elements is preserved.
91
+ #
92
+ # @return [Expr]
93
+ #
94
+ # @example
95
+ # df = Polars::DataFrame.new({"values" => [[nil, 1, nil, 2], [nil], [3, 4]]})
96
+ # df.with_columns(drop_nulls: Polars.col("values").list.drop_nulls)
97
+ # # =>
98
+ # # shape: (3, 2)
99
+ # # ┌────────────────┬────────────┐
100
+ # # │ values ┆ drop_nulls │
101
+ # # │ --- ┆ --- │
102
+ # # │ list[i64] ┆ list[i64] │
103
+ # # ╞════════════════╪════════════╡
104
+ # # │ [null, 1, … 2] ┆ [1, 2] │
105
+ # # │ [null] ┆ [] │
106
+ # # │ [3, 4] ┆ [3, 4] │
107
+ # # └────────────────┴────────────┘
108
+ def drop_nulls
109
+ Utils.wrap_expr(_rbexpr.list_drop_nulls)
110
+ end
111
+
112
+ # Sample from this list.
113
+ #
114
+ # @param n [Integer]
115
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
116
+ # `fraction` is nil.
117
+ # @param fraction [Float]
118
+ # Fraction of items to return. Cannot be used with `n`.
119
+ # @param with_replacement [Boolean]
120
+ # Allow values to be sampled more than once.
121
+ # @param shuffle [Boolean]
122
+ # Shuffle the order of sampled data points.
123
+ # @param seed [Integer]
124
+ # Seed for the random number generator. If set to nil (default), a
125
+ # random seed is generated for each sample operation.
126
+ #
127
+ # @return [Expr]
128
+ #
129
+ # @example
130
+ # df = Polars::DataFrame.new({"values" => [[1, 2, 3], [4, 5]], "n" => [2, 1]})
131
+ # df.with_columns(sample: Polars.col("values").list.sample(n: Polars.col("n"), seed: 1))
132
+ # # =>
133
+ # # shape: (2, 3)
134
+ # # ┌───────────┬─────┬───────────┐
135
+ # # │ values ┆ n ┆ sample │
136
+ # # │ --- ┆ --- ┆ --- │
137
+ # # │ list[i64] ┆ i64 ┆ list[i64] │
138
+ # # ╞═══════════╪═════╪═══════════╡
139
+ # # │ [1, 2, 3] ┆ 2 ┆ [2, 1] │
140
+ # # │ [4, 5] ┆ 1 ┆ [5] │
141
+ # # └───────────┴─────┴───────────┘
142
+ def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
143
+ if !n.nil? && !fraction.nil?
144
+ msg = "cannot specify both `n` and `fraction`"
145
+ raise ArgumentError, msg
146
+ end
147
+
148
+ if !fraction.nil?
149
+ fraction = Utils.parse_into_expression(fraction)
150
+ return Utils.wrap_expr(
151
+ _rbexpr.list_sample_fraction(
152
+ fraction, with_replacement, shuffle, seed
153
+ )
154
+ )
155
+ end
156
+
157
+ n = 1 if n.nil?
158
+ n = Utils.parse_into_expression(n)
159
+ Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
+ end
161
+
162
+ # Sum the values in each sublist.
163
+ #
164
+ # @return [Expr]
165
+ #
166
+ # @example
167
+ # df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
168
+ # df.select(Polars.col("values").list.sum)
169
+ # # =>
170
+ # # shape: (2, 1)
171
+ # # ┌────────┐
172
+ # # │ values │
173
+ # # │ --- │
174
+ # # │ i64 │
175
+ # # ╞════════╡
176
+ # # │ 1 │
177
+ # # │ 5 │
178
+ # # └────────┘
179
+ def sum
180
+ Utils.wrap_expr(_rbexpr.list_sum)
181
+ end
182
+
183
+ # Compute the maximum value of each sublist.
184
+ #
185
+ # @return [Expr]
186
+ #
187
+ # @example
188
+ # df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
189
+ # df.select(Polars.col("values").list.max)
190
+ # # =>
191
+ # # shape: (2, 1)
192
+ # # ┌────────┐
193
+ # # │ values │
194
+ # # │ --- │
195
+ # # │ i64 │
196
+ # # ╞════════╡
197
+ # # │ 1 │
198
+ # # │ 3 │
199
+ # # └────────┘
200
+ def max
201
+ Utils.wrap_expr(_rbexpr.list_max)
202
+ end
203
+
204
+ # Compute the minimum value of each sublist.
205
+ #
206
+ # @return [Expr]
207
+ #
208
+ # @example
209
+ # df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
210
+ # df.select(Polars.col("values").list.min)
211
+ # # =>
212
+ # # shape: (2, 1)
213
+ # # ┌────────┐
214
+ # # │ values │
215
+ # # │ --- │
216
+ # # │ i64 │
217
+ # # ╞════════╡
218
+ # # │ 1 │
219
+ # # │ 2 │
220
+ # # └────────┘
221
+ def min
222
+ Utils.wrap_expr(_rbexpr.list_min)
223
+ end
224
+
225
+ # Compute the mean value of each sublist.
226
+ #
227
+ # @return [Expr]
228
+ #
229
+ # @example
230
+ # df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
231
+ # df.select(Polars.col("values").list.mean)
232
+ # # =>
233
+ # # shape: (2, 1)
234
+ # # ┌────────┐
235
+ # # │ values │
236
+ # # │ --- │
237
+ # # │ f64 │
238
+ # # ╞════════╡
239
+ # # │ 1.0 │
240
+ # # │ 2.5 │
241
+ # # └────────┘
242
+ def mean
243
+ Utils.wrap_expr(_rbexpr.list_mean)
244
+ end
245
+
246
+ # Sort the values within each sublist.
247
+ #
248
+ # @return [Expr]
249
+ #
250
+ # @example
251
+ # df = Polars::DataFrame.new(
252
+ # {
253
+ # "a" => [[3, 2, 1], [9, 1, 2]]
254
+ # }
255
+ # )
256
+ # df.select(Polars.col("a").list.sort)
257
+ # # =>
258
+ # # shape: (2, 1)
259
+ # # ┌───────────┐
260
+ # # │ a │
261
+ # # │ --- │
262
+ # # │ list[i64] │
263
+ # # ╞═══════════╡
264
+ # # │ [1, 2, 3] │
265
+ # # │ [1, 2, 9] │
266
+ # # └───────────┘
267
+ def sort(reverse: false)
268
+ Utils.wrap_expr(_rbexpr.list_sort(reverse))
269
+ end
270
+
271
+ # Reverse the order of the values within each sublist.
272
+ #
273
+ # @return [Expr]
274
+ #
275
+ # @example
276
+ # df = Polars::DataFrame.new(
277
+ # {
278
+ # "a" => [[3, 2, 1], [9, 1, 2]]
279
+ # }
280
+ # )
281
+ # df.select(Polars.col("a").list.reverse)
282
+ # # =>
283
+ # # shape: (2, 1)
284
+ # # ┌───────────┐
285
+ # # │ a │
286
+ # # │ --- │
287
+ # # │ list[i64] │
288
+ # # ╞═══════════╡
289
+ # # │ [1, 2, 3] │
290
+ # # │ [2, 1, 9] │
291
+ # # └───────────┘
292
+ def reverse
293
+ Utils.wrap_expr(_rbexpr.list_reverse)
294
+ end
295
+
296
+ # Get the unique/distinct values in the list.
297
+ #
298
+ # @return [Expr]
299
+ #
300
+ # @example
301
+ # df = Polars::DataFrame.new(
302
+ # {
303
+ # "a" => [[1, 1, 2]]
304
+ # }
305
+ # )
306
+ # df.select(Polars.col("a").list.unique)
307
+ # # =>
308
+ # # shape: (1, 1)
309
+ # # ┌───────────┐
310
+ # # │ a │
311
+ # # │ --- │
312
+ # # │ list[i64] │
313
+ # # ╞═══════════╡
314
+ # # │ [1, 2] │
315
+ # # └───────────┘
316
+ def unique(maintain_order: false)
317
+ Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
318
+ end
319
+
320
+ # Concat the arrays in a Series dtype List in linear time.
321
+ #
322
+ # @param other [Object]
323
+ # Columns to concat into a List Series
324
+ #
325
+ # @return [Expr]
326
+ #
327
+ # @example
328
+ # df = Polars::DataFrame.new(
329
+ # {
330
+ # "a" => [["a"], ["x"]],
331
+ # "b" => [["b", "c"], ["y", "z"]]
332
+ # }
333
+ # )
334
+ # df.select(Polars.col("a").list.concat("b"))
335
+ # # =>
336
+ # # shape: (2, 1)
337
+ # # ┌─────────────────┐
338
+ # # │ a │
339
+ # # │ --- │
340
+ # # │ list[str] │
341
+ # # ╞═════════════════╡
342
+ # # │ ["a", "b", "c"] │
343
+ # # │ ["x", "y", "z"] │
344
+ # # └─────────────────┘
345
+ def concat(other)
346
+ if other.is_a?(::Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
347
+ return concat(Series.new([other]))
348
+ end
349
+
350
+ if !other.is_a?(::Array)
351
+ other_list = [other]
352
+ else
353
+ other_list = other.dup
354
+ end
355
+
356
+ other_list.insert(0, Utils.wrap_expr(_rbexpr))
357
+ Polars.concat_list(other_list)
358
+ end
359
+
360
+ # Get the value by index in the sublists.
361
+ #
362
+ # Index `0` returns the first item of every sublist
362
+ # and index `-1` returns the last item of every sublist.
363
+ # If an index is out of bounds, the result is null when `null_on_oob` is true.
365
+ #
366
+ # @param index [Integer]
367
+ # Index to return per sublist
368
+ # @param null_on_oob [Boolean]
369
+ # Behavior if an index is out of bounds:
370
+ # true -> set as null
371
+ # false -> raise an error
372
+ #
373
+ # @return [Expr]
374
+ #
375
+ # @example
376
+ # df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
377
+ # df.select(Polars.col("foo").list.get(0))
378
+ # # =>
379
+ # # shape: (3, 1)
380
+ # # ┌──────┐
381
+ # # │ foo │
382
+ # # │ --- │
383
+ # # │ i64 │
384
+ # # ╞══════╡
385
+ # # │ 3 │
386
+ # # │ null │
387
+ # # │ 1 │
388
+ # # └──────┘
389
+ def get(index, null_on_oob: true)
390
+ index = Utils.parse_into_expression(index)
391
+ Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
392
+ end
393
+
394
+ # Get the value by index in the sublists.
395
+ #
396
+ # @return [Expr]
397
+ def [](item)
398
+ get(item)
399
+ end
400
+
401
+ # Take sublists by multiple indices.
402
+ #
403
+ # The indices may be defined in a single column, or by sublists in another
404
+ # column of dtype `List`.
405
+ #
406
+ # @param index [Object]
407
+ # Indices to return per sublist
408
+ # @param null_on_oob [Boolean]
409
+ # Behavior if an index is out of bounds:
410
+ # true -> set as null
410
+ # false -> raise an error
412
+ # Note that defaulting to raising an error is much cheaper
413
+ #
414
+ # @return [Expr]
415
+ #
416
+ # @example
417
+ # df = Polars::DataFrame.new({"a" => [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
418
+ # df.with_columns(gather: Polars.col("a").list.gather([0, 4], null_on_oob: true))
419
+ # # =>
420
+ # # shape: (3, 2)
421
+ # # ┌─────────────┬──────────────┐
422
+ # # │ a ┆ gather │
423
+ # # │ --- ┆ --- │
424
+ # # │ list[i64] ┆ list[i64] │
425
+ # # ╞═════════════╪══════════════╡
426
+ # # │ [3, 2, 1] ┆ [3, null] │
427
+ # # │ [] ┆ [null, null] │
428
+ # # │ [1, 2, … 5] ┆ [1, 5] │
429
+ # # └─────────────┴──────────────┘
430
+ def gather(index, null_on_oob: false)
431
+ if index.is_a?(::Array)
432
+ index = Series.new(index)
433
+ end
434
+ index = Utils.parse_into_expression(index, str_as_lit: false)
435
+ Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
436
+ end
437
+ alias_method :take, :gather
438
+
439
+ # Get the first value of the sublists.
440
+ #
441
+ # @return [Expr]
442
+ #
443
+ # @example
444
+ # df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
445
+ # df.select(Polars.col("foo").list.first)
446
+ # # =>
447
+ # # shape: (3, 1)
448
+ # # ┌──────┐
449
+ # # │ foo │
450
+ # # │ --- │
451
+ # # │ i64 │
452
+ # # ╞══════╡
453
+ # # │ 3 │
454
+ # # │ null │
455
+ # # │ 1 │
456
+ # # └──────┘
457
+ def first
458
+ get(0)
459
+ end
460
+
461
+ # Get the last value of the sublists.
462
+ #
463
+ # @return [Expr]
464
+ #
465
+ # @example
466
+ # df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
467
+ # df.select(Polars.col("foo").list.last)
468
+ # # =>
469
+ # # shape: (3, 1)
470
+ # # ┌──────┐
471
+ # # │ foo │
472
+ # # │ --- │
473
+ # # │ i64 │
474
+ # # ╞══════╡
475
+ # # │ 1 │
476
+ # # │ null │
477
+ # # │ 2 │
478
+ # # └──────┘
479
+ def last
480
+ get(-1)
481
+ end
482
+
483
+ # Check if sublists contain the given item.
484
+ #
485
+ # @param item [Object]
486
+ # Item that will be checked for membership
487
+ #
488
+ # @return [Expr]
489
+ #
490
+ # @example
491
+ # df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
492
+ # df.select(Polars.col("foo").list.contains(1))
493
+ # # =>
494
+ # # shape: (3, 1)
495
+ # # ┌───────┐
496
+ # # │ foo │
497
+ # # │ --- │
498
+ # # │ bool │
499
+ # # ╞═══════╡
500
+ # # │ true │
501
+ # # │ false │
502
+ # # │ true │
503
+ # # └───────┘
504
+ def contains(item)
505
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
506
+ end
507
+
508
+ # Join all string items in a sublist and place a separator between them.
509
+ #
510
+ # This raises an error if the inner type of the list is not `:str`.
511
+ #
512
+ # @param separator [String]
513
+ # string to separate the items with
514
+ # @param ignore_nulls [Boolean]
515
+ # Ignore null values (default).
516
+ #
517
+ # @return [Expr]
518
+ #
519
+ # @example
520
+ # df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
521
+ # df.select(Polars.col("s").list.join(" "))
522
+ # # =>
523
+ # # shape: (2, 1)
524
+ # # ┌───────┐
525
+ # # │ s │
526
+ # # │ --- │
527
+ # # │ str │
528
+ # # ╞═══════╡
529
+ # # │ a b c │
530
+ # # │ x y │
531
+ # # └───────┘
532
+ def join(separator, ignore_nulls: true)
533
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
534
+ Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
535
+ end
536
+
537
+ # Retrieve the index of the minimal value in every sublist.
538
+ #
539
+ # @return [Expr]
540
+ #
541
+ # @example
542
+ # df = Polars::DataFrame.new(
543
+ # {
544
+ # "a" => [[1, 2], [2, 1]]
545
+ # }
546
+ # )
547
+ # df.select(Polars.col("a").list.arg_min)
548
+ # # =>
549
+ # # shape: (2, 1)
550
+ # # ┌─────┐
551
+ # # │ a │
552
+ # # │ --- │
553
+ # # │ u32 │
554
+ # # ╞═════╡
555
+ # # │ 0 │
556
+ # # │ 1 │
557
+ # # └─────┘
558
+ def arg_min
559
+ Utils.wrap_expr(_rbexpr.list_arg_min)
560
+ end
561
+
562
+ # Retrieve the index of the maximum value in every sublist.
563
+ #
564
+ # @return [Expr]
565
+ #
566
+ # @example
567
+ # df = Polars::DataFrame.new(
568
+ # {
569
+ # "a" => [[1, 2], [2, 1]]
570
+ # }
571
+ # )
572
+ # df.select(Polars.col("a").list.arg_max)
573
+ # # =>
574
+ # # shape: (2, 1)
575
+ # # ┌─────┐
576
+ # # │ a │
577
+ # # │ --- │
578
+ # # │ u32 │
579
+ # # ╞═════╡
580
+ # # │ 1 │
581
+ # # │ 0 │
582
+ # # └─────┘
583
+ def arg_max
584
+ Utils.wrap_expr(_rbexpr.list_arg_max)
585
+ end
586
+
587
+ # Calculate the n-th discrete difference of every sublist.
588
+ #
589
+ # @param n [Integer]
590
+ # Number of slots to shift.
591
+ # @param null_behavior ["ignore", "drop"]
592
+ # How to handle null values.
593
+ #
594
+ # @return [Expr]
595
+ #
596
+ # @example
597
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
598
+ # s.list.diff
599
+ # # =>
600
+ # # shape: (2,)
601
+ # # Series: 'a' [list[i64]]
602
+ # # [
603
+ # # [null, 1, … 1]
604
+ # # [null, -8, -1]
605
+ # # ]
606
+ def diff(n: 1, null_behavior: "ignore")
607
+ Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
608
+ end
609
+
610
+ # Shift values by the given period.
611
+ #
612
+ # @param n [Integer]
613
+ # Number of places to shift (may be negative).
614
+ #
615
+ # @return [Expr]
616
+ #
617
+ # @example
618
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
619
+ # s.list.shift
620
+ # # =>
621
+ # # shape: (2,)
622
+ # # Series: 'a' [list[i64]]
623
+ # # [
624
+ # # [null, 1, … 3]
625
+ # # [null, 10, 2]
626
+ # # ]
627
+ def shift(n = 1)
628
+ n = Utils.parse_into_expression(n)
629
+ Utils.wrap_expr(_rbexpr.list_shift(n))
630
+ end
631
+
632
+ # Slice every sublist.
633
+ #
634
+ # @param offset [Integer]
635
+ # Start index. Negative indexing is supported.
636
+ # @param length [Integer]
637
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
638
+ # end of the list.
639
+ #
640
+ # @return [Expr]
641
+ #
642
+ # @example
643
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
644
+ # s.list.slice(1, 2)
645
+ # # =>
646
+ # # shape: (2,)
647
+ # # Series: 'a' [list[i64]]
648
+ # # [
649
+ # # [2, 3]
650
+ # # [2, 1]
651
+ # # ]
652
+ def slice(offset, length = nil)
653
+ offset = Utils.parse_into_expression(offset, str_as_lit: false)
654
+ length = Utils.parse_into_expression(length, str_as_lit: false)
655
+ Utils.wrap_expr(_rbexpr.list_slice(offset, length))
656
+ end
657
+
658
+ # Slice the first `n` values of every sublist.
659
+ #
660
+ # @param n [Integer]
661
+ # Number of values to return for each sublist.
662
+ #
663
+ # @return [Expr]
664
+ #
665
+ # @example
666
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
667
+ # s.list.head(2)
668
+ # # =>
669
+ # # shape: (2,)
670
+ # # Series: 'a' [list[i64]]
671
+ # # [
672
+ # # [1, 2]
673
+ # # [10, 2]
674
+ # # ]
675
+ def head(n = 5)
676
+ slice(0, n)
677
+ end
678
+
679
+ # Slice the last `n` values of every sublist.
680
+ #
681
+ # @param n [Integer]
682
+ # Number of values to return for each sublist.
683
+ #
684
+ # @return [Expr]
685
+ #
686
+ # @example
687
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
688
+ # s.list.tail(2)
689
+ # # =>
690
+ # # shape: (2,)
691
+ # # Series: 'a' [list[i64]]
692
+ # # [
693
+ # # [3, 4]
694
+ # # [2, 1]
695
+ # # ]
696
+ def tail(n = 5)
697
+ n = Utils.parse_into_expression(n)
698
+ Utils.wrap_expr(_rbexpr.list_tail(n))
699
+ end
700
+
701
+ # Count how often the value produced by `element` occurs in each sublist.
702
+ #
703
+ # @param element [Expr]
704
+ # An expression that produces a single value
705
+ #
706
+ # @return [Expr]
707
+ #
708
+ # @example
709
+ # df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
710
+ # df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
711
+ # # =>
712
+ # # shape: (5, 1)
713
+ # # ┌────────────────┐
714
+ # # │ number_of_twos │
715
+ # # │ --- │
716
+ # # │ u32 │
717
+ # # ╞════════════════╡
718
+ # # │ 0 │
719
+ # # │ 0 │
720
+ # # │ 2 │
721
+ # # │ 1 │
722
+ # # │ 0 │
723
+ # # └────────────────┘
724
+ def count_matches(element)
725
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
726
+ end
727
+ alias_method :count_match, :count_matches
728
+
729
+ # Convert the series of type `List` to a series of type `Struct`.
730
+ #
731
+ # @param n_field_strategy ["first_non_null", "max_width"]
732
+ # Strategy to determine the number of fields of the struct.
733
+ # @param name_generator [Object]
734
+ # A custom function that can be used to generate the field names.
735
+ # Default field names are `field_0, field_1 .. field_n`
736
+ #
737
+ # @return [Expr]
738
+ #
739
+ # @example
740
+ # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
741
+ # df.select([Polars.col("a").list.to_struct])
742
+ # # =>
743
+ # # shape: (2, 1)
744
+ # # ┌────────────┐
745
+ # # │ a │
746
+ # # │ --- │
747
+ # # │ struct[3] │
748
+ # # ╞════════════╡
749
+ # # │ {1,2,3} │
750
+ # # │ {1,2,null} │
751
+ # # └────────────┘
752
+ def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
753
+ raise Todo if name_generator
754
+ Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
755
+ end
756
+
757
+ # Run any polars expression against the lists' elements.
758
+ #
759
+ # @param expr [Expr]
760
+ # Expression to run. Note that you can select an element with `Polars.first`, or
761
+ # `Polars.col`
762
+ # @param parallel [Boolean]
763
+ # Run all expressions in parallel. Don't activate this blindly.
764
+ # Parallelism is worth it if there is enough work to do per thread.
765
+ #
766
+ # This likely should not be used in the group by context, because execution
767
+ # is already parallelized per group.
768
+ #
769
+ # @return [Expr]
770
+ #
771
+ # @example
772
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
773
+ # df.with_column(
774
+ # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
775
+ # )
776
+ # # =>
777
+ # # shape: (3, 3)
778
+ # # ┌─────┬─────┬────────────┐
779
+ # # │ a ┆ b ┆ rank │
780
+ # # │ --- ┆ --- ┆ --- │
781
+ # # │ i64 ┆ i64 ┆ list[f64] │
782
+ # # ╞═════╪═════╪════════════╡
783
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
784
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
785
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
786
+ # # └─────┴─────┴────────────┘
787
+ def eval(expr, parallel: false)
788
+ Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
789
+ end
790
+ end
791
+ end