polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -175,7 +175,332 @@ module Polars
175
175
  Utils.wrap_ldf(@lgb.tail(n))
176
176
  end
177
177
 
178
- # def apply
179
- # end
178
+ # Aggregate the groups into Series.
179
+ #
180
+ # @return [LazyFrame]
181
+ #
182
+ # @example
183
+ # ldf = Polars::DataFrame.new(
184
+ # {
185
+ # "a" => ["one", "two", "one", "two"],
186
+ # "b" => [1, 2, 3, 4]
187
+ # }
188
+ # ).lazy
189
+ # ldf.group_by("a", maintain_order: true).all.collect
190
+ # # =>
191
+ # # shape: (2, 2)
192
+ # # ┌─────┬───────────┐
193
+ # # │ a ┆ b │
194
+ # # │ --- ┆ --- │
195
+ # # │ str ┆ list[i64] │
196
+ # # ╞═════╪═══════════╡
197
+ # # │ one ┆ [1, 3] │
198
+ # # │ two ┆ [2, 4] │
199
+ # # └─────┴───────────┘
200
+ def all
201
+ agg(F.all)
202
+ end
203
+
204
+ # Return the number of rows in each group.
205
+ #
206
+ # @param name [String]
207
+ # Assign a name to the resulting column; if unset, defaults to "len".
208
+ #
209
+ # @return [LazyFrame]
210
+ #
211
+ # @example
212
+ # lf = Polars::LazyFrame.new({"a" => ["Apple", "Apple", "Orange"], "b" => [1, nil, 2]})
213
+ # lf.group_by("a").len.collect
214
+ # # =>
215
+ # # shape: (2, 2)
216
+ # # ┌────────┬─────┐
217
+ # # │ a ┆ len │
218
+ # # │ --- ┆ --- │
219
+ # # │ str ┆ u32 │
220
+ # # ╞════════╪═════╡
221
+ # # │ Apple ┆ 2 │
222
+ # # │ Orange ┆ 1 │
223
+ # # └────────┴─────┘
224
+ #
225
+ # @example
226
+ # lf.group_by("a").len(name: "n").collect
227
+ # # =>
228
+ # # shape: (2, 2)
229
+ # # ┌────────┬─────┐
230
+ # # │ a ┆ n │
231
+ # # │ --- ┆ --- │
232
+ # # │ str ┆ u32 │
233
+ # # ╞════════╪═════╡
234
+ # # │ Apple ┆ 2 │
235
+ # # │ Orange ┆ 1 │
236
+ # # └────────┴─────┘
237
+ def len(name: nil)
238
+ len_expr = F.len
239
+ if !name.nil?
240
+ len_expr = len_expr.alias(name)
241
+ end
242
+ agg(len_expr)
243
+ end
244
+
245
+ # Aggregate the first values in the group.
246
+ #
247
+ # @return [LazyFrame]
248
+ #
249
+ # @example
250
+ # ldf = Polars::DataFrame.new(
251
+ # {
252
+ # "a" => [1, 2, 2, 3, 4, 5],
253
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
254
+ # "c" => [true, true, true, false, false, true],
255
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
256
+ # }
257
+ # ).lazy
258
+ # ldf.group_by("d", maintain_order: true).first.collect
259
+ # # =>
260
+ # # shape: (3, 4)
261
+ # # ┌────────┬─────┬──────┬───────┐
262
+ # # │ d ┆ a ┆ b ┆ c │
263
+ # # │ --- ┆ --- ┆ --- ┆ --- │
264
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
265
+ # # ╞════════╪═════╪══════╪═══════╡
266
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ true │
267
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
268
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
269
+ # # └────────┴─────┴──────┴───────┘
270
+ def first
271
+ agg(F.all.first)
272
+ end
273
+
274
+ # Aggregate the last values in the group.
275
+ #
276
+ # @return [LazyFrame]
277
+ #
278
+ # @example
279
+ # ldf = Polars::DataFrame.new(
280
+ # {
281
+ # "a" => [1, 2, 2, 3, 4, 5],
282
+ # "b" => [0.5, 0.5, 4, 10, 14, 13],
283
+ # "c" => [true, true, true, false, false, true],
284
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
285
+ # }
286
+ # ).lazy
287
+ # ldf.group_by("d", maintain_order: true).last.collect
288
+ # # =>
289
+ # # shape: (3, 4)
290
+ # # ┌────────┬─────┬──────┬───────┐
291
+ # # │ d ┆ a ┆ b ┆ c │
292
+ # # │ --- ┆ --- ┆ --- ┆ --- │
293
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
294
+ # # ╞════════╪═════╪══════╪═══════╡
295
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ false │
296
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
297
+ # # │ Banana ┆ 5 ┆ 13.0 ┆ true │
298
+ # # └────────┴─────┴──────┴───────┘
299
+ def last
300
+ agg(F.all.last)
301
+ end
302
+
303
+ # Reduce the groups to the maximal value.
304
+ #
305
+ # @return [LazyFrame]
306
+ #
307
+ # @example
308
+ # ldf = Polars::DataFrame.new(
309
+ # {
310
+ # "a" => [1, 2, 2, 3, 4, 5],
311
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
312
+ # "c" => [true, true, true, false, false, true],
313
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
314
+ # }
315
+ # ).lazy
316
+ # ldf.group_by("d", maintain_order: true).max.collect
317
+ # # =>
318
+ # # shape: (3, 4)
319
+ # # ┌────────┬─────┬──────┬──────┐
320
+ # # │ d ┆ a ┆ b ┆ c │
321
+ # # │ --- ┆ --- ┆ --- ┆ --- │
322
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
323
+ # # ╞════════╪═════╪══════╪══════╡
324
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ true │
325
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
326
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
327
+ # # └────────┴─────┴──────┴──────┘
328
+ def max
329
+ agg(F.all.max)
330
+ end
331
+
332
+ # Reduce the groups to the mean values.
333
+ #
334
+ # @return [LazyFrame]
335
+ #
336
+ # @example
337
+ # ldf = Polars::DataFrame.new(
338
+ # {
339
+ # "a" => [1, 2, 2, 3, 4, 5],
340
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
341
+ # "c" => [true, true, true, false, false, true],
342
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
343
+ # }
344
+ # ).lazy
345
+ # ldf.group_by("d", maintain_order: true).mean.collect
346
+ # # =>
347
+ # # shape: (3, 4)
348
+ # # ┌────────┬─────┬──────────┬──────────┐
349
+ # # │ d ┆ a ┆ b ┆ c │
350
+ # # │ --- ┆ --- ┆ --- ┆ --- │
351
+ # # │ str ┆ f64 ┆ f64 ┆ f64 │
352
+ # # ╞════════╪═════╪══════════╪══════════╡
353
+ # # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
354
+ # # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
355
+ # # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
356
+ # # └────────┴─────┴──────────┴──────────┘
357
+ def mean
358
+ agg(F.all.mean)
359
+ end
360
+
361
+ # Return the median per group.
362
+ #
363
+ # @return [LazyFrame]
364
+ #
365
+ # @example
366
+ # ldf = Polars::DataFrame.new(
367
+ # {
368
+ # "a" => [1, 2, 2, 3, 4, 5],
369
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
370
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
371
+ # }
372
+ # ).lazy
373
+ # ldf.group_by("d", maintain_order: true).median.collect
374
+ # # =>
375
+ # # shape: (2, 3)
376
+ # # ┌────────┬─────┬──────┐
377
+ # # │ d ┆ a ┆ b │
378
+ # # │ --- ┆ --- ┆ --- │
379
+ # # │ str ┆ f64 ┆ f64 │
380
+ # # ╞════════╪═════╪══════╡
381
+ # # │ Apple ┆ 2.0 ┆ 4.0 │
382
+ # # │ Banana ┆ 4.0 ┆ 13.0 │
383
+ # # └────────┴─────┴──────┘
384
+ def median
385
+ agg(F.all.median)
386
+ end
387
+
388
+ # Reduce the groups to the minimal value.
389
+ #
390
+ # @return [LazyFrame]
391
+ #
392
+ # @example
393
+ # ldf = Polars::DataFrame.new(
394
+ # {
395
+ # "a" => [1, 2, 2, 3, 4, 5],
396
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
397
+ # "c" => [true, true, true, false, false, true],
398
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
399
+ # }
400
+ # ).lazy
401
+ # ldf.group_by("d", maintain_order: true).min.collect
402
+ # # =>
403
+ # # shape: (3, 4)
404
+ # # ┌────────┬─────┬──────┬───────┐
405
+ # # │ d ┆ a ┆ b ┆ c │
406
+ # # │ --- ┆ --- ┆ --- ┆ --- │
407
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
408
+ # # ╞════════╪═════╪══════╪═══════╡
409
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ false │
410
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
411
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
412
+ # # └────────┴─────┴──────┴───────┘
413
+ def min
414
+ agg(F.all.min)
415
+ end
416
+
417
+ # Count the unique values per group.
418
+ #
419
+ # @return [LazyFrame]
420
+ #
421
+ # @example
422
+ # ldf = Polars::DataFrame.new(
423
+ # {
424
+ # "a" => [1, 2, 1, 3, 4, 5],
425
+ # "b" => [0.5, 0.5, 0.5, 10, 13, 14],
426
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
427
+ # }
428
+ # ).lazy
429
+ # ldf.group_by("d", maintain_order: true).n_unique.collect
430
+ # # =>
431
+ # # shape: (2, 3)
432
+ # # ┌────────┬─────┬─────┐
433
+ # # │ d ┆ a ┆ b │
434
+ # # │ --- ┆ --- ┆ --- │
435
+ # # │ str ┆ u32 ┆ u32 │
436
+ # # ╞════════╪═════╪═════╡
437
+ # # │ Apple ┆ 2 ┆ 2 │
438
+ # # │ Banana ┆ 3 ┆ 3 │
439
+ # # └────────┴─────┴─────┘
440
+ def n_unique
441
+ agg(F.all.n_unique)
442
+ end
443
+
444
+ # Compute the quantile per group.
445
+ #
446
+ # @param quantile [Float]
447
+ # Quantile between 0.0 and 1.0.
448
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
449
+ # Interpolation method.
450
+ #
451
+ # @return [LazyFrame]
452
+ #
453
+ # @example
454
+ # ldf = Polars::DataFrame.new(
455
+ # {
456
+ # "a" => [1, 2, 2, 3, 4, 5],
457
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
458
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
459
+ # }
460
+ # ).lazy
461
+ # ldf.group_by("d", maintain_order: true).quantile(1).collect
462
+ # # =>
463
+ # # shape: (3, 3)
464
+ # # ┌────────┬─────┬──────┐
465
+ # # │ d ┆ a ┆ b │
466
+ # # │ --- ┆ --- ┆ --- │
467
+ # # │ str ┆ f64 ┆ f64 │
468
+ # # ╞════════╪═════╪══════╡
469
+ # # │ Apple ┆ 3.0 ┆ 10.0 │
470
+ # # │ Orange ┆ 2.0 ┆ 0.5 │
471
+ # # │ Banana ┆ 5.0 ┆ 14.0 │
472
+ # # └────────┴─────┴──────┘
473
+ def quantile(quantile, interpolation: "nearest")
474
+ agg(F.all.quantile(quantile, interpolation: interpolation))
475
+ end
476
+
477
+ # Reduce the groups to the sum.
478
+ #
479
+ # @return [LazyFrame]
480
+ #
481
+ # @example
482
+ # ldf = Polars::DataFrame.new(
483
+ # {
484
+ # "a" => [1, 2, 2, 3, 4, 5],
485
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
486
+ # "c" => [true, true, true, false, false, true],
487
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
488
+ # }
489
+ # ).lazy
490
+ # ldf.group_by("d", maintain_order: true).sum.collect
491
+ # # =>
492
+ # # shape: (3, 4)
493
+ # # ┌────────┬─────┬──────┬─────┐
494
+ # # │ d ┆ a ┆ b ┆ c │
495
+ # # │ --- ┆ --- ┆ --- ┆ --- │
496
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
497
+ # # ╞════════╪═════╪══════╪═════╡
498
+ # # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
499
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
500
+ # # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
501
+ # # └────────┴─────┴──────┴─────┘
502
+ def sum
503
+ agg(F.all.sum)
504
+ end
180
505
  end
181
506
  end
@@ -69,7 +69,7 @@ module Polars
69
69
  #
70
70
  # @example
71
71
  # df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
72
- # df.select(Polars.col("bar").list.lengths)
72
+ # df.select(Polars.col("bar").list.len)
73
73
  # # =>
74
74
  # # shape: (2, 1)
75
75
  # # ┌─────┐
@@ -83,7 +83,6 @@ module Polars
83
83
  def len
84
84
  Utils.wrap_expr(_rbexpr.list_len)
85
85
  end
86
- alias_method :lengths, :len
87
86
 
88
87
  # Drop all null values in the list.
89
88
  #
@@ -318,7 +317,7 @@ module Polars
318
317
 
319
318
  # Sort the arrays in the list.
320
319
  #
321
- # @param reverse [Boolean]
320
+ # @param descending [Boolean]
322
321
  # Sort in descending order.
323
322
  # @param nulls_last [Boolean]
324
323
  # Place null values last.
@@ -342,8 +341,8 @@ module Polars
342
341
  # # │ [1, 2, 3] │
343
342
  # # │ [1, 2, 9] │
344
343
  # # └───────────┘
345
- def sort(reverse: false, nulls_last: false)
346
- Utils.wrap_expr(_rbexpr.list_sort(reverse, nulls_last))
344
+ def sort(descending: false, nulls_last: false)
345
+ Utils.wrap_expr(_rbexpr.list_sort(descending, nulls_last))
347
346
  end
348
347
 
349
348
  # Reverse the arrays in the list.
@@ -477,7 +476,7 @@ module Polars
477
476
  #
478
477
  # @example
479
478
  # df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
480
- # df.select(Polars.col("foo").list.get(0))
479
+ # df.select(Polars.col("foo").list.get(0, null_on_oob: true))
481
480
  # # =>
482
481
  # # shape: (3, 1)
483
482
  # # ┌──────┐
@@ -489,7 +488,7 @@ module Polars
489
488
  # # │ null │
490
489
  # # │ 1 │
491
490
  # # └──────┘
492
- def get(index, null_on_oob: true)
491
+ def get(index, null_on_oob: false)
493
492
  index = Utils.parse_into_expression(index)
494
493
  Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
495
494
  end
@@ -510,8 +509,8 @@ module Polars
510
509
  # Indices to return per sublist
511
510
  # @param null_on_oob [Boolean]
512
511
  # Behavior if an index is out of bounds:
513
- # True -> set as null
514
- # False -> raise an error
512
+ # true -> set as null
513
+ # false -> raise an error
515
514
  # Note that defaulting to raising an error is much cheaper
516
515
  #
517
516
  # @return [Expr]
@@ -534,7 +533,6 @@ module Polars
534
533
  indices = Utils.parse_into_expression(indices)
535
534
  Utils.wrap_expr(_rbexpr.list_gather(indices, null_on_oob))
536
535
  end
537
- alias_method :take, :gather
538
536
 
539
537
  # Take every n-th value starting from offset in sublists.
540
538
  #
@@ -597,7 +595,7 @@ module Polars
597
595
  # # │ 1 │
598
596
  # # └──────┘
599
597
  def first
600
- get(0)
598
+ get(0, null_on_oob: true)
601
599
  end
602
600
 
603
601
  # Get the last value of the sublists.
@@ -619,7 +617,49 @@ module Polars
619
617
  # # │ 2 │
620
618
  # # └──────┘
621
619
  def last
622
- get(-1)
620
+ get(-1, null_on_oob: true)
621
+ end
622
+
623
+ # Get the single value of the sublists.
624
+ #
625
+ # This errors if the sublist length is not exactly one.
626
+ #
627
+ # @param allow_empty [Boolean]
628
+ # Allow an empty sublist, in which case `null` is returned.
629
+ #
630
+ # @return [Expr]
631
+ #
632
+ # @example
633
+ # df = Polars::DataFrame.new({"a" => [[3], [1], [2]]})
634
+ # df.with_columns(item: Polars.col("a").list.item)
635
+ # # =>
636
+ # # shape: (3, 2)
637
+ # # ┌───────────┬──────┐
638
+ # # │ a ┆ item │
639
+ # # │ --- ┆ --- │
640
+ # # │ list[i64] ┆ i64 │
641
+ # # ╞═══════════╪══════╡
642
+ # # │ [3] ┆ 3 │
643
+ # # │ [1] ┆ 1 │
644
+ # # │ [2] ┆ 2 │
645
+ # # └───────────┴──────┘
646
+ #
647
+ # @example
648
+ # df = Polars::DataFrame.new({"a" => [[], [1], [2]]})
649
+ # df.select(Polars.col("a").list.item(allow_empty: true))
650
+ # # =>
651
+ # # shape: (3, 1)
652
+ # # ┌──────┐
653
+ # # │ a │
654
+ # # │ --- │
655
+ # # │ i64 │
656
+ # # ╞══════╡
657
+ # # │ null │
658
+ # # │ 1 │
659
+ # # │ 2 │
660
+ # # └──────┘
661
+ def item(allow_empty: false)
662
+ agg(F.element.item(allow_empty: allow_empty))
623
663
  end
624
664
 
625
665
  # Check if sublists contain the given item.
@@ -876,7 +916,7 @@ module Polars
876
916
  #
877
917
  # @example
878
918
  # df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
879
- # df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
919
+ # df.select(Polars.col("listcol").list.count_matches(2).alias("number_of_twos"))
880
920
  # # =>
881
921
  # # shape: (5, 1)
882
922
  # # ┌────────────────┐
@@ -893,7 +933,6 @@ module Polars
893
933
  def count_matches(element)
894
934
  Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
895
935
  end
896
- alias_method :count_match, :count_matches
897
936
 
898
937
  # Convert a List column into an Array column with the same inner data type.
899
938
  #
@@ -957,7 +996,7 @@ module Polars
957
996
  # # │ [0, 1] ┆ {0,1} │
958
997
  # # │ [0, 1, 2] ┆ {0,1} │
959
998
  # # └───────────┴───────────┘
960
- def to_struct(n_field_strategy: "first_non_null", fields: nil, upper_bound: nil)
999
+ def to_struct(n_field_strategy: nil, fields: nil, upper_bound: nil)
961
1000
  if !fields.is_a?(::Array)
962
1001
  if fields.nil?
963
1002
  fields = upper_bound.times.map { |i| "field_#{i}" }
@@ -979,7 +1018,7 @@ module Polars
979
1018
  #
980
1019
  # @example
981
1020
  # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
982
- # df.with_column(
1021
+ # df.with_columns(
983
1022
  # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
984
1023
  # )
985
1024
  # # =>
@@ -997,6 +1036,45 @@ module Polars
997
1036
  Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr))
998
1037
  end
999
1038
 
1039
+ # Run any polars aggregation expression against the lists' elements.
1040
+ #
1041
+ # @param expr [Expr]
1042
+ # Expression to run. Note that you can select an element with `Polars.element`.
1043
+ #
1044
+ # @return [Expr]
1045
+ #
1046
+ # @example
1047
+ # df = Polars::DataFrame.new({"a" => [[1, nil], [42, 13], [nil, nil]]})
1048
+ # df.with_columns(null_count: Polars.col("a").list.agg(Polars.element.null_count))
1049
+ # # =>
1050
+ # # shape: (3, 2)
1051
+ # # ┌──────────────┬────────────┐
1052
+ # # │ a ┆ null_count │
1053
+ # # │ --- ┆ --- │
1054
+ # # │ list[i64] ┆ u32 │
1055
+ # # ╞══════════════╪════════════╡
1056
+ # # │ [1, null] ┆ 1 │
1057
+ # # │ [42, 13] ┆ 0 │
1058
+ # # │ [null, null] ┆ 2 │
1059
+ # # └──────────────┴────────────┘
1060
+ #
1061
+ # @example
1062
+ # df.with_columns(no_nulls: Polars.col("a").list.agg(Polars.element.drop_nulls))
1063
+ # # =>
1064
+ # # shape: (3, 2)
1065
+ # # ┌──────────────┬───────────┐
1066
+ # # │ a ┆ no_nulls │
1067
+ # # │ --- ┆ --- │
1068
+ # # │ list[i64] ┆ list[i64] │
1069
+ # # ╞══════════════╪═══════════╡
1070
+ # # │ [1, null] ┆ [1] │
1071
+ # # │ [42, 13] ┆ [42, 13] │
1072
+ # # │ [null, null] ┆ [] │
1073
+ # # └──────────────┴───────────┘
1074
+ def agg(expr)
1075
+ Utils.wrap_expr(_rbexpr.list_agg(expr._rbexpr))
1076
+ end
1077
+
1000
1078
  # Filter elements in each list by a boolean expression.
1001
1079
  #
1002
1080
  # @param predicate [Object]