polars-df 0.2.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38828 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.so +0 -0
  10. data/lib/polars/3.1/polars.so +0 -0
  11. data/lib/polars/3.2/polars.so +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,1195 @@
1
+ module Polars
2
+ module LazyFunctions
3
+ # Return an expression representing a column in a DataFrame.
4
+ #
5
+ # @return [Expr]
6
def col(name)
  # A Series contributes its values as the list of names/dtypes to select.
  name = name.to_a if name.is_a?(Series)
  # A bare DataType subclass is handled like a one-element list.
  name = [name] if name.is_a?(Class) && name < DataType

  if name.is_a?(DataType)
    return Utils.wrap_expr(_dtype_cols([name]))
  end

  unless name.is_a?(Array)
    # Single column: symbols are normalized to strings first.
    name = name.to_s if name.is_a?(Symbol)
    return Utils.wrap_expr(RbExpr.col(name))
  end

  head = name[0]
  if name.empty? || head.is_a?(String) || head.is_a?(Symbol)
    column_names = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
    Utils.wrap_expr(RbExpr.cols(column_names))
  elsif Utils.is_polars_dtype(head)
    Utils.wrap_expr(_dtype_cols(name))
  else
    raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
  end
end
31
+
32
+ # Alias for an element being evaluated in an `eval` expression.
33
+ #
34
+ # @return [Expr]
35
+ #
36
+ # @example A horizontal rank computation by taking the elements of a list
37
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
38
+ # df.with_column(
39
+ # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
40
+ # )
41
+ # # =>
42
+ # # shape: (3, 3)
43
+ # # ┌─────┬─────┬────────────┐
44
+ # # │ a ┆ b ┆ rank │
45
+ # # │ --- ┆ --- ┆ --- │
46
+ # # │ i64 ┆ i64 ┆ list[f32] │
47
+ # # ╞═════╪═════╪════════════╡
48
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
49
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
50
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
51
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
52
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
53
+ # # └─────┴─────┴────────────┘
54
def element
  # The element is addressed through a column with an empty name.
  empty_name = ""
  col(empty_name)
end
57
+
58
+ # Count the number of values in this column/context.
59
+ #
60
+ # @param column [String, Series, nil]
61
+ # If dtype is:
62
+ #
63
+ # * `Series` : count the values in the series.
64
+ # * `String` : count the values in this column.
65
+ # * `nil` : count the number of values in this context.
66
+ #
67
+ # @return [Expr, Integer]
68
def count(column = nil)
  # No argument: count within the current context.
  return Utils.wrap_expr(RbExpr.count) if column.nil?
  # A Series is measured directly.
  return column.len if column.is_a?(Series)

  col(column).count
end
79
+
80
+ # Aggregate to list.
81
+ #
82
+ # @return [Expr]
83
def to_list(name)
  # Build the column expression, then aggregate it into a list.
  column_expr = col(name)
  column_expr.list
end
86
+
87
+ # Get the standard deviation.
88
+ #
89
+ # @return [Object]
90
def std(column, ddof: 1)
  # A Series is computed on directly; anything else becomes a column expression.
  target = column.is_a?(Series) ? column : col(column)
  target.std(ddof: ddof)
end
97
+
98
+ # Get the variance.
99
+ #
100
+ # @return [Object]
101
def var(column, ddof: 1)
  # A Series is computed on directly; anything else becomes a column expression.
  target = column.is_a?(Series) ? column : col(column)
  target.var(ddof: ddof)
end
108
+
109
+ # Get the maximum value.
110
+ #
111
+ # @param column [Object]
112
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
113
+ # the input:
114
+ #
115
+ # - [String, Series] -> aggregate the maximum value of that column.
116
+ # - [Array<Expr>] -> aggregate the maximum value horizontally.
117
+ #
118
+ # @return [Expr, Object]
119
def max(column)
  return column.max if column.is_a?(Series)
  return col(column).max if column.is_a?(String) || column.is_a?(Symbol)

  # Any other input is converted to a list of expressions and handed to
  # the multi-expression maximum helper.
  # TODO
  rbexprs = Utils.selection_to_rbexpr_list(column)
  Utils.wrap_expr(_max_exprs(rbexprs))
end
130
+
131
+ # Get the minimum value.
132
+ #
133
+ # @param column [Object]
134
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
135
+ # the input:
136
+ #
137
+ # - [String, Series] -> aggregate the minimum value of that column.
138
+ # - [Array<Expr>] -> aggregate the minimum value horizontally.
139
+ #
140
+ # @return [Expr, Object]
141
def min(column)
  return column.min if column.is_a?(Series)
  return col(column).min if column.is_a?(String) || column.is_a?(Symbol)

  # Any other input is converted to a list of expressions and handed to
  # the multi-expression minimum helper.
  # TODO
  rbexprs = Utils.selection_to_rbexpr_list(column)
  Utils.wrap_expr(_min_exprs(rbexprs))
end
152
+
153
+ # Sum values in a column/Series, or horizontally across list of columns/expressions.
154
+ #
155
+ # @return [Object]
156
def sum(column)
  return column.sum if column.is_a?(Series)
  return col(column.to_s).sum if column.is_a?(String) || column.is_a?(Symbol)

  if column.is_a?(Array)
    # TODO
    rbexprs = Utils.selection_to_rbexpr_list(column)
    return Utils.wrap_expr(_sum_exprs(rbexprs))
  end

  # Fallback: fold with addition, starting from a u32 zero literal.
  zero = lit(0).cast(:u32)
  add = ->(a, b) { a + b }
  fold(zero, add, column).alias("sum")
end
169
+
170
+ # Get the mean value.
171
+ #
172
+ # @return [Expr, Float]
173
def mean(column)
  # A Series is computed on directly; anything else becomes a column expression.
  return column.mean if column.is_a?(Series)

  col(column).mean
end
180
+
181
+ # Get the mean value.
182
+ #
183
+ # @return [Expr, Float]
184
def avg(column)
  # Pure alias: delegates to `mean` unchanged.
  result = mean(column)
  result
end
187
+
188
+ # Get the median value.
189
+ #
190
+ # @return [Object]
191
def median(column)
  # A Series is computed on directly; anything else becomes a column expression.
  return column.median if column.is_a?(Series)

  col(column).median
end
198
+
199
+ # Count unique values.
200
+ #
201
+ # @return [Object]
202
def n_unique(column)
  # A Series is computed on directly; anything else becomes a column expression.
  return column.n_unique if column.is_a?(Series)

  col(column).n_unique
end
209
+
210
+ # Get the first value.
211
+ #
212
+ # @return [Object]
213
def first(column = nil)
  return Utils.wrap_expr(RbExpr.first) if column.nil?
  return col(column).first unless column.is_a?(Series)

  # Eager path: an empty Series has no first value.
  unless column.len > 0
    raise IndexError, "The series is empty, so no first value can be returned."
  end

  column[0]
end
228
+
229
+ # Get the last value.
230
+ #
231
+ # Depending on the input type this function does different things:
232
+ #
233
+ # - nil -> expression to take last column of a context.
234
+ # - String -> syntactic sugar for `Polars.col(..).last`
235
+ # - Series -> Take last value in `Series`
236
+ #
237
+ # @return [Object]
238
def last(column = nil)
  return Utils.wrap_expr(_last) if column.nil?
  return col(column).last unless column.is_a?(Series)

  # Eager path: an empty Series has no last value.
  unless column.len > 0
    raise IndexError, "The series is empty, so no last value can be returned"
  end

  column[-1]
end
252
+
253
+ # Get the first `n` rows.
254
+ #
255
+ # @param column [Object]
256
+ # Column name or Series.
257
+ # @param n [Integer]
258
+ # Number of rows to return.
259
+ #
260
+ # @return [Object]
261
def head(column, n = 10)
  # A Series is sliced directly; anything else becomes a column expression.
  source = column.is_a?(Series) ? column : col(column)
  source.head(n)
end
268
+
269
+ # Get the last `n` rows.
270
+ #
271
+ # @param column [Object]
272
+ # Column name or Series.
273
+ # @param n [Integer]
274
+ # Number of rows to return.
275
+ #
276
+ # @return [Object]
277
def tail(column, n = 10)
  # A Series is sliced directly; anything else becomes a column expression.
  source = column.is_a?(Series) ? column : col(column)
  source.tail(n)
end
284
+
285
+ # Return an expression representing a literal value.
286
+ #
287
+ # @return [Expr]
288
def lit(value)
  # Plain values are wrapped as-is.
  return Utils.wrap_expr(RbExpr.lit(value)) unless value.is_a?(Polars::Series)

  # For a Series, wrap its `_s` handle and carry over a non-empty name as alias.
  series_name = value.name
  literal = Utils.wrap_expr(RbExpr.lit(value._s))
  series_name == "" ? literal : literal.alias(series_name)
end
301
+
302
+ # Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
303
+ #
304
+ # @param column [Object]
305
+ # Column(s) to be used in aggregation.
306
+ #
307
+ # @return [Object]
308
+ #
309
+ # @example
310
+ # df = Polars::DataFrame.new(
311
+ # {
312
+ # "a" => [1, 2],
313
+ # "b" => [3, 4],
314
+ # "c" => [5, 6]
315
+ # }
316
+ # )
317
+ # # =>
318
+ # # shape: (2, 3)
319
+ # # ┌─────┬─────┬─────┐
320
+ # # │ a ┆ b ┆ c │
321
+ # # │ --- ┆ --- ┆ --- │
322
+ # # │ i64 ┆ i64 ┆ i64 │
323
+ # # ╞═════╪═════╪═════╡
324
+ # # │ 1 ┆ 3 ┆ 5 │
325
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
326
+ # # │ 2 ┆ 4 ┆ 6 │
327
+ # # └─────┴─────┴─────┘
328
+ #
329
+ # @example Cumulatively sum a column by name:
330
+ # df.select(Polars.cumsum("a"))
331
+ # # =>
332
+ # # shape: (2, 1)
333
+ # # ┌─────┐
334
+ # # │ a │
335
+ # # │ --- │
336
+ # # │ i64 │
337
+ # # ╞═════╡
338
+ # # │ 1 │
339
+ # # ├╌╌╌╌╌┤
340
+ # # │ 3 │
341
+ # # └─────┘
342
+ #
343
+ # @example Cumulatively sum a list of columns/expressions horizontally:
344
+ # df.with_column(Polars.cumsum(["a", "c"]))
345
+ # # =>
346
+ # # shape: (2, 4)
347
+ # # ┌─────┬─────┬─────┬───────────┐
348
+ # # │ a ┆ b ┆ c ┆ cumsum │
349
+ # # │ --- ┆ --- ┆ --- ┆ --- │
350
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[2] │
351
+ # # ╞═════╪═════╪═════╪═══════════╡
352
+ # # │ 1 ┆ 3 ┆ 5 ┆ {1,6} │
353
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
354
+ # # │ 2 ┆ 4 ┆ 6 ┆ {2,8} │
355
+ # # └─────┴─────┴─────┴───────────┘
356
def cumsum(column)
  if column.is_a?(Series)
    # Eager: delegate to the Series.
    column.cumsum
  elsif column.is_a?(String) || column.is_a?(Symbol)
    # A single column name, given as String or Symbol (matching `sum`,
    # `max` and `min`), is cumulatively summed as a column expression.
    col(column).cumsum
  else
    # Anything else is cumulatively folded with addition, starting from a
    # u32 zero literal; the result is aliased "cumsum".
    cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
  end
end
365
+
366
+ # Compute the spearman rank correlation between two columns.
367
+ #
368
+ # Missing data will be excluded from the computation.
369
+ #
370
+ # @param a [Object]
371
+ # Column name or Expression.
372
+ # @param b [Object]
373
+ # Column name or Expression.
374
+ # @param ddof [Integer]
375
+ # Delta degrees of freedom
376
+ # @param propagate_nans [Boolean]
377
+ # If `true` any `NaN` encountered will lead to `NaN` in the output.
378
+ # Defaults to `false` where `NaN` are regarded as larger than any finite number
379
+ # and thus lead to the highest rank.
380
+ #
381
+ # @return [Expr]
382
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Column names may be given as String or Symbol; both are turned into
  # column expressions. Other inputs are assumed to already respond to
  # `_rbexpr`.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)
  Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
end
391
+
392
+ # Compute the pearson's correlation between two columns.
393
+ #
394
+ # @param a [Object]
395
+ # Column name or Expression.
396
+ # @param b [Object]
397
+ # Column name or Expression.
398
+ # @param ddof [Integer]
399
+ # Delta degrees of freedom
400
+ #
401
+ # @return [Expr]
402
def pearson_corr(a, b, ddof: 1)
  # Column names may be given as String or Symbol; both are turned into
  # column expressions. Other inputs are assumed to already respond to
  # `_rbexpr`.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)
  Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
end
411
+
412
+ # Compute the covariance between two columns/ expressions.
413
+ #
414
+ # @param a [Object]
415
+ # Column name or Expression.
416
+ # @param b [Object]
417
+ # Column name or Expression.
418
+ #
419
+ # @return [Expr]
420
def cov(a, b)
  # Column names may be given as String or Symbol; both are turned into
  # column expressions. Other inputs are assumed to already respond to
  # `_rbexpr`.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)
  Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
end
429
+
430
+ # def map
431
+ # end
432
+
433
+ # def apply
434
+ # end
435
+
436
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
437
+ #
438
+ # @return [Expr]
439
def fold(acc, f, exprs)
  # The accumulator is converted first; strings are treated as literals.
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  # A single expression is wrapped so the selection helper receives a list.
  selection = exprs.is_a?(Expr) ? [exprs] : exprs
  rbexprs = Utils.selection_to_rbexpr_list(selection)
  Utils.wrap_expr(RbExpr.fold(initial._rbexpr, f, rbexprs))
end
448
+
449
+ # def reduce
450
+ # end
451
+
452
+ # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
453
+ #
454
+ # Every cumulative result is added as a separate field in a Struct column.
455
+ #
456
+ # @param acc [Object]
457
+ # Accumulator Expression. This is the value that will be initialized when the fold
458
+ # starts. For a sum this could for instance be lit(0).
459
+ # @param f [Object]
460
+ # Function to apply over the accumulator and the value.
461
+ # Fn(acc, value) -> new_value
462
+ # @param exprs [Object]
463
+ # Expressions to aggregate over. May also be a wildcard expression.
464
+ # @param include_init [Boolean]
465
+ # Include the initial accumulator state as struct field.
466
+ #
467
+ # @return [Object]
468
+ #
469
+ # @note
470
+ # If you simply want the first encountered expression as accumulator,
471
+ # consider using `cumreduce`.
472
def cumfold(acc, f, exprs, include_init: false)
  # The accumulator is converted first; strings are treated as literals.
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  # A single expression is wrapped so the selection helper receives a list.
  selection = exprs.is_a?(Expr) ? [exprs] : exprs
  rbexprs = Utils.selection_to_rbexpr_list(selection)
  Utils.wrap_expr(RbExpr.cumfold(initial._rbexpr, f, rbexprs, include_init))
end
481
+
482
+ # def cumreduce
483
+ # end
484
+
485
+ # Evaluate columnwise or elementwise with a bitwise OR operation.
486
+ #
487
+ # @return [Expr]
488
def any(name)
  if name.is_a?(String) || name.is_a?(Symbol)
    # A single column name, given as String or Symbol (matching `all`),
    # is evaluated on that column.
    col(name).any
  else
    # Anything else is folded with `|` after casting both operands to
    # bool, starting from a `false` literal; the result is aliased "any".
    fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
  end
end
495
+
496
+ # Exclude certain columns from a wildcard/regex selection.
497
+ #
498
+ # @param columns [Object]
499
+ # Column(s) to exclude from selection
500
+ # This can be:
501
+ #
502
+ # - a column name, or multiple column names
503
+ # - a regular expression starting with `^` and ending with `$`
504
+ # - a dtype or multiple dtypes
505
+ #
506
+ # @return [Object]
507
+ #
508
+ # @example
509
+ # df = Polars::DataFrame.new(
510
+ # {
511
+ # "aa" => [1, 2, 3],
512
+ # "ba" => ["a", "b", nil],
513
+ # "cc" => [nil, 2.5, 1.5]
514
+ # }
515
+ # )
516
+ # # =>
517
+ # # shape: (3, 3)
518
+ # # ┌─────┬──────┬──────┐
519
+ # # │ aa ┆ ba ┆ cc │
520
+ # # │ --- ┆ --- ┆ --- │
521
+ # # │ i64 ┆ str ┆ f64 │
522
+ # # ╞═════╪══════╪══════╡
523
+ # # │ 1 ┆ a ┆ null │
524
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
525
+ # # │ 2 ┆ b ┆ 2.5 │
526
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
527
+ # # │ 3 ┆ null ┆ 1.5 │
528
+ # # └─────┴──────┴──────┘
529
+ #
530
+ # @example Exclude by column name(s):
531
+ # df.select(Polars.exclude("ba"))
532
+ # # =>
533
+ # # shape: (3, 2)
534
+ # # ┌─────┬──────┐
535
+ # # │ aa ┆ cc │
536
+ # # │ --- ┆ --- │
537
+ # # │ i64 ┆ f64 │
538
+ # # ╞═════╪══════╡
539
+ # # │ 1 ┆ null │
540
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
541
+ # # │ 2 ┆ 2.5 │
542
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
543
+ # # │ 3 ┆ 1.5 │
544
+ # # └─────┴──────┘
545
+ #
546
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
547
+ # df.select(Polars.exclude("^.*a$"))
548
+ # # =>
549
+ # # shape: (3, 1)
550
+ # # ┌──────┐
551
+ # # │ cc │
552
+ # # │ --- │
553
+ # # │ f64 │
554
+ # # ╞══════╡
555
+ # # │ null │
556
+ # # ├╌╌╌╌╌╌┤
557
+ # # │ 2.5 │
558
+ # # ├╌╌╌╌╌╌┤
559
+ # # │ 1.5 │
560
+ # # └──────┘
561
def exclude(columns)
  # Start from the wildcard selection and remove the requested columns.
  everything = col("*")
  everything.exclude(columns)
end
564
+
565
+ # Do one of two things.
566
+ #
567
+ # * function can do a columnwise or elementwise AND operation
568
+ # * a wildcard column selection
569
+ #
570
+ # @param name [Object]
571
+ # If given this function will apply a bitwise & on the columns.
572
+ #
573
+ # @return [Expr]
574
+ #
575
+ # @example Sum all columns
576
+ # df = Polars::DataFrame.new(
577
+ # {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
578
+ # )
579
+ # df.select(Polars.all.sum)
580
+ # # =>
581
+ # # shape: (1, 3)
582
+ # # ┌─────┬──────┬─────┐
583
+ # # │ a ┆ b ┆ c │
584
+ # # │ --- ┆ --- ┆ --- │
585
+ # # │ i64 ┆ str ┆ i64 │
586
+ # # ╞═════╪══════╪═════╡
587
+ # # │ 6 ┆ null ┆ 3 │
588
+ # # └─────┴──────┴─────┘
589
def all(name = nil)
  # No argument: wildcard column selection.
  return col("*") if name.nil?
  # Only a single column name is supported so far.
  raise Todo unless name.is_a?(String) || name.is_a?(Symbol)

  col(name).all
end
598
+
599
+ # Syntactic sugar for `Polars.col("foo").agg_groups`.
600
+ #
601
+ # @return [Object]
602
def groups(column)
  # Shorthand for calling `agg_groups` on the column expression.
  column_expr = col(column)
  column_expr.agg_groups
end
605
+
606
+ # Syntactic sugar for `Polars.col("foo").quantile(...)`.
607
+ #
608
+ # @param column [String]
609
+ # Column name.
610
+ # @param quantile [Float]
611
+ # Quantile between 0.0 and 1.0.
612
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
613
+ # Interpolation method.
614
+ #
615
+ # @return [Expr]
616
def quantile(column, quantile, interpolation: "nearest")
  # Shorthand for calling `quantile` on the column expression.
  column_expr = col(column)
  column_expr.quantile(quantile, interpolation: interpolation)
end
619
+
620
+ # Create a range expression (or Series).
621
+ #
622
+ # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
623
+ # range size is equal to the length of the DataFrame you are collecting.
624
+ #
625
+ # @param low [Integer, Expr, Series]
626
+ # Lower bound of range.
627
+ # @param high [Integer, Expr, Series]
628
+ # Upper bound of range.
629
+ # @param step [Integer]
630
+ # Step size of the range.
631
+ # @param eager [Boolean]
632
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
633
+ # @param dtype [Symbol]
634
+ # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
635
+ #
636
+ # @return [Expr, Series]
637
+ #
638
+ # @example
639
+ # df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
640
def arange(low, high, step: 1, eager: false, dtype: nil)
  # Bounds may be plain values or expressions; strings are not treated
  # as literals here.
  low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
  high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
  range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))

  # Cast only when a dtype other than the documented i64 default is
  # requested. Both spellings (:i64 and "i64") skip the cast.
  unless dtype.nil? || [:i64, "i64"].include?(dtype)
    range_expr = range_expr.cast(dtype)
  end

  return range_expr unless eager

  # Eager mode: evaluate on an empty frame and name the Series "arange".
  DataFrame.new
    .select(range_expr)
    .to_series
    .rename("arange", in_place: true)
end
658
+
659
+ # Find the indexes that would sort the columns.
660
+ #
661
+ # Argsort by multiple columns. The first column will be used for the ordering.
662
+ # If there are duplicates in the first column, the second column will be used to
663
+ # determine the ordering and so on.
664
+ #
665
+ # @param exprs [Object]
666
+ # Columns used to determine the ordering.
667
+ # @param reverse [Boolean]
668
+ # Default is ascending.
669
+ #
670
+ # @return [Expr]
671
def argsort_by(exprs, reverse: false)
  exprs = [exprs] unless exprs.is_a?(Array)
  # A single boolean is repeated once per sort expression.
  if reverse == true || reverse == false
    reverse = Array.new(exprs.length, reverse)
  end
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.argsort_by(rbexprs, reverse))
end
681
+
682
+ # Create polars `Duration` from distinct time components.
683
+ #
684
+ # @return [Expr]
685
+ #
686
+ # @example
687
+ # df = Polars::DataFrame.new(
688
+ # {
689
+ # "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
690
+ # "add" => [1, 2]
691
+ # }
692
+ # )
693
+ # df.select(
694
+ # [
695
+ # (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
696
+ # (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
697
+ # (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
698
+ # (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
699
+ # "add_milliseconds"
700
+ # ),
701
+ # (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
702
+ # ]
703
+ # )
704
+ # # =>
705
+ # # shape: (2, 5)
706
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
707
+ # # │ add_weeks ┆ add_days ┆ add_seconds ┆ add_milliseconds ┆ add_hours │
708
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
709
+ # # │ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] │
710
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
711
+ # # │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
712
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
713
+ # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
714
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
715
def duration(
  days: nil,
  seconds: nil,
  nanoseconds: nil,
  microseconds: nil,
  milliseconds: nil,
  minutes: nil,
  hours: nil,
  weeks: nil
)
  # Converts a supplied component to its `_rbexpr`; nil stays nil.
  convert = lambda do |component|
    component.nil? ? nil : Utils.expr_to_lit_or_expr(component, str_to_lit: false)._rbexpr
  end

  # Conversion order is kept as in the original implementation.
  hours = convert.call(hours)
  minutes = convert.call(minutes)
  seconds = convert.call(seconds)
  milliseconds = convert.call(milliseconds)
  microseconds = convert.call(microseconds)
  nanoseconds = convert.call(nanoseconds)
  days = convert.call(days)
  weeks = convert.call(weeks)

  rbexpr = _rb_duration(
    days, seconds, nanoseconds, microseconds, milliseconds, minutes, hours, weeks
  )
  Utils.wrap_expr(rbexpr)
end
763
+
764
+ # Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
765
+ #
766
+ # @param exprs [Object]
767
+ # Columns to concat into a Utf8 Series.
768
+ # @param sep [String]
769
+ # String value that will be used to separate the values.
770
+ #
771
+ # @return [Expr]
772
+ #
773
+ # @example
774
+ # df = Polars::DataFrame.new(
775
+ # {
776
+ # "a" => [1, 2, 3],
777
+ # "b" => ["dogs", "cats", nil],
778
+ # "c" => ["play", "swim", "walk"]
779
+ # }
780
+ # )
781
+ # df.with_columns(
782
+ # [
783
+ # Polars.concat_str(
784
+ # [
785
+ # Polars.col("a") * 2,
786
+ # Polars.col("b"),
787
+ # Polars.col("c")
788
+ # ],
789
+ # sep: " "
790
+ # ).alias("full_sentence")
791
+ # ]
792
+ # )
793
+ # # =>
794
+ # # shape: (3, 4)
795
+ # # ┌─────┬──────┬──────┬───────────────┐
796
+ # # │ a ┆ b ┆ c ┆ full_sentence │
797
+ # # │ --- ┆ --- ┆ --- ┆ --- │
798
+ # # │ i64 ┆ str ┆ str ┆ str │
799
+ # # ╞═════╪══════╪══════╪═══════════════╡
800
+ # # │ 1 ┆ dogs ┆ play ┆ 2 dogs play │
801
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
802
+ # # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
803
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
804
+ # # │ 3 ┆ null ┆ walk ┆ null │
805
+ # # └─────┴──────┴──────┴───────────────┘
806
def concat_str(exprs, sep: "")
  # Convert the selection, then concatenate using `sep` as separator.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.concat_str(rbexprs, sep))
end
810
+
811
+ # Format expressions as a string.
812
+ #
813
+ # @param fstring [String]
814
+ # A string with placeholders.
815
+ # For example: "hello_{}" or "{}_world"
816
+ # @param args [Object]
817
+ # Expression(s) that fill the placeholders
818
+ #
819
+ # @return [Expr]
820
+ #
821
+ # @example
822
+ # df = Polars::DataFrame.new(
823
+ # {
824
+ # "a": ["a", "b", "c"],
825
+ # "b": [1, 2, 3]
826
+ # }
827
+ # )
828
+ # df.select(
829
+ # [
830
+ # Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
831
+ # ]
832
+ # )
833
+ # # =>
834
+ # # shape: (3, 1)
835
+ # # ┌─────────────┐
836
+ # # │ fmt │
837
+ # # │ --- │
838
+ # # │ str │
839
+ # # ╞═════════════╡
840
+ # # │ foo_a_bar_1 │
841
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
842
+ # # │ foo_b_bar_2 │
843
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
844
+ # # │ foo_c_bar_3 │
845
+ # # └─────────────┘
846
def format(fstring, *args)
  unless fstring.scan("{}").length == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  # Placeholders consume the arguments left to right.
  pending = args.dup
  # The capture group keeps each "{}" as its own piece in the split.
  pieces = fstring.split(/(\{\})/)
  exprs = pieces.each_with_object([]) do |piece, collected|
    if piece == "{}"
      collected << Utils.expr_to_lit_or_expr(pending.shift, str_to_lit: false)
    elsif !piece.empty?
      collected << lit(piece)
    end
  end

  concat_str(exprs, sep: "")
end
865
+
866
+ # Concat the arrays in a Series dtype List in linear time.
867
+ #
868
+ # @return [Expr]
869
def concat_list(exprs)
  # Convert the selection, then concatenate into a list expression.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  rbexpr = RbExpr.concat_lst(rbexprs)
  Utils.wrap_expr(rbexpr)
end
873
+
874
+ # Collect multiple LazyFrames at the same time.
875
+ #
876
+ # This runs all the computation graphs in parallel on Polars threadpool.
877
+ #
878
+ # @param lazy_frames [Array<LazyFrame>]
879
+ # A list of LazyFrames to collect.
880
+ # @param type_coercion [Boolean]
881
+ # Do type coercion optimization.
882
+ # @param predicate_pushdown [Boolean]
883
+ # Do predicate pushdown optimization.
884
+ # @param projection_pushdown [Boolean]
885
+ # Do projection pushdown optimization.
886
+ # @param simplify_expression [Boolean]
887
+ # Run simplify expressions optimization.
888
+ # @param string_cache [Boolean]
889
+ # This argument is deprecated and will be ignored
890
+ # @param no_optimization [Boolean]
891
+ # Turn off optimizations.
892
+ # @param slice_pushdown [Boolean]
893
+ # Slice pushdown optimization.
894
+ # @param common_subplan_elimination [Boolean]
895
+ # Will try to cache branching subplans that occur on self-joins or unions.
896
+ # @param allow_streaming [Boolean]
897
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
898
+ #
899
+ # @return [Array]
900
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # `no_optimization` switches off these four optimizations;
  # `string_cache` is accepted but not used.
  if no_optimization
    predicate_pushdown = projection_pushdown = false
    slice_pushdown = common_subplan_elimination = false
  end

  # Apply the optimization settings to each frame's `_ldf` handle.
  prepared = []
  lazy_frames.each do |lf|
    prepared.push(
      lf._ldf.optimization_toggle(
        type_coercion,
        predicate_pushdown,
        projection_pushdown,
        simplify_expression,
        slice_pushdown,
        common_subplan_elimination,
        allow_streaming
      )
    )
  end

  # Collect everything, then wrap each result with `Utils.wrap_df`.
  _collect_all(prepared).map { |rbdf| Utils.wrap_df(rbdf) }
end
941
+
942
+ # Run polars expressions without a context.
943
+ #
944
+ # @return [DataFrame]
945
def select(exprs)
  # Evaluate against a frame built from an empty list.
  empty_frame = DataFrame.new([])
  empty_frame.select(exprs)
end
948
+
949
+ # Collect several columns into a Series of dtype Struct.
950
+ #
951
+ # @param exprs [Object]
952
+ # Columns/Expressions to collect into a Struct
953
+ # @param eager [Boolean]
954
+ # Evaluate immediately
955
+ #
956
+ # @return [Object]
957
+ #
958
+ # @example
959
+ # Polars::DataFrame.new(
960
+ # {
961
+ # "int" => [1, 2],
962
+ # "str" => ["a", "b"],
963
+ # "bool" => [true, nil],
964
+ # "list" => [[1, 2], [3]],
965
+ # }
966
+ # ).select([Polars.struct(Polars.all).alias("my_struct")])
967
+ # # =>
968
+ # # shape: (2, 1)
969
+ # # ┌─────────────────────┐
970
+ # # │ my_struct │
971
+ # # │ --- │
972
+ # # │ struct[4] │
973
+ # # ╞═════════════════════╡
974
+ # # │ {1,"a",true,[1, 2]} │
975
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
976
+ # # │ {2,"b",null,[3]} │
977
+ # # └─────────────────────┘
978
+ #
979
+ # @example Only collect specific columns as a struct:
980
+ # df = Polars::DataFrame.new(
981
+ # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
982
+ # )
983
+ # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
984
+ # # =>
985
+ # # shape: (4, 4)
986
+ # # ┌─────┬───────┬─────┬─────────────┐
987
+ # # │ a ┆ b ┆ c ┆ a_and_b │
988
+ # # │ --- ┆ --- ┆ --- ┆ --- │
989
+ # # │ i64 ┆ str ┆ i64 ┆ struct[2] │
990
+ # # ╞═════╪═══════╪═════╪═════════════╡
991
+ # # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
992
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
993
+ # # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
994
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
995
+ # # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
996
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
997
+ # # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
998
+ # # └─────┴───────┴─────┴─────────────┘
999
def struct(exprs, eager: false)
  if eager
    # Evaluate the lazy struct expression immediately. The explicit
    # `return` is required: without it the Series is discarded and the
    # method falls through to the lazy path below.
    return Polars.select(struct(exprs, eager: false)).to_series
  end

  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(_as_struct(exprs))
end
1006
+
1007
+ # Repeat a single value n times.
1008
+ #
1009
+ # @param value [Object]
1010
+ # Value to repeat.
1011
+ # @param n [Integer]
1012
+ # Repeat `n` times.
1013
+ # @param eager [Boolean]
1014
+ # Run eagerly and collect into a `Series`.
1015
+ # @param name [String]
1016
+ # Only used in `eager` mode. As expression, use `alias`.
1017
+ #
1018
+ # @return [Expr, Series]
1019
def repeat(value, n, eager: false, name: nil)
  # An Integer count is turned into a literal expression.
  n = lit(n) if n.is_a?(Integer)
  expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
  return expr unless eager

  # Eager mode: evaluate the same expression without a context and return
  # the resulting Series under `name` (empty string when none is given).
  # The previous eager branch called `py_type_to_dtype(type(value))`,
  # Python leftovers that do not exist in Ruby, so it always raised.
  name = "" if name.nil?
  Polars.select(expr.alias(name)).to_series
end
1033
+
1034
+ # Return indices where `condition` evaluates `true`.
1035
+ #
1036
+ # @param condition [Expr]
1037
+ # Boolean expression to evaluate
1038
+ # @param eager [Boolean]
1039
+ # Whether to apply this function eagerly (as opposed to lazily).
1040
+ #
1041
+ # @return [Expr, Series]
1042
+ #
1043
+ # @example
1044
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
1045
+ # df.select(
1046
+ # [
1047
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
1048
+ # ]
1049
+ # ).to_series
1050
+ # # =>
1051
+ # # shape: (2,)
1052
+ # # Series: 'a' [u32]
1053
+ # # [
1054
+ # # 1
1055
+ # # 3
1056
+ # # ]
1057
def arg_where(condition, eager: false)
  # Lazy path: coerce the condition to an expression and wrap the result of
  # `_arg_where`.
  unless eager
    predicate = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    return Utils.wrap_expr(_arg_where(predicate._rbexpr))
  end

  # Eager path only accepts a Series.
  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
  end

  # Evaluate the lazy variant against a frame built from the Series, selecting
  # the Series by its own name.
  frame = condition.to_frame
  frame.select(arg_where(Polars.col(condition.name))).to_series
end
1068
+
1069
+ # Folds the expressions from left to right, keeping the first non-null value.
1070
+ #
1071
+ # @param exprs [Object]
1072
+ # Expressions to coalesce.
1073
+ #
1074
+ # @return [Expr]
1075
+ #
1076
+ # @example
1077
+ # df = Polars::DataFrame.new(
1078
+ # [
1079
+ # [nil, 1.0, 1.0],
1080
+ # [nil, 2.0, 2.0],
1081
+ # [nil, nil, 3.0],
1082
+ # [nil, nil, nil]
1083
+ # ],
1084
+ # columns: [["a", :f64], ["b", :f64], ["c", :f64]]
1085
+ # )
1086
+ # df.with_column(Polars.coalesce(["a", "b", "c", 99.9]).alias("d"))
1087
+ # # =>
1088
+ # # shape: (4, 4)
1089
+ # # ┌──────┬──────┬──────┬──────┐
1090
+ # # │ a ┆ b ┆ c ┆ d │
1091
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1092
+ # # │ f64 ┆ f64 ┆ f64 ┆ f64 │
1093
+ # # ╞══════╪══════╪══════╪══════╡
1094
+ # # │ null ┆ 1.0 ┆ 1.0 ┆ 1.0 │
1095
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1096
+ # # │ null ┆ 2.0 ┆ 2.0 ┆ 2.0 │
1097
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1098
+ # # │ null ┆ null ┆ 3.0 ┆ 3.0 │
1099
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1100
+ # # │ null ┆ null ┆ null ┆ 99.9 │
1101
+ # # └──────┴──────┴──────┴──────┘
1102
def coalesce(exprs)
  # Convert the selection with Utils.selection_to_rbexpr_list, pass it to
  # `_coalesce_exprs`, and wrap the outcome as an expression.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  coalesced = _coalesce_exprs(rbexprs)
  Utils.wrap_expr(coalesced)
end
1106
+
1107
+ # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1108
+ #
1109
+ # Depending on the `unit` provided, this function will return a different dtype:
1110
+ # - unit: "d" returns pl.Date
1111
+ # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1112
+ # - unit: "ms" returns pl.Datetime["ms"]
1113
+ # - unit: "us" returns pl.Datetime["us"]
1114
+ # - unit: "ns" returns pl.Datetime["ns"]
1115
+ #
1116
+ # @param column [Object]
1117
+ # Series or expression to parse integers to pl.Datetime.
1118
+ # @param unit [String]
1119
+ # The unit of the timesteps since epoch time.
1120
+ # @param eager [Boolean]
1121
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
1122
+ #
1123
+ # @return [Object]
1124
+ #
1125
+ # @example
1126
+ # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1127
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1128
+ # # =>
1129
+ # # shape: (2, 1)
1130
+ # # ┌─────────────────────┐
1131
+ # # │ timestamp │
1132
+ # # │ --- │
1133
+ # # │ datetime[μs] │
1134
+ # # ╞═════════════════════╡
1135
+ # # │ 2022-10-25 07:31:17 │
1136
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1137
+ # # │ 2022-10-25 07:31:39 │
1138
+ # # └─────────────────────┘
1139
def from_epoch(column, unit: "s", eager: false)
  # Casts epoch values in `column` according to `unit`. Only unit "d" is
  # implemented here; "s" and the units in Utils::DTYPE_TEMPORAL_UNITS raise
  # Todo. Raises ArgumentError for an unknown unit, or when `eager` is set and
  # the input did not end up as a Series.

  # Normalise the input: a String is treated as a column name, and anything
  # that is neither a Series nor an Expr is wrapped in a new Series.
  if column.is_a?(String)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  expr =
    if unit == "d"
      column.cast(:date)
    elsif unit == "s"
      # Not implemented yet. Intended cast:
      #   (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
      raise Todo
    elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
      # Not implemented yet. Intended cast:
      #   column.cast(Datetime(unit))
      raise Todo
    else
      # Single braces on purpose: Ruby prints doubled braces literally, so the
      # previous "{{...}}" form (it looks like a leftover format-string escape)
      # showed up verbatim in the message.
      raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
    end

  return expr unless eager

  # Eager evaluation needs concrete data, i.e. a Series (an Array input has
  # already been wrapped in one above).
  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end
1168
+
1169
+ # Start a "when, then, otherwise" expression.
1170
+ #
1171
+ # @return [When]
1172
+ #
1173
+ # @example
1174
+ # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
1175
+ # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
1176
+ # # =>
1177
+ # # shape: (3, 3)
1178
+ # # ┌─────┬─────┬─────────┐
1179
+ # # │ foo ┆ bar ┆ literal │
1180
+ # # │ --- ┆ --- ┆ --- │
1181
+ # # │ i64 ┆ i64 ┆ i32 │
1182
+ # # ╞═════╪═════╪═════════╡
1183
+ # # │ 1 ┆ 3 ┆ -1 │
1184
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
1185
+ # # │ 3 ┆ 4 ┆ 1 │
1186
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
1187
+ # # │ 4 ┆ 0 ┆ 1 │
1188
+ # # └─────┴─────┴─────────┘
1189
def when(expr)
  # Coerce the argument with Utils.expr_to_lit_or_expr, then hand its
  # underlying `_rbexpr` to RbExpr.when and wrap the result in a When.
  predicate = Utils.expr_to_lit_or_expr(expr)
  When.new(RbExpr.when(predicate._rbexpr))
end
1194
+ end
1195
+ end