polars-df 0.2.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,1195 @@
1
+ module Polars
2
+ module LazyFunctions
3
+ # Return an expression representing a column in a DataFrame.
4
+ #
5
+ # @return [Expr]
6
def col(name)
  # Normalise the convenience inputs first: a Series becomes its values,
  # a DataType subclass becomes a one-element list.
  name = name.to_a if name.is_a?(Series)
  name = [name] if name.is_a?(Class) && name < DataType

  # A single DataType instance selects every column of that dtype.
  return Utils.wrap_expr(_dtype_cols([name])) if name.is_a?(DataType)

  # Scalar input: one column name (symbols are stringified).
  unless name.is_a?(Array)
    name = name.to_s if name.is_a?(Symbol)
    return Utils.wrap_expr(RbExpr.col(name))
  end

  # List input: the first element decides whether this is a list of names
  # or a list of dtypes.
  head = name[0]
  if name.length == 0 || head.is_a?(String) || head.is_a?(Symbol)
    names = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
    Utils.wrap_expr(RbExpr.cols(names))
  elsif Utils.is_polars_dtype(head)
    Utils.wrap_expr(_dtype_cols(name))
  else
    raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
  end
end
31
+
32
+ # Alias for an element being evaluated in an `eval` expression.
33
+ #
34
+ # @return [Expr]
35
+ #
36
+ # @example A horizontal rank computation by taking the elements of a list
37
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
38
+ # df.with_column(
39
+ # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
40
+ # )
41
+ # # =>
42
+ # # shape: (3, 3)
43
+ # # ┌─────┬─────┬────────────┐
44
+ # # │ a ┆ b ┆ rank │
45
+ # # │ --- ┆ --- ┆ --- │
46
+ # # │ i64 ┆ i64 ┆ list[f32] │
47
+ # # ╞═════╪═════╪════════════╡
48
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
49
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
50
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
51
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
52
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
53
+ # # └─────┴─────┴────────────┘
54
def element
  # The element is addressed through the column with the empty name.
  placeholder_name = ""
  col(placeholder_name)
end
57
+
58
+ # Count the number of values in this column/context.
59
+ #
60
+ # @param column [String, Series, nil]
61
+ # If dtype is:
62
+ #
63
+ # * `Series` : count the values in the series.
64
+ # * `String` : count the values in this column.
65
+ # * `nil` : count the number of values in this context.
66
+ #
67
+ # @return [Expr, Integer]
68
def count(column = nil)
  # No argument: count the values of the surrounding context.
  return Utils.wrap_expr(RbExpr.count) if column.nil?

  # A Series is measured directly; anything else is treated as a column.
  column.is_a?(Series) ? column.len : col(column).count
end
79
+
80
+ # Aggregate to list.
81
+ #
82
+ # @return [Expr]
83
def to_list(name)
  # Shorthand for calling `list` on the named column expression.
  column_expr = col(name)
  column_expr.list
end
86
+
87
+ # Get the standard deviation.
88
+ #
89
+ # @return [Object]
90
def std(column, ddof: 1)
  # A Series is aggregated as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.std(ddof: ddof)
end
97
+
98
+ # Get the variance.
99
+ #
100
+ # @return [Object]
101
def var(column, ddof: 1)
  # A Series is aggregated as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.var(ddof: ddof)
end
108
+
109
+ # Get the maximum value.
110
+ #
111
+ # @param column [Object]
112
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
113
+ # the input:
114
+ #
115
+ # - [String, Series] -> aggregate the maximum value of that column.
116
+ # - [Array<Expr>] -> aggregate the maximum value horizontally.
117
+ #
118
+ # @return [Expr, Object]
119
def max(column)
  return column.max if column.is_a?(Series)
  return col(column).max if column.is_a?(String) || column.is_a?(Symbol)

  # Remaining inputs are a selection of expressions, reduced horizontally.
  # TODO
  rb_exprs = Utils.selection_to_rbexpr_list(column)
  Utils.wrap_expr(_max_exprs(rb_exprs))
end
130
+
131
+ # Get the minimum value.
132
+ #
133
+ # @param column [Object]
134
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
135
+ # the input:
136
+ #
137
+ # - [String, Series] -> aggregate the minimum value of that column.
138
+ # - [Array<Expr>] -> aggregate the minimum value horizontally.
139
+ #
140
+ # @return [Expr, Object]
141
def min(column)
  return column.min if column.is_a?(Series)
  return col(column).min if column.is_a?(String) || column.is_a?(Symbol)

  # Remaining inputs are a selection of expressions, reduced horizontally.
  # TODO
  rb_exprs = Utils.selection_to_rbexpr_list(column)
  Utils.wrap_expr(_min_exprs(rb_exprs))
end
152
+
153
+ # Sum values in a column/Series, or horizontally across list of columns/expressions.
154
+ #
155
+ # @return [Object]
156
def sum(column)
  return column.sum if column.is_a?(Series)
  return col(column.to_s).sum if column.is_a?(String) || column.is_a?(Symbol)

  if column.is_a?(Array)
    # A list of columns/expressions is summed horizontally.
    # TODO
    rb_exprs = Utils.selection_to_rbexpr_list(column)
    return Utils.wrap_expr(_sum_exprs(rb_exprs))
  end

  # Any other selection is folded with `+`, starting from a u32 zero.
  adder = ->(a, b) { a + b }
  fold(lit(0).cast(:u32), adder, column).alias("sum")
end
169
+
170
+ # Get the mean value.
171
+ #
172
+ # @return [Expr, Float]
173
def mean(column)
  # A Series is aggregated as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.mean
end
180
+
181
+ # Get the mean value.
182
+ #
183
+ # @return [Expr, Float]
184
def avg(column)
  # Pure alias: delegates to `mean` unchanged.
  result = mean(column)
  result
end
187
+
188
+ # Get the median value.
189
+ #
190
+ # @return [Object]
191
def median(column)
  # A Series is aggregated as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.median
end
198
+
199
+ # Count unique values.
200
+ #
201
+ # @return [Object]
202
def n_unique(column)
  # A Series is aggregated as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.n_unique
end
209
+
210
+ # Get the first value.
211
+ #
212
+ # @return [Object]
213
def first(column = nil)
  # No argument: expression for the first column of the context.
  return Utils.wrap_expr(RbExpr.first) if column.nil?
  # Non-Series input: first value of the column expression.
  return col(column).first unless column.is_a?(Series)

  # Series input: index directly, but refuse an empty Series.
  unless column.len > 0
    raise IndexError, "The series is empty, so no first value can be returned."
  end
  column[0]
end
228
+
229
+ # Get the last value.
230
+ #
231
+ # Depending on the input type this function does different things:
232
+ #
233
+ # - nil -> expression to take last column of a context.
234
+ # - String -> syntactic sugar for `Polars.col(..).last`
235
+ # - Series -> Take last value in `Series`
236
+ #
237
+ # @return [Object]
238
def last(column = nil)
  # No argument: expression for the last column of the context.
  return Utils.wrap_expr(_last) if column.nil?
  # Non-Series input: last value of the column expression.
  return col(column).last unless column.is_a?(Series)

  # Series input: index directly, but refuse an empty Series.
  unless column.len > 0
    raise IndexError, "The series is empty, so no last value can be returned"
  end
  column[-1]
end
252
+
253
+ # Get the first `n` rows.
254
+ #
255
+ # @param column [Object]
256
+ # Column name or Series.
257
+ # @param n [Integer]
258
+ # Number of rows to return.
259
+ #
260
+ # @return [Object]
261
def head(column, n = 10)
  # A Series is sliced as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.head(n)
end
268
+
269
+ # Get the last `n` rows.
270
+ #
271
+ # @param column [Object]
272
+ # Column name or Series.
273
+ # @param n [Integer]
274
+ # Number of rows to return.
275
+ #
276
+ # @return [Object]
277
def tail(column, n = 10)
  # A Series is sliced as-is; any other input is resolved via `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.tail(n)
end
284
+
285
+ # Return an expression representing a literal value.
286
+ #
287
+ # @return [Expr]
288
def lit(value)
  # Plain Ruby values are handed to the native layer untouched.
  return Utils.wrap_expr(RbExpr.lit(value)) unless value.is_a?(Polars::Series)

  # For a Series, pass its underlying `_s` object and carry the Series
  # name over as an alias (unless the name is empty).
  series_name = value.name
  literal = Utils.wrap_expr(RbExpr.lit(value._s))
  series_name == "" ? literal : literal.alias(series_name)
end
301
+
302
+ # Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
303
+ #
304
+ # @param column [Object]
305
+ # Column(s) to be used in aggregation.
306
+ #
307
+ # @return [Object]
308
+ #
309
+ # @example
310
+ # df = Polars::DataFrame.new(
311
+ # {
312
+ # "a" => [1, 2],
313
+ # "b" => [3, 4],
314
+ # "c" => [5, 6]
315
+ # }
316
+ # )
317
+ # # =>
318
+ # # shape: (2, 3)
319
+ # # ┌─────┬─────┬─────┐
320
+ # # │ a ┆ b ┆ c │
321
+ # # │ --- ┆ --- ┆ --- │
322
+ # # │ i64 ┆ i64 ┆ i64 │
323
+ # # ╞═════╪═════╪═════╡
324
+ # # │ 1 ┆ 3 ┆ 5 │
325
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
326
+ # # │ 2 ┆ 4 ┆ 6 │
327
+ # # └─────┴─────┴─────┘
328
+ #
329
+ # @example Cumulatively sum a column by name:
330
+ # df.select(Polars.cumsum("a"))
331
+ # # =>
332
+ # # shape: (2, 1)
333
+ # # ┌─────┐
334
+ # # │ a │
335
+ # # │ --- │
336
+ # # │ i64 │
337
+ # # ╞═════╡
338
+ # # │ 1 │
339
+ # # ├╌╌╌╌╌┤
340
+ # # │ 3 │
341
+ # # └─────┘
342
+ #
343
+ # @example Cumulatively sum a list of columns/expressions horizontally:
344
+ # df.with_column(Polars.cumsum(["a", "c"]))
345
+ # # =>
346
+ # # shape: (2, 4)
347
+ # # ┌─────┬─────┬─────┬───────────┐
348
+ # # │ a ┆ b ┆ c ┆ cumsum │
349
+ # # │ --- ┆ --- ┆ --- ┆ --- │
350
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[2] │
351
+ # # ╞═════╪═════╪═════╪═══════════╡
352
+ # # │ 1 ┆ 3 ┆ 5 ┆ {1,6} │
353
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
354
+ # # │ 2 ┆ 4 ┆ 6 ┆ {2,8} │
355
+ # # └─────┴─────┴─────┴───────────┘
356
def cumsum(column)
  if column.is_a?(Series)
    # Eager input: cumulative sum of the Series itself.
    column.cumsum
  elsif column.is_a?(String) || column.is_a?(Symbol)
    # A column name. Symbols are accepted like strings, consistent with
    # `sum`, `max` and `min`; `col` takes care of stringifying them.
    col(column).cumsum
  else
    # Any other selection is accumulated horizontally with `+`, starting
    # from a u32 zero; the result is aliased "cumsum".
    cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
  end
end
365
+
366
+ # Compute the spearman rank correlation between two columns.
367
+ #
368
+ # Missing data will be excluded from the computation.
369
+ #
370
+ # @param a [Object]
371
+ # Column name or Expression.
372
+ # @param b [Object]
373
+ # Column name or Expression.
374
+ # @param ddof [Integer]
375
+ # Delta degrees of freedom
376
+ # @param propagate_nans [Boolean]
377
+ # If `true` any `NaN` encountered will lead to `NaN` in the output.
378
+ # Defaults to `false` where `NaN` are regarded as larger than any finite number
379
+ # and thus lead to the highest rank.
380
+ #
381
+ # @return [Expr]
382
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Column names (String or Symbol) are turned into column expressions;
  # anything else is expected to respond to `_rbexpr` already.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)

  Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
end
391
+
392
+ # Compute the pearson's correlation between two columns.
393
+ #
394
+ # @param a [Object]
395
+ # Column name or Expression.
396
+ # @param b [Object]
397
+ # Column name or Expression.
398
+ # @param ddof [Integer]
399
+ # Delta degrees of freedom
400
+ #
401
+ # @return [Expr]
402
def pearson_corr(a, b, ddof: 1)
  # Column names (String or Symbol) are turned into column expressions;
  # anything else is expected to respond to `_rbexpr` already.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)

  Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
end
411
+
412
+ # Compute the covariance between two columns/ expressions.
413
+ #
414
+ # @param a [Object]
415
+ # Column name or Expression.
416
+ # @param b [Object]
417
+ # Column name or Expression.
418
+ #
419
+ # @return [Expr]
420
def cov(a, b)
  # Column names (String or Symbol) are turned into column expressions;
  # anything else is expected to respond to `_rbexpr` already.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)

  Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
end
429
+
430
+ # def map
431
+ # end
432
+
433
+ # def apply
434
+ # end
435
+
436
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
437
+ #
438
+ # @return [Expr]
439
def fold(acc, f, exprs)
  # The accumulator may be a plain value; strings become string literals.
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  # A lone expression is treated as a one-element selection.
  targets = exprs.is_a?(Expr) ? [exprs] : exprs

  rb_targets = Utils.selection_to_rbexpr_list(targets)
  Utils.wrap_expr(RbExpr.fold(initial._rbexpr, f, rb_targets))
end
448
+
449
+ # def reduce
450
+ # end
451
+
452
+ # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
453
+ #
454
+ # Every cumulative result is added as a separate field in a Struct column.
455
+ #
456
+ # @param acc [Object]
457
+ # Accumulator Expression. This is the value that will be initialized when the fold
458
+ # starts. For a sum this could for instance be lit(0).
459
+ # @param f [Object]
460
+ # Function to apply over the accumulator and the value.
461
+ # Fn(acc, value) -> new_value
462
+ # @param exprs [Object]
463
+ # Expressions to aggregate over. May also be a wildcard expression.
464
+ # @param include_init [Boolean]
465
+ # Include the initial accumulator state as struct field.
466
+ #
467
+ # @return [Object]
468
+ #
469
+ # @note
470
+ # If you simply want the first encountered expression as accumulator,
471
+ # consider using `cumreduce`.
472
def cumfold(acc, f, exprs, include_init: false)
  # The accumulator may be a plain value; strings become string literals.
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  # A lone expression is treated as a one-element selection.
  targets = exprs.is_a?(Expr) ? [exprs] : exprs

  rb_targets = Utils.selection_to_rbexpr_list(targets)
  Utils.wrap_expr(RbExpr.cumfold(initial._rbexpr, f, rb_targets, include_init))
end
481
+
482
+ # def cumreduce
483
+ # end
484
+
485
+ # Evaluate columnwise or elementwise with a bitwise OR operation.
486
+ #
487
+ # @return [Expr]
488
def any(name)
  if name.is_a?(String) || name.is_a?(Symbol)
    # A single column name (String or Symbol): columnwise `any`.
    col(name).any
  else
    # A selection of columns/expressions: OR them together row by row,
    # casting each side to bool first; the result is aliased "any".
    fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
  end
end
495
+
496
+ # Exclude certain columns from a wildcard/regex selection.
497
+ #
498
+ # @param columns [Object]
499
+ # Column(s) to exclude from selection
500
+ # This can be:
501
+ #
502
+ # - a column name, or multiple column names
503
+ # - a regular expression starting with `^` and ending with `$`
504
+ # - a dtype or multiple dtypes
505
+ #
506
+ # @return [Object]
507
+ #
508
+ # @example
509
+ # df = Polars::DataFrame.new(
510
+ # {
511
+ # "aa" => [1, 2, 3],
512
+ # "ba" => ["a", "b", nil],
513
+ # "cc" => [nil, 2.5, 1.5]
514
+ # }
515
+ # )
516
+ # # =>
517
+ # # shape: (3, 3)
518
+ # # ┌─────┬──────┬──────┐
519
+ # # │ aa ┆ ba ┆ cc │
520
+ # # │ --- ┆ --- ┆ --- │
521
+ # # │ i64 ┆ str ┆ f64 │
522
+ # # ╞═════╪══════╪══════╡
523
+ # # │ 1 ┆ a ┆ null │
524
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
525
+ # # │ 2 ┆ b ┆ 2.5 │
526
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
527
+ # # │ 3 ┆ null ┆ 1.5 │
528
+ # # └─────┴──────┴──────┘
529
+ #
530
+ # @example Exclude by column name(s):
531
+ # df.select(Polars.exclude("ba"))
532
+ # # =>
533
+ # # shape: (3, 2)
534
+ # # ┌─────┬──────┐
535
+ # # │ aa ┆ cc │
536
+ # # │ --- ┆ --- │
537
+ # # │ i64 ┆ f64 │
538
+ # # ╞═════╪══════╡
539
+ # # │ 1 ┆ null │
540
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
541
+ # # │ 2 ┆ 2.5 │
542
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
543
+ # # │ 3 ┆ 1.5 │
544
+ # # └─────┴──────┘
545
+ #
546
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
547
+ # df.select(Polars.exclude("^.*a$"))
548
+ # # =>
549
+ # # shape: (3, 1)
550
+ # # ┌──────┐
551
+ # # │ cc │
552
+ # # │ --- │
553
+ # # │ f64 │
554
+ # # ╞══════╡
555
+ # # │ null │
556
+ # # ├╌╌╌╌╌╌┤
557
+ # # │ 2.5 │
558
+ # # ├╌╌╌╌╌╌┤
559
+ # # │ 1.5 │
560
+ # # └──────┘
561
def exclude(columns)
  # Start from the wildcard selection and drop the requested columns.
  everything = col("*")
  everything.exclude(columns)
end
564
+
565
+ # Do one of two things.
566
+ #
567
+ # * function can do a columnwise or elementwise AND operation
568
+ # * a wildcard column selection
569
+ #
570
+ # @param name [Object]
571
+ # If given this function will apply a bitwise & on the columns.
572
+ #
573
+ # @return [Expr]
574
+ #
575
+ # @example Sum all columns
576
+ # df = Polars::DataFrame.new(
577
+ # {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
578
+ # )
579
+ # df.select(Polars.all.sum)
580
+ # # =>
581
+ # # shape: (1, 3)
582
+ # # ┌─────┬──────┬─────┐
583
+ # # │ a ┆ b ┆ c │
584
+ # # │ --- ┆ --- ┆ --- │
585
+ # # │ i64 ┆ str ┆ i64 │
586
+ # # ╞═════╪══════╪═════╡
587
+ # # │ 6 ┆ null ┆ 3 │
588
+ # # └─────┴──────┴─────┘
589
def all(name = nil)
  # No argument: wildcard selection of every column.
  return col("*") if name.nil?
  # Only a single column name is supported so far.
  raise Todo unless name.is_a?(String) || name.is_a?(Symbol)

  col(name).all
end
598
+
599
+ # Syntactic sugar for `Polars.col("foo").agg_groups`.
600
+ #
601
+ # @return [Object]
602
def groups(column)
  # Shorthand for `agg_groups` on the named column expression.
  column_expr = col(column)
  column_expr.agg_groups
end
605
+
606
+ # Syntactic sugar for `Polars.col("foo").quantile(...)`.
607
+ #
608
+ # @param column [String]
609
+ # Column name.
610
+ # @param quantile [Float]
611
+ # Quantile between 0.0 and 1.0.
612
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
613
+ # Interpolation method.
614
+ #
615
+ # @return [Expr]
616
def quantile(column, quantile, interpolation: "nearest")
  # Shorthand for `quantile` on the named column expression.
  column_expr = col(column)
  column_expr.quantile(quantile, interpolation: interpolation)
end
619
+
620
+ # Create a range expression (or Series).
621
+ #
622
+ # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
623
+ # range size is equal to the length of the DataFrame you are collecting.
624
+ #
625
+ # @param low [Integer, Expr, Series]
626
+ # Lower bound of range.
627
+ # @param high [Integer, Expr, Series]
628
+ # Upper bound of range.
629
+ # @param step [Integer]
630
+ # Step size of the range.
631
+ # @param eager [Boolean]
632
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
633
+ # @param dtype [Symbol]
634
+ # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
635
+ #
636
+ # @return [Expr, Series]
637
+ #
638
+ # @example
639
+ # df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
640
def arange(low, high, step: 1, eager: false, dtype: nil)
  # Bounds may be plain integers or expressions; strings are treated as
  # column names rather than string literals.
  low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
  high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
  range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))

  # i64 is the documented default dtype, so a cast is only needed for
  # something else. Both the Symbol and the String spelling are recognised
  # (the original only compared against the String, so `:i64` still cast).
  default_dtype = dtype.nil? || dtype == :i64 || dtype == "i64"
  range_expr = range_expr.cast(dtype) unless default_dtype

  return range_expr unless eager

  # Eager mode: evaluate against an empty frame and name the result "arange".
  DataFrame.new
    .select(range_expr)
    .to_series
    .rename("arange", in_place: true)
end
658
+
659
+ # Find the indexes that would sort the columns.
660
+ #
661
+ # Argsort by multiple columns. The first column will be used for the ordering.
662
+ # If there are duplicates in the first column, the second column will be used to
663
+ # determine the ordering and so on.
664
+ #
665
+ # @param exprs [Object]
666
+ # Columns used to determine the ordering.
667
+ # @param reverse [Boolean]
668
+ # Default is ascending.
669
+ #
670
+ # @return [Expr]
671
def argsort_by(exprs, reverse: false)
  # A single column/expression is treated as a one-element list.
  columns = exprs.is_a?(Array) ? exprs : [exprs]

  # A single boolean applies to every column; a list is passed through.
  single_flag = reverse == true || reverse == false
  directions = single_flag ? [reverse] * columns.length : reverse

  rb_columns = Utils.selection_to_rbexpr_list(columns)
  Utils.wrap_expr(RbExpr.argsort_by(rb_columns, directions))
end
681
+
682
+ # Create polars `Duration` from distinct time components.
683
+ #
684
+ # @return [Expr]
685
+ #
686
+ # @example
687
+ # df = Polars::DataFrame.new(
688
+ # {
689
+ # "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
690
+ # "add" => [1, 2]
691
+ # }
692
+ # )
693
+ # df.select(
694
+ # [
695
+ # (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
696
+ # (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
697
+ # (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
698
+ # (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
699
+ # "add_milliseconds"
700
+ # ),
701
+ # (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
702
+ # ]
703
+ # )
704
+ # # =>
705
+ # # shape: (2, 5)
706
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
707
+ # # │ add_weeks ┆ add_days ┆ add_seconds ┆ add_milliseconds ┆ add_hours │
708
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
709
+ # # │ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] │
710
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
711
+ # # │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
712
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
713
+ # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
714
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
715
def duration(
  days: nil,
  seconds: nil,
  nanoseconds: nil,
  microseconds: nil,
  milliseconds: nil,
  minutes: nil,
  hours: nil,
  weeks: nil
)
  # Each supplied component is converted to a native expression (strings
  # are column names, not literals); omitted components stay nil.
  to_rbexpr = lambda do |component|
    component.nil? ? nil : Utils.expr_to_lit_or_expr(component, str_to_lit: false)._rbexpr
  end

  # Converted in the same order as before: hours down to nanoseconds,
  # then days and weeks.
  hours, minutes, seconds, milliseconds, microseconds, nanoseconds, days, weeks =
    [hours, minutes, seconds, milliseconds, microseconds, nanoseconds, days, weeks].map(&to_rbexpr)

  rb_duration = _rb_duration(
    days,
    seconds,
    nanoseconds,
    microseconds,
    milliseconds,
    minutes,
    hours,
    weeks
  )
  Utils.wrap_expr(rb_duration)
end
763
+
764
+ # Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
765
+ #
766
+ # @param exprs [Object]
767
+ # Columns to concat into a Utf8 Series.
768
+ # @param sep [String]
769
+ # String value that will be used to separate the values.
770
+ #
771
+ # @return [Expr]
772
+ #
773
+ # @example
774
+ # df = Polars::DataFrame.new(
775
+ # {
776
+ # "a" => [1, 2, 3],
777
+ # "b" => ["dogs", "cats", nil],
778
+ # "c" => ["play", "swim", "walk"]
779
+ # }
780
+ # )
781
+ # df.with_columns(
782
+ # [
783
+ # Polars.concat_str(
784
+ # [
785
+ # Polars.col("a") * 2,
786
+ # Polars.col("b"),
787
+ # Polars.col("c")
788
+ # ],
789
+ # sep: " "
790
+ # ).alias("full_sentence")
791
+ # ]
792
+ # )
793
+ # # =>
794
+ # # shape: (3, 4)
795
+ # # ┌─────┬──────┬──────┬───────────────┐
796
+ # # │ a ┆ b ┆ c ┆ full_sentence │
797
+ # # │ --- ┆ --- ┆ --- ┆ --- │
798
+ # # │ i64 ┆ str ┆ str ┆ str │
799
+ # # ╞═════╪══════╪══════╪═══════════════╡
800
+ # # │ 1 ┆ dogs ┆ play ┆ 2 dogs play │
801
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
802
+ # # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
803
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
804
+ # # │ 3 ┆ null ┆ walk ┆ null │
805
+ # # └─────┴──────┴──────┴───────────────┘
806
def concat_str(exprs, sep: "")
  # Resolve the selection to native expressions, then join them with `sep`.
  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.concat_str(rb_exprs, sep))
end
810
+
811
+ # Format expressions as a string.
812
+ #
813
+ # @param fstring [String]
814
+ # A string with placeholders.
815
+ # For example: "hello_{}" or "{}_world"
816
+ # @param args [Object]
817
+ # Expression(s) that fill the placeholders
818
+ #
819
+ # @return [Expr]
820
+ #
821
+ # @example
822
+ # df = Polars::DataFrame.new(
823
+ # {
824
+ # "a": ["a", "b", "c"],
825
+ # "b": [1, 2, 3]
826
+ # }
827
+ # )
828
+ # df.select(
829
+ # [
830
+ # Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
831
+ # ]
832
+ # )
833
+ # # =>
834
+ # # shape: (3, 1)
835
+ # # ┌─────────────┐
836
+ # # │ fmt │
837
+ # # │ --- │
838
+ # # │ str │
839
+ # # ╞═════════════╡
840
+ # # │ foo_a_bar_1 │
841
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
842
+ # # │ foo_b_bar_2 │
843
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
844
+ # # │ foo_c_bar_3 │
845
+ # # └─────────────┘
846
def format(fstring, *args)
  placeholder_count = fstring.scan("{}").length
  unless placeholder_count == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  # Walk the template in order: each "{}" consumes the next argument,
  # every other non-empty piece becomes a string literal.
  remaining = args.dup
  pieces = fstring.split(/(\{\})/).each_with_object([]) do |segment, acc|
    if segment == "{}"
      acc << Utils.expr_to_lit_or_expr(remaining.shift, str_to_lit: false)
    elsif segment.length > 0
      acc << lit(segment)
    end
  end

  concat_str(pieces, sep: "")
end
865
+
866
+ # Concat the arrays in a Series dtype List in linear time.
867
+ #
868
+ # @return [Expr]
869
def concat_list(exprs)
  # Resolve the selection to native expressions, then concatenate them.
  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  concatenated = RbExpr.concat_lst(rb_exprs)
  Utils.wrap_expr(concatenated)
end
873
+
874
+ # Collect multiple LazyFrames at the same time.
875
+ #
876
+ # This runs all the computation graphs in parallel on Polars threadpool.
877
+ #
878
+ # @param lazy_frames [Array<LazyFrame>]
879
+ # A list of LazyFrames to collect.
880
+ # @param type_coercion [Boolean]
881
+ # Do type coercion optimization.
882
+ # @param predicate_pushdown [Boolean]
883
+ # Do predicate pushdown optimization.
884
+ # @param projection_pushdown [Boolean]
885
+ # Do projection pushdown optimization.
886
+ # @param simplify_expression [Boolean]
887
+ # Run simplify expressions optimization.
888
+ # @param string_cache [Boolean]
889
+ # This argument is deprecated and will be ignored
890
+ # @param no_optimization [Boolean]
891
+ # Turn off optimizations.
892
+ # @param slice_pushdown [Boolean]
893
+ # Slice pushdown optimization.
894
+ # @param common_subplan_elimination [Boolean]
895
+ # Will try to cache branching subplans that occur on self-joins or unions.
896
+ # @param allow_streaming [Boolean]
897
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
898
+ #
899
+ # @return [Array]
900
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # `no_optimization` switches off these four optimizations in one go.
  if no_optimization
    predicate_pushdown = projection_pushdown = slice_pushdown = common_subplan_elimination = false
  end

  # Apply the same optimization toggles to every frame's `_ldf`.
  prepared = lazy_frames.map do |lf|
    lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming
    )
  end

  # Collect everything, then wrap each native frame in a DataFrame.
  _collect_all(prepared).map { |rbdf| Utils.wrap_df(rbdf) }
end
941
+
942
+ # Run polars expressions without a context.
943
+ #
944
+ # @return [DataFrame]
945
def select(exprs)
  # Evaluate the expressions against a frame built from an empty list.
  blank_frame = DataFrame.new([])
  blank_frame.select(exprs)
end
948
+
949
+ # Collect several columns into a Series of dtype Struct.
950
+ #
951
+ # @param exprs [Object]
952
+ # Columns/Expressions to collect into a Struct
953
+ # @param eager [Boolean]
954
+ # Evaluate immediately
955
+ #
956
+ # @return [Object]
957
+ #
958
+ # @example
959
+ # Polars::DataFrame.new(
960
+ # {
961
+ # "int" => [1, 2],
962
+ # "str" => ["a", "b"],
963
+ # "bool" => [true, nil],
964
+ # "list" => [[1, 2], [3]],
965
+ # }
966
+ # ).select([Polars.struct(Polars.all).alias("my_struct")])
967
+ # # =>
968
+ # # shape: (2, 1)
969
+ # # ┌─────────────────────┐
970
+ # # │ my_struct │
971
+ # # │ --- │
972
+ # # │ struct[4] │
973
+ # # ╞═════════════════════╡
974
+ # # │ {1,"a",true,[1, 2]} │
975
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
976
+ # # │ {2,"b",null,[3]} │
977
+ # # └─────────────────────┘
978
+ #
979
+ # @example Only collect specific columns as a struct:
980
+ # df = Polars::DataFrame.new(
981
+ # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
982
+ # )
983
+ # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
984
+ # # =>
985
+ # # shape: (4, 4)
986
+ # # ┌─────┬───────┬─────┬─────────────┐
987
+ # # │ a ┆ b ┆ c ┆ a_and_b │
988
+ # # │ --- ┆ --- ┆ --- ┆ --- │
989
+ # # │ i64 ┆ str ┆ i64 ┆ struct[2] │
990
+ # # ╞═════╪═══════╪═════╪═════════════╡
991
+ # # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
992
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
993
+ # # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
994
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
995
+ # # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
996
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
997
+ # # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
998
+ # # └─────┴───────┴─────┴─────────────┘
999
def struct(exprs, eager: false)
  # Eager mode: build the lazy expression, evaluate it immediately and
  # return the resulting Series. The explicit `return` matters: without
  # it the Series was discarded and an Expr was returned regardless.
  return Polars.select(struct(exprs, eager: false)).to_series if eager

  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(_as_struct(rb_exprs))
end
1006
+
1007
+ # Repeat a single value n times.
1008
+ #
1009
+ # @param value [Object]
1010
+ # Value to repeat.
1011
+ # @param n [Integer]
1012
+ # Repeat `n` times.
1013
+ # @param eager [Boolean]
1014
+ # Run eagerly and collect into a `Series`.
1015
+ # @param name [String]
1016
+ # Only used in `eager` mode. As expression, use `alias`.
1017
+ #
1018
+ # @return [Expr, Series]
1019
def repeat(value, n, eager: false, name: nil)
  # An Integer count is promoted to a literal expression; any other `n`
  # is expected to respond to `_rbexpr` already.
  n = lit(n) if n.is_a?(Integer)
  expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
  return expr unless eager

  # Eager mode previously called `py_type_to_dtype(type(value))`, a Python
  # leftover that raises in Ruby. Evaluate the lazy expression instead and
  # return the resulting Series, named `name` (empty string when omitted).
  # NOTE(review): assumes a `repeat` expression can be selected without a
  # surrounding frame, as `Polars.select` is used elsewhere in this module
  # - confirm against the native extension.
  Polars.select(expr.alias(name.nil? ? "" : name)).to_series
end
1033
+
1034
+ # Return indices where `condition` evaluates `true`.
1035
+ #
1036
+ # @param condition [Expr]
1037
+ # Boolean expression to evaluate
1038
+ # @param eager [Boolean]
1039
+ # Whether to apply this function eagerly (as opposed to lazily).
1040
+ #
1041
+ # @return [Expr, Series]
1042
+ #
1043
+ # @example
1044
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
1045
+ # df.select(
1046
+ # [
1047
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
1048
+ # ]
1049
+ # ).to_series
1050
+ # # =>
1051
+ # # shape: (2,)
1052
+ # # Series: 'a' [u32]
1053
+ # # [
1054
+ # # 1
1055
+ # # 3
1056
+ # # ]
1057
def arg_where(condition, eager: false)
  # Lazy mode: coerce the condition (strings become literals) and wrap the
  # native arg-where expression.
  unless eager
    condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    return Utils.wrap_expr(_arg_where(condition._rbexpr))
  end

  # Eager mode only works on a Series. The message uses the Ruby keyword
  # spelling (`eager: true`) rather than the Python one (`eager=True`).
  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end

  # Evaluate the lazy variant against a one-column frame built from the Series.
  condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
end
1068
+
1069
+ # Folds the expressions from left to right, keeping the first non-null value.
1070
+ #
1071
+ # @param exprs [Object]
1072
+ # Expressions to coalesce.
1073
+ #
1074
+ # @return [Expr]
1075
+ #
1076
+ # @example
1077
+ # df = Polars::DataFrame.new(
1078
+ # [
1079
+ # [nil, 1.0, 1.0],
1080
+ # [nil, 2.0, 2.0],
1081
+ # [nil, nil, 3.0],
1082
+ # [nil, nil, nil]
1083
+ # ],
1084
+ # columns: [["a", :f64], ["b", :f64], ["c", :f64]]
1085
+ # )
1086
+ # df.with_column(Polars.coalesce(["a", "b", "c", 99.9]).alias("d"))
1087
+ # # =>
1088
+ # # shape: (4, 4)
1089
+ # # ┌──────┬──────┬──────┬──────┐
1090
+ # # │ a ┆ b ┆ c ┆ d │
1091
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1092
+ # # │ f64 ┆ f64 ┆ f64 ┆ f64 │
1093
+ # # ╞══════╪══════╪══════╪══════╡
1094
+ # # │ null ┆ 1.0 ┆ 1.0 ┆ 1.0 │
1095
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1096
+ # # │ null ┆ 2.0 ┆ 2.0 ┆ 2.0 │
1097
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1098
+ # # │ null ┆ null ┆ 3.0 ┆ 3.0 │
1099
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1100
+ # # │ null ┆ null ┆ null ┆ 99.9 │
1101
+ # # └──────┴──────┴──────┴──────┘
1102
def coalesce(exprs)
  # Convert the selection to native expressions, coalesce them, and wrap
  # the native result back into an Expr.
  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  coalesced = _coalesce_exprs(rb_exprs)
  Utils.wrap_expr(coalesced)
end
1106
+
1107
+ # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1108
+ #
1109
+ # Depending on the `unit` provided, this function will return a different dtype:
1110
+ # - unit: "d" returns pl.Date
1111
+ # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1112
+ # - unit: "ms" returns pl.Datetime["ms"]
1113
+ # - unit: "us" returns pl.Datetime["us"]
1114
+ # - unit: "ns" returns pl.Datetime["ns"]
1115
+ #
1116
+ # @param column [Object]
1117
+ # Series or expression to parse integers to pl.Datetime.
1118
+ # @param unit [String]
1119
+ # The unit of the timesteps since epoch time.
1120
+ # @param eager [Boolean]
1121
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
1122
+ #
1123
+ # @return [Object]
1124
+ #
1125
+ # @example
1126
+ # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1127
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1128
+ # # =>
1129
+ # # shape: (2, 1)
1130
+ # # ┌─────────────────────┐
1131
+ # # │ timestamp │
1132
+ # # │ --- │
1133
+ # # │ datetime[μs] │
1134
+ # # ╞═════════════════════╡
1135
+ # # │ 2022-10-25 07:31:17 │
1136
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1137
+ # # │ 2022-10-25 07:31:39 │
1138
+ # # └─────────────────────┘
1139
def from_epoch(column, unit: "s", eager: false)
  # Normalise the input: a String is treated as a column name, Series and
  # Expr pass through unchanged, anything else is wrapped in a Series.
  if column.is_a?(String)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(:date)
  elsif unit == "s"
    # NOTE(review): not implemented yet, even though "s" is the default unit
    # and the one used in the documented example.
    # TODO: expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
    raise Todo
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    # TODO: expr = column.cast(Datetime(unit))
    raise Todo
  else
    # Single braces on purpose: the doubled `{{ }}` was a Python f-string
    # escape and showed up verbatim in the Ruby message.
    raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
  end

  return expr unless eager

  # Eager evaluation needs concrete data, i.e. a Series (or something that
  # was wrapped into one above).
  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end
1168
+
1169
+ # Start a "when, then, otherwise" expression.
1170
+ #
1171
+ # @return [When]
1172
+ #
1173
+ # @example
1174
+ # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
1175
+ # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
1176
+ # # =>
1177
+ # # shape: (3, 3)
1178
+ # # ┌─────┬─────┬─────────┐
1179
+ # # │ foo ┆ bar ┆ literal │
1180
+ # # │ --- ┆ --- ┆ --- │
1181
+ # # │ i64 ┆ i64 ┆ i32 │
1182
+ # # ╞═════╪═════╪═════════╡
1183
+ # # │ 1 ┆ 3 ┆ -1 │
1184
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
1185
+ # # │ 3 ┆ 4 ┆ 1 │
1186
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
1187
+ # # │ 4 ┆ 0 ┆ 1 │
1188
+ # # └─────┴─────┴─────────┘
1189
def when(expr)
  # Coerce the predicate to an expression, then start the native
  # when/then/otherwise chain and wrap it in a When builder.
  predicate = Utils.expr_to_lit_or_expr(expr)
  When.new(RbExpr.when(predicate._rbexpr))
end
1194
+ end
1195
+ end