polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
data/lib/polars/functions/lazy.rb
@@ -0,0 +1,1329 @@
1
+ module Polars
2
+ module Functions
3
+ # Alias for an element being evaluated in an `eval` expression.
4
+ #
5
+ # @return [Expr]
6
+ #
7
+ # @example A horizontal rank computation by taking the elements of a list
8
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
9
+ # df.with_column(
10
+ # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
11
+ # )
12
+ # # =>
13
+ # # shape: (3, 3)
14
+ # # ┌─────┬─────┬────────────┐
15
+ # # │ a ┆ b ┆ rank │
16
+ # # │ --- ┆ --- ┆ --- │
17
+ # # │ i64 ┆ i64 ┆ list[f64] │
18
+ # # ╞═════╪═════╪════════════╡
19
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
20
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
21
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
22
+ # # └─────┴─────┴────────────┘
23
+ def element
24
+ col("")
25
+ end
26
+
27
+ # Return the number of non-null values in the column.
28
+ #
29
+ # This function is syntactic sugar for `col(columns).count`.
30
+ #
31
+ # Calling this function without any arguments returns the number of rows in the
32
+ # context. **This way of using the function is deprecated.** Please use `len`
33
+ # instead.
34
+ #
35
+ # @param columns [Array]
36
+ # One or more column names.
37
+ #
38
+ # @return [Expr]
39
+ #
40
+ # @example
41
+ # df = Polars::DataFrame.new(
42
+ # {
43
+ # "a" => [1, 2, nil],
44
+ # "b" => [3, nil, nil],
45
+ # "c" => ["foo", "bar", "foo"]
46
+ # }
47
+ # )
48
+ # df.select(Polars.count("a"))
49
+ # # =>
50
+ # # shape: (1, 1)
51
+ # # ┌─────┐
52
+ # # │ a │
53
+ # # │ --- │
54
+ # # │ u32 │
55
+ # # ╞═════╡
56
+ # # │ 2 │
57
+ # # └─────┘
58
+ #
59
+ # @example Return the number of non-null values in multiple columns.
60
+ # df.select(Polars.count("b", "c"))
61
+ # # =>
62
+ # # shape: (1, 2)
63
+ # # ┌─────┬─────┐
64
+ # # │ b ┆ c │
65
+ # # │ --- ┆ --- │
66
+ # # │ u32 ┆ u32 │
67
+ # # ╞═════╪═════╡
68
+ # # │ 1 ┆ 3 │
69
+ # # └─────┴─────┘
70
+ def count(*columns)
71
+ if columns.empty?
72
+ warn "`Polars.count` is deprecated. Use `Polars.length` instead."
73
+ return Utils.wrap_expr(Plr.len._alias("count"))
74
+ end
75
+
76
+ col(*columns).count
77
+ end
78
+
79
+ # Return the cumulative count of the non-null values in the column.
80
+ #
81
+ # This function is syntactic sugar for `col(columns).cum_count`.
82
+ #
83
+ # If no arguments are passed, returns the cumulative count of a context.
84
+ # Rows containing null values count towards the result.
85
+ #
86
+ # @param columns [Array]
87
+ # Name(s) of the columns to use.
88
+ # @param reverse [Boolean]
89
+ # Reverse the operation.
90
+ #
91
+ # @return [Expr]
92
+ #
93
+ # @example
94
+ # df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
95
+ # df.select(Polars.cum_count("a"))
96
+ # # =>
97
+ # # shape: (3, 1)
98
+ # # ┌─────┐
99
+ # # │ a │
100
+ # # │ --- │
101
+ # # │ u32 │
102
+ # # ╞═════╡
103
+ # # │ 1 │
104
+ # # │ 2 │
105
+ # # │ 2 │
106
+ # # └─────┘
107
+ def cum_count(*columns, reverse: false)
108
+ col(*columns).cum_count(reverse: reverse)
109
+ end
110
+
111
+ # Aggregate all column values into a list.
112
+ #
113
+ # This function is syntactic sugar for `col(name).implode`.
114
+ #
115
+ # @param columns [Array]
116
+ # One or more column names.
117
+ #
118
+ # @return [Expr]
119
+ #
120
+ # @example
121
+ # df = Polars::DataFrame.new(
122
+ # {
123
+ # "a" => [1, 2, 3],
124
+ # "b" => [9, 8, 7],
125
+ # "c" => ["foo", "bar", "foo"]
126
+ # }
127
+ # )
128
+ # df.select(Polars.implode("a"))
129
+ # # =>
130
+ # # shape: (1, 1)
131
+ # # ┌───────────┐
132
+ # # │ a │
133
+ # # │ --- │
134
+ # # │ list[i64] │
135
+ # # ╞═══════════╡
136
+ # # │ [1, 2, 3] │
137
+ # # └───────────┘
138
+ #
139
+ # @example
140
+ # df.select(Polars.implode("b", "c"))
141
+ # # =>
142
+ # # shape: (1, 2)
143
+ # # ┌───────────┬───────────────────────┐
144
+ # # │ b ┆ c │
145
+ # # │ --- ┆ --- │
146
+ # # │ list[i64] ┆ list[str] │
147
+ # # ╞═══════════╪═══════════════════════╡
148
+ # # │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
149
+ # # └───────────┴───────────────────────┘
150
+ def implode(*columns)
151
+ col(*columns).implode
152
+ end
153
+
154
+ # Get the standard deviation.
155
+ #
156
+ # This function is syntactic sugar for `col(column).std(ddof: ddof)`.
157
+ #
158
+ # @param column [Object]
159
+ # Column name.
160
+ # @param ddof [Integer]
161
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
162
+ # where N represents the number of elements.
163
+ # By default ddof is 1.
164
+ #
165
+ # @return [Expr]
166
+ #
167
+ # @example
168
+ # df = Polars::DataFrame.new(
169
+ # {
170
+ # "a" => [1, 8, 3],
171
+ # "b" => [4, 5, 2],
172
+ # "c" => ["foo", "bar", "foo"]
173
+ # }
174
+ # )
175
+ # df.select(Polars.std("a"))
176
+ # # =>
177
+ # # shape: (1, 1)
178
+ # # ┌──────────┐
179
+ # # │ a │
180
+ # # │ --- │
181
+ # # │ f64 │
182
+ # # ╞══════════╡
183
+ # # │ 3.605551 │
184
+ # # └──────────┘
185
+ #
186
+ # @example
187
+ # df["a"].std
188
+ # # => 3.605551275463989
189
+ def std(column, ddof: 1)
190
+ col(column).std(ddof: ddof)
191
+ end
192
+
193
+ # Get the variance.
194
+ #
195
+ # This function is syntactic sugar for `col(column).var(ddof: ddof)`.
196
+ #
197
+ # @param column [Object]
198
+ # Column name.
199
+ # @param ddof [Integer]
200
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
201
+ # where N represents the number of elements.
202
+ # By default ddof is 1.
203
+ #
204
+ # @return [Expr]
205
+ #
206
+ # @example
207
+ # df = Polars::DataFrame.new(
208
+ # {
209
+ # "a" => [1, 8, 3],
210
+ # "b" => [4, 5, 2],
211
+ # "c" => ["foo", "bar", "foo"]
212
+ # }
213
+ # )
214
+ # df.select(Polars.var("a"))
215
+ # # =>
216
+ # # shape: (1, 1)
217
+ # # ┌──────┐
218
+ # # │ a │
219
+ # # │ --- │
220
+ # # │ f64 │
221
+ # # ╞══════╡
222
+ # # │ 13.0 │
223
+ # # └──────┘
224
+ #
225
+ # @example
226
+ # df["a"].var
227
+ # # => 13.0
228
+ def var(column, ddof: 1)
229
+ col(column).var(ddof: ddof)
230
+ end
231
+
232
+
233
+ # Get the mean value.
234
+ #
235
+ # This function is syntactic sugar for `col(columns).mean`.
236
+ #
237
+ # @param columns [Array]
238
+ # One or more column names.
239
+ #
240
+ # @return [Expr]
241
+ #
242
+ # @example
243
+ # df = Polars::DataFrame.new(
244
+ # {
245
+ # "a" => [1, 8, 3],
246
+ # "b" => [4, 5, 2],
247
+ # "c" => ["foo", "bar", "foo"]
248
+ # }
249
+ # )
250
+ # df.select(Polars.mean("a"))
251
+ # # =>
252
+ # # shape: (1, 1)
253
+ # # ┌─────┐
254
+ # # │ a │
255
+ # # │ --- │
256
+ # # │ f64 │
257
+ # # ╞═════╡
258
+ # # │ 4.0 │
259
+ # # └─────┘
260
+ #
261
+ # @example
262
+ # df.select(Polars.mean("a", "b"))
263
+ # # =>
264
+ # # shape: (1, 2)
265
+ # # ┌─────┬──────────┐
266
+ # # │ a ┆ b │
267
+ # # │ --- ┆ --- │
268
+ # # │ f64 ┆ f64 │
269
+ # # ╞═════╪══════════╡
270
+ # # │ 4.0 ┆ 3.666667 │
271
+ # # └─────┴──────────┘
272
+ def mean(*columns)
273
+ col(*columns).mean
274
+ end
275
+ alias_method :avg, :mean
276
+
277
+ # Get the median value.
278
+ #
279
+ # This function is syntactic sugar for `col(columns).median`.
280
+ #
281
+ # @param columns [Array]
282
+ # One or more column names.
283
+ #
284
+ # @return [Expr]
285
+ #
286
+ # @example
287
+ # df = Polars::DataFrame.new(
288
+ # {
289
+ # "a" => [1, 8, 3],
290
+ # "b" => [4, 5, 2],
291
+ # "c" => ["foo", "bar", "foo"]
292
+ # }
293
+ # )
294
+ # df.select(Polars.median("a"))
295
+ # # =>
296
+ # # shape: (1, 1)
297
+ # # ┌─────┐
298
+ # # │ a │
299
+ # # │ --- │
300
+ # # │ f64 │
301
+ # # ╞═════╡
302
+ # # │ 3.0 │
303
+ # # └─────┘
304
+ #
305
+ # @example
306
+ # df.select(Polars.median("a", "b"))
307
+ # # =>
308
+ # # shape: (1, 2)
309
+ # # ┌─────┬─────┐
310
+ # # │ a ┆ b │
311
+ # # │ --- ┆ --- │
312
+ # # │ f64 ┆ f64 │
313
+ # # ╞═════╪═════╡
314
+ # # │ 3.0 ┆ 4.0 │
315
+ # # └─────┴─────┘
316
+ def median(*columns)
317
+ col(*columns).median
318
+ end
319
+
320
+ # Count unique values.
321
+ #
322
+ # This function is syntactic sugar for `col(columns).n_unique`.
323
+ #
324
+ # @param columns [Array]
325
+ # One or more column names.
326
+ #
327
+ # @return [Expr]
328
+ #
329
+ # @example
330
+ # df = Polars::DataFrame.new(
331
+ # {
332
+ # "a" => [1, 8, 1],
333
+ # "b" => [4, 5, 2],
334
+ # "c" => ["foo", "bar", "foo"]
335
+ # }
336
+ # )
337
+ # df.select(Polars.n_unique("a"))
338
+ # # =>
339
+ # # shape: (1, 1)
340
+ # # ┌─────┐
341
+ # # │ a │
342
+ # # │ --- │
343
+ # # │ u32 │
344
+ # # ╞═════╡
345
+ # # │ 2 │
346
+ # # └─────┘
347
+ #
348
+ # @example
349
+ # df.select(Polars.n_unique("b", "c"))
350
+ # # =>
351
+ # # shape: (1, 2)
352
+ # # ┌─────┬─────┐
353
+ # # │ b ┆ c │
354
+ # # │ --- ┆ --- │
355
+ # # │ u32 ┆ u32 │
356
+ # # ╞═════╪═════╡
357
+ # # │ 3 ┆ 2 │
358
+ # # └─────┴─────┘
359
+ def n_unique(*columns)
360
+ col(*columns).n_unique
361
+ end
362
+
363
+ # Approximate count of unique values.
364
+ #
365
+ # This function is syntactic sugar for `col(columns).approx_n_unique`, and
366
+ # uses the HyperLogLog++ algorithm for cardinality estimation.
367
+ #
368
+ # @param columns [Array]
369
+ # One or more column names.
370
+ #
371
+ # @return [Expr]
372
+ #
373
+ # @example
374
+ # df = Polars::DataFrame.new(
375
+ # {
376
+ # "a" => [1, 8, 1],
377
+ # "b" => [4, 5, 2],
378
+ # "c" => ["foo", "bar", "foo"]
379
+ # }
380
+ # )
381
+ # df.select(Polars.approx_n_unique("a"))
382
+ # # =>
383
+ # # shape: (1, 1)
384
+ # # ┌─────┐
385
+ # # │ a │
386
+ # # │ --- │
387
+ # # │ u32 │
388
+ # # ╞═════╡
389
+ # # │ 2 │
390
+ # # └─────┘
391
+ #
392
+ # @example
393
+ # df.select(Polars.approx_n_unique("b", "c"))
394
+ # # =>
395
+ # # shape: (1, 2)
396
+ # # ┌─────┬─────┐
397
+ # # │ b ┆ c │
398
+ # # │ --- ┆ --- │
399
+ # # │ u32 ┆ u32 │
400
+ # # ╞═════╪═════╡
401
+ # # │ 3 ┆ 2 │
402
+ # # └─────┴─────┘
403
+ def approx_n_unique(*columns)
404
+ col(*columns).approx_n_unique
405
+ end
406
+
407
+ # Get the first value.
408
+ #
409
+ # @param columns [Array]
410
+ # One or more column names. If not provided (default), returns an expression
411
+ # to take the first column of the context instead.
412
+ #
413
+ # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 8, 3],
419
+ # "b" => [4, 5, 2],
420
+ # "c" => ["foo", "bar", "baz"]
421
+ # }
422
+ # )
423
+ # df.select(Polars.first)
424
+ # # =>
425
+ # # shape: (3, 1)
426
+ # # ┌─────┐
427
+ # # │ a │
428
+ # # │ --- │
429
+ # # │ i64 │
430
+ # # ╞═════╡
431
+ # # │ 1 │
432
+ # # │ 8 │
433
+ # # │ 3 │
434
+ # # └─────┘
435
+ #
436
+ # @example
437
+ # df.select(Polars.first("b"))
438
+ # # =>
439
+ # # shape: (1, 1)
440
+ # # ┌─────┐
441
+ # # │ b │
442
+ # # │ --- │
443
+ # # │ i64 │
444
+ # # ╞═════╡
445
+ # # │ 4 │
446
+ # # └─────┘
447
+ #
448
+ # @example
449
+ # df.select(Polars.first("a", "c"))
450
+ # # =>
451
+ # # shape: (1, 2)
452
+ # # ┌─────┬─────┐
453
+ # # │ a ┆ c │
454
+ # # │ --- ┆ --- │
455
+ # # │ i64 ┆ str │
456
+ # # ╞═════╪═════╡
457
+ # # │ 1 ┆ foo │
458
+ # # └─────┴─────┘
459
+ def first(*columns)
460
+ if columns.empty?
461
+ return Utils.wrap_expr(Plr.first)
462
+ end
463
+
464
+ col(*columns).first
465
+ end
466
+
467
+ # Get the last value.
468
+ #
469
+ # @param columns [Array]
470
+ # One or more column names. If not provided (default), returns an expression
471
+ # to take the last column of the context instead.
472
+ #
473
+ # @return [Expr]
474
+ #
475
+ # @example
476
+ # df = Polars::DataFrame.new(
477
+ # {
478
+ # "a" => [1, 8, 3],
479
+ # "b" => [4, 5, 2],
480
+ # "c" => ["foo", "bar", "baz"]
481
+ # }
482
+ # )
483
+ # df.select(Polars.last)
484
+ # # =>
485
+ # # shape: (3, 1)
486
+ # # ┌─────┐
487
+ # # │ c │
488
+ # # │ --- │
489
+ # # │ str │
490
+ # # ╞═════╡
491
+ # # │ foo │
492
+ # # │ bar │
493
+ # # │ baz │
494
+ # # └─────┘
495
+ #
496
+ # @example
497
+ # df.select(Polars.last("a"))
498
+ # # =>
499
+ # # shape: (1, 1)
500
+ # # ┌─────┐
501
+ # # │ a │
502
+ # # │ --- │
503
+ # # │ i64 │
504
+ # # ╞═════╡
505
+ # # │ 3 │
506
+ # # └─────┘
507
+ #
508
+ # @example
509
+ # df.select(Polars.last("b", "c"))
510
+ # # =>
511
+ # # shape: (1, 2)
512
+ # # ┌─────┬─────┐
513
+ # # │ b ┆ c │
514
+ # # │ --- ┆ --- │
515
+ # # │ i64 ┆ str │
516
+ # # ╞═════╪═════╡
517
+ # # │ 2 ┆ baz │
518
+ # # └─────┴─────┘
519
+ def last(*columns)
520
+ if columns.empty?
521
+ return Utils.wrap_expr(Plr.last)
522
+ end
523
+
524
+ col(*columns).last
525
+ end
526
+
527
+ # Get the nth column(s) of the context.
528
+ #
529
+ # @param indices [Array]
530
+ # One or more indices representing the columns to retrieve.
531
+ #
532
+ # @return [Expr]
533
+ #
534
+ # @example
535
+ # df = Polars::DataFrame.new(
536
+ # {
537
+ # "a" => [1, 8, 3],
538
+ # "b" => [4, 5, 2],
539
+ # "c" => ["foo", "bar", "baz"]
540
+ # }
541
+ # )
542
+ # df.select(Polars.nth(1))
543
+ # # =>
544
+ # # shape: (3, 1)
545
+ # # ┌─────┐
546
+ # # │ b │
547
+ # # │ --- │
548
+ # # │ i64 │
549
+ # # ╞═════╡
550
+ # # │ 4 │
551
+ # # │ 5 │
552
+ # # │ 2 │
553
+ # # └─────┘
554
+ #
555
+ # @example
556
+ # df.select(Polars.nth(2, 0))
557
+ # # =>
558
+ # # shape: (3, 2)
559
+ # # ┌─────┬─────┐
560
+ # # │ c ┆ a │
561
+ # # │ --- ┆ --- │
562
+ # # │ str ┆ i64 │
563
+ # # ╞═════╪═════╡
564
+ # # │ foo ┆ 1 │
565
+ # # │ bar ┆ 8 │
566
+ # # │ baz ┆ 3 │
567
+ # # └─────┴─────┘
568
+ def nth(*indices)
569
+ if indices.length == 1 && indices[0].is_a?(Array)
570
+ indices = indices[0]
571
+ end
572
+
573
+ Utils.wrap_expr(Plr.index_cols(indices))
574
+ end
575
+
576
+ # Get the first `n` rows.
577
+ #
578
+ # This function is syntactic sugar for `col(column).head(n)`.
579
+ #
580
+ # @param column [Object]
581
+ # Column name.
582
+ # @param n [Integer]
583
+ # Number of rows to return.
584
+ #
585
+ # @return [Expr]
586
+ #
587
+ # @example
588
+ # df = Polars::DataFrame.new(
589
+ # {
590
+ # "a" => [1, 8, 3],
591
+ # "b" => [4, 5, 2],
592
+ # "c" => ["foo", "bar", "foo"]
593
+ # }
594
+ # )
595
+ # df.select(Polars.head("a"))
596
+ # # =>
597
+ # # shape: (3, 1)
598
+ # # ┌─────┐
599
+ # # │ a │
600
+ # # │ --- │
601
+ # # │ i64 │
602
+ # # ╞═════╡
603
+ # # │ 1 │
604
+ # # │ 8 │
605
+ # # │ 3 │
606
+ # # └─────┘
607
+ #
608
+ # @example
609
+ # df.select(Polars.head("a", 2))
610
+ # # =>
611
+ # # shape: (2, 1)
612
+ # # ┌─────┐
613
+ # # │ a │
614
+ # # │ --- │
615
+ # # │ i64 │
616
+ # # ╞═════╡
617
+ # # │ 1 │
618
+ # # │ 8 │
619
+ # # └─────┘
620
+ def head(column, n = 10)
621
+ col(column).head(n)
622
+ end
623
+
624
+ # Get the last `n` rows.
625
+ #
626
+ # This function is syntactic sugar for `col(column).tail(n)`.
627
+ #
628
+ # @param column [Object]
629
+ # Column name.
630
+ # @param n [Integer]
631
+ # Number of rows to return.
632
+ #
633
+ # @return [Expr]
634
+ #
635
+ # @example
636
+ # df = Polars::DataFrame.new(
637
+ # {
638
+ # "a" => [1, 8, 3],
639
+ # "b" => [4, 5, 2],
640
+ # "c" => ["foo", "bar", "foo"]
641
+ # }
642
+ # )
643
+ # df.select(Polars.tail("a"))
644
+ # # =>
645
+ # # shape: (3, 1)
646
+ # # ┌─────┐
647
+ # # │ a │
648
+ # # │ --- │
649
+ # # │ i64 │
650
+ # # ╞═════╡
651
+ # # │ 1 │
652
+ # # │ 8 │
653
+ # # │ 3 │
654
+ # # └─────┘
655
+ #
656
+ # @example
657
+ # df.select(Polars.tail("a", 2))
658
+ # # =>
659
+ # # shape: (2, 1)
660
+ # # ┌─────┐
661
+ # # │ a │
662
+ # # │ --- │
663
+ # # │ i64 │
664
+ # # ╞═════╡
665
+ # # │ 8 │
666
+ # # │ 3 │
667
+ # # └─────┘
668
+ def tail(column, n = 10)
669
+ col(column).tail(n)
670
+ end
671
+
672
+ # Compute the Pearson or Spearman rank correlation between two columns.
673
+ #
674
+ # @param a [Object]
675
+ # Column name or Expression.
676
+ # @param b [Object]
677
+ # Column name or Expression.
678
+ # @param ddof [Integer]
679
+ # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
680
+ # where N represents the number of elements.
681
+ # By default ddof is 1.
682
+ # @param method ["pearson", "spearman"]
683
+ # Correlation method.
684
+ # @param propagate_nans [Boolean]
685
+ # If `true` any `NaN` encountered will lead to `NaN` in the output.
686
+ # Defaults to `false`, where `NaN` values are regarded as larger than any finite number
687
+ # and thus lead to the highest rank.
688
+ #
689
+ # @return [Expr]
690
+ #
691
+ # @example Pearson's correlation:
692
+ # df = Polars::DataFrame.new(
693
+ # {
694
+ # "a" => [1, 8, 3],
695
+ # "b" => [4, 5, 2],
696
+ # "c" => ["foo", "bar", "foo"]
697
+ # }
698
+ # )
699
+ # df.select(Polars.corr("a", "b"))
700
+ # # =>
701
+ # # shape: (1, 1)
702
+ # # ┌──────────┐
703
+ # # │ a │
704
+ # # │ --- │
705
+ # # │ f64 │
706
+ # # ╞══════════╡
707
+ # # │ 0.544705 │
708
+ # # └──────────┘
709
+ #
710
+ # @example Spearman rank correlation:
711
+ # df = Polars::DataFrame.new(
712
+ # {
713
+ # "a" => [1, 8, 3],
714
+ # "b" => [4, 5, 2],
715
+ # "c" => ["foo", "bar", "foo"]
716
+ # }
717
+ # )
718
+ # df.select(Polars.corr("a", "b", method: "spearman"))
719
+ # # =>
720
+ # # shape: (1, 1)
721
+ # # ┌─────┐
722
+ # # │ a │
723
+ # # │ --- │
724
+ # # │ f64 │
725
+ # # ╞═════╡
726
+ # # │ 0.5 │
727
+ # # └─────┘
728
+ def corr(
729
+ a,
730
+ b,
731
+ method: "pearson",
732
+ ddof: 1,
733
+ propagate_nans: false
734
+ )
735
+ a = Utils.parse_into_expression(a)
736
+ b = Utils.parse_into_expression(b)
737
+
738
+ if method == "pearson"
739
+ Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
740
+ elsif method == "spearman"
741
+ Utils.wrap_expr(Plr.spearman_rank_corr(a, b, ddof, propagate_nans))
742
+ else
743
+ msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
744
+ raise ArgumentError, msg
745
+ end
746
+ end
747
+
748
+ # Compute the covariance between two columns/expressions.
749
+ #
750
+ # @param a [Object]
751
+ # Column name or Expression.
752
+ # @param b [Object]
753
+ # Column name or Expression.
754
+ # @param ddof [Integer]
755
+ # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
756
+ # where N represents the number of elements.
757
+ # By default ddof is 1.
758
+ #
759
+ # @return [Expr]
760
+ #
761
+ # @example
762
+ # df = Polars::DataFrame.new(
763
+ # {
764
+ # "a" => [1, 8, 3],
765
+ # "b" => [4, 5, 2],
766
+ # "c" => ["foo", "bar", "foo"]
767
+ # }
768
+ # )
769
+ # df.select(Polars.cov("a", "b"))
770
+ # # =>
771
+ # # shape: (1, 1)
772
+ # # ┌─────┐
773
+ # # │ a │
774
+ # # │ --- │
775
+ # # │ f64 │
776
+ # # ╞═════╡
777
+ # # │ 3.0 │
778
+ # # └─────┘
779
+ def cov(a, b, ddof: 1)
780
+ a = Utils.parse_into_expression(a)
781
+ b = Utils.parse_into_expression(b)
782
+ Utils.wrap_expr(Plr.cov(a, b, ddof))
783
+ end
784
+
785
+ # def map
786
+ # end
787
+
788
+ # def apply
789
+ # end
790
+
791
+ # Accumulate over multiple columns horizontally/row-wise with a left fold.
792
+ #
793
+ # @return [Expr]
794
+ def fold(acc, f, exprs)
795
+ acc = Utils.parse_into_expression(acc, str_as_lit: true)
796
+ if exprs.is_a?(Expr)
797
+ exprs = [exprs]
798
+ end
799
+
800
+ exprs = Utils.parse_into_list_of_expressions(exprs)
801
+ Utils.wrap_expr(Plr.fold(acc, f, exprs))
802
+ end
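`fold` is only documented briefly here; a minimal row-wise sum sketch, assuming `acc`, `f` and `exprs` follow the same conventions as `cum_fold` below (`f` being a callable that receives the accumulator and the next value):

  df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [10, 20, 30]})
  # Row-wise sum: start from lit(0) and fold each column in with +.
  df.select(
    Polars.fold(Polars.lit(0), ->(acc, x) { acc + x }, [Polars.col("a"), Polars.col("b")]).alias("sum")
  )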
803
+
804
+ # def reduce
805
+ # end
806
+
807
+ # Cumulatively accumulate over multiple columns horizontally/row-wise with a left fold.
808
+ #
809
+ # Every cumulative result is added as a separate field in a Struct column.
810
+ #
811
+ # @param acc [Object]
812
+ # Accumulator expression. This is the value the fold is initialized with when it
813
+ # starts. For a sum this could, for instance, be lit(0).
814
+ # @param f [Object]
815
+ # Function to apply over the accumulator and the value.
816
+ # Fn(acc, value) -> new_value
817
+ # @param exprs [Object]
818
+ # Expressions to aggregate over. May also be a wildcard expression.
819
+ # @param include_init [Boolean]
820
+ # Include the initial accumulator state as struct field.
821
+ #
822
+ # @return [Object]
823
+ #
824
+ # @note
825
+ # If you simply want the first encountered expression as accumulator,
826
+ # consider using `cum_reduce`.
827
+ def cum_fold(acc, f, exprs, include_init: false)
828
+ acc = Utils.parse_into_expression(acc, str_as_lit: true)
829
+ if exprs.is_a?(Expr)
830
+ exprs = [exprs]
831
+ end
832
+
833
+ exprs = Utils.parse_into_list_of_expressions(exprs)
834
+ Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init))
835
+ end
836
+ alias_method :cumfold, :cum_fold
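A companion sketch for `cum_fold`, assuming the same callable convention; every cumulative step becomes a field of the resulting Struct column, and a wildcard expression is accepted for `exprs` as noted above:

  df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4], "c" => [5, 6]})
  # Running row-wise sum across a, b and c, returned as a struct with one field per step.
  df.select(
    Polars.cum_fold(Polars.lit(0), ->(acc, x) { acc + x }, Polars.col("*")).alias("cum_sum")
  )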
837
+
838
+ # def cum_reduce
839
+ # end
840
+
841
+ # Compute the two-argument arctangent in radians.
842
+ #
843
+ # Returns the angle (in radians) in the plane between the
844
+ # positive x-axis and the ray from the origin to (x,y).
845
+ #
846
+ # @param y [Object]
847
+ # Column name or Expression.
848
+ # @param x [Object]
849
+ # Column name or Expression.
850
+ #
851
+ # @return [Expr]
852
+ #
853
+ # @example
854
+ # twoRootTwo = Math.sqrt(2) / 2
855
+ # df = Polars::DataFrame.new(
856
+ # {
857
+ # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
858
+ # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
859
+ # }
860
+ # )
861
+ # df.select(
862
+ # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
863
+ # )
864
+ # # =>
865
+ # # shape: (4, 2)
866
+ # # ┌────────┬───────────┐
867
+ # # │ atan2d ┆ atan2 │
868
+ # # │ --- ┆ --- │
869
+ # # │ f64 ┆ f64 │
870
+ # # ╞════════╪═══════════╡
871
+ # # │ 45.0 ┆ 0.785398 │
872
+ # # │ -45.0 ┆ -0.785398 │
873
+ # # │ 135.0 ┆ 2.356194 │
874
+ # # │ -135.0 ┆ -2.356194 │
875
+ # # └────────┴───────────┘
876
+ def arctan2(y, x)
877
+ if Utils.strlike?(y)
878
+ y = col(y)
879
+ end
880
+ if Utils.strlike?(x)
881
+ x = col(x)
882
+ end
883
+ Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
884
+ end
885
+
886
+ # Compute the two-argument arctangent in degrees.
887
+ #
888
+ # Returns the angle (in degrees) in the plane between the positive x-axis
889
+ # and the ray from the origin to (x,y).
890
+ #
891
+ # @param y [Object]
892
+ # Column name or Expression.
893
+ # @param x [Object]
894
+ # Column name or Expression.
895
+ #
896
+ # @return [Expr]
897
+ #
898
+ # @example
899
+ # twoRootTwo = Math.sqrt(2) / 2
900
+ # df = Polars::DataFrame.new(
901
+ # {
902
+ # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
903
+ # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
904
+ # }
905
+ # )
906
+ # df.select(
907
+ # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
908
+ # )
909
+ # # =>
910
+ # # shape: (4, 2)
911
+ # # ┌────────┬───────────┐
912
+ # # │ atan2d ┆ atan2 │
913
+ # # │ --- ┆ --- │
914
+ # # │ f64 ┆ f64 │
915
+ # # ╞════════╪═══════════╡
916
+ # # │ 45.0 ┆ 0.785398 │
917
+ # # │ -45.0 ┆ -0.785398 │
918
+ # # │ 135.0 ┆ 2.356194 │
919
+ # # │ -135.0 ┆ -2.356194 │
920
+ # # └────────┴───────────┘
921
+ def arctan2d(y, x)
922
+ if Utils.strlike?(y)
923
+ y = col(y)
924
+ end
925
+ if Utils.strlike?(x)
926
+ x = col(x)
927
+ end
928
+ Utils.wrap_expr(Plr.arctan2d(y._rbexpr, x._rbexpr))
929
+ end
930
+
931
+ # Exclude certain columns from a wildcard/regex selection.
932
+ #
933
+ # @param columns [Object]
934
+ # Column(s) to exclude from selection
935
+ # This can be:
936
+ #
937
+ # - a column name, or multiple column names
938
+ # - a regular expression starting with `^` and ending with `$`
939
+ # - a dtype or multiple dtypes
940
+ #
941
+ # @return [Object]
942
+ #
943
+ # @example
944
+ # df = Polars::DataFrame.new(
945
+ # {
946
+ # "aa" => [1, 2, 3],
947
+ # "ba" => ["a", "b", nil],
948
+ # "cc" => [nil, 2.5, 1.5]
949
+ # }
950
+ # )
951
+ # # =>
952
+ # # shape: (3, 3)
953
+ # # ┌─────┬──────┬──────┐
954
+ # # │ aa ┆ ba ┆ cc │
955
+ # # │ --- ┆ --- ┆ --- │
956
+ # # │ i64 ┆ str ┆ f64 │
957
+ # # ╞═════╪══════╪══════╡
958
+ # # │ 1 ┆ a ┆ null │
959
+ # # │ 2 ┆ b ┆ 2.5 │
960
+ # # │ 3 ┆ null ┆ 1.5 │
961
+ # # └─────┴──────┴──────┘
962
+ #
963
+ # @example Exclude by column name(s):
964
+ # df.select(Polars.exclude("ba"))
965
+ # # =>
966
+ # # shape: (3, 2)
967
+ # # ┌─────┬──────┐
968
+ # # │ aa ┆ cc │
969
+ # # │ --- ┆ --- │
970
+ # # │ i64 ┆ f64 │
971
+ # # ╞═════╪══════╡
972
+ # # │ 1 ┆ null │
973
+ # # │ 2 ┆ 2.5 │
974
+ # # │ 3 ┆ 1.5 │
975
+ # # └─────┴──────┘
976
+ #
977
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
978
+ # df.select(Polars.exclude("^.*a$"))
979
+ # # =>
980
+ # # shape: (3, 1)
981
+ # # ┌──────┐
982
+ # # │ cc │
983
+ # # │ --- │
984
+ # # │ f64 │
985
+ # # ╞══════╡
986
+ # # │ null │
987
+ # # │ 2.5 │
988
+ # # │ 1.5 │
989
+ # # └──────┘
990
+ def exclude(columns)
991
+ col("*").exclude(columns)
992
+ end
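The dtype form of `exclude` listed above has no example; a minimal sketch, assuming the datatype classes from data_types.rb (Polars::Utf8, Polars::Int64, Polars::Float64) can be passed in place of column names:

  df = Polars::DataFrame.new({"aa" => [1, 2, 3], "ba" => ["a", "b", nil], "cc" => [nil, 2.5, 1.5]})
  # Exclude by dtype: drop the string column "ba" from the wildcard selection.
  df.select(Polars.exclude(Polars::Utf8))
  # Multiple dtypes can be excluded at once.
  df.select(Polars.exclude([Polars::Int64, Polars::Float64]))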
993
+
994
+ # Syntactic sugar for `Polars.col("foo").agg_groups`.
995
+ #
996
+ # @return [Object]
997
+ def groups(column)
998
+ col(column).agg_groups
999
+ end
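`groups` has no example; a minimal sketch in an aggregation context, assuming `agg_groups` yields the row indices of each group and that the group_by/agg API from group_by.rb is available:

  df = Polars::DataFrame.new({"g" => ["x", "x", "y"], "v" => [1, 2, 3]})
  # For each group in "g", collect the row indices of the "v" values.
  df.group_by("g").agg(Polars.groups("v"))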
1000
+
1001
+ # Syntactic sugar for `Polars.col("foo").quantile(...)`.
1002
+ #
1003
+ # @param column [String]
1004
+ # Column name.
1005
+ # @param quantile [Float]
1006
+ # Quantile between 0.0 and 1.0.
1007
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
1008
+ # Interpolation method.
1009
+ #
1010
+ # @return [Expr]
1011
+ def quantile(column, quantile, interpolation: "nearest")
1012
+ col(column).quantile(quantile, interpolation: interpolation)
1013
+ end
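A minimal sketch of `quantile`, with and without the default "nearest" interpolation:

  df = Polars::DataFrame.new({"a" => [1, 8, 3]})
  # Median via the quantile sugar; interpolation defaults to "nearest".
  df.select(Polars.quantile("a", 0.5))
  # Use linear interpolation instead.
  df.select(Polars.quantile("a", 0.5, interpolation: "linear"))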
1014
+
1015
+ # Find the indexes that would sort the columns.
1016
+ #
1017
+ # Argsort by multiple columns. The first column will be used for the ordering.
1018
+ # If there are duplicates in the first column, the second column will be used to
1019
+ # determine the ordering and so on.
1020
+ #
1021
+ # @param exprs [Object]
1022
+ # Column(s) used to determine the ordering.
1023
+ # @param reverse [Boolean]
1024
+ # Sort in reverse (descending) order; default is ascending.
1025
+ #
1026
+ # @return [Expr]
1027
+ def arg_sort_by(exprs, reverse: false)
1028
+ if !exprs.is_a?(::Array)
1029
+ exprs = [exprs]
1030
+ end
1031
+ if reverse == true || reverse == false
1032
+ reverse = [reverse] * exprs.length
1033
+ end
1034
+ exprs = Utils.parse_into_list_of_expressions(exprs)
1035
+ Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
1036
+ end
1037
+ alias_method :argsort_by, :arg_sort_by
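A minimal sketch of `arg_sort_by`; per the code above, `reverse` may be a single flag or one flag per column:

  df = Polars::DataFrame.new({"a" => [0, 1, 1], "b" => [3, 2, 1]})
  # Indices that would sort by "a" ascending, breaking ties by "b" descending.
  df.select(Polars.arg_sort_by(["a", "b"], reverse: [false, true]))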
1038
+
1039
+ # Collect multiple LazyFrames at the same time.
1040
+ #
1041
+ # This runs all the computation graphs in parallel on the Polars thread pool.
1042
+ #
1043
+ # @param lazy_frames [Array]
1044
+ # A list of LazyFrames to collect.
1045
+ # @param type_coercion [Boolean]
1046
+ # Do type coercion optimization.
1047
+ # @param predicate_pushdown [Boolean]
1048
+ # Do predicate pushdown optimization.
1049
+ # @param projection_pushdown [Boolean]
1050
+ # Do projection pushdown optimization.
1051
+ # @param simplify_expression [Boolean]
1052
+ # Run simplify expressions optimization.
1053
+ # @param string_cache [Boolean]
1054
+ # This argument is deprecated and will be ignored
1055
+ # @param no_optimization [Boolean]
1056
+ # Turn off optimizations.
1057
+ # @param slice_pushdown [Boolean]
1058
+ # Slice pushdown optimization.
1059
+ # @param common_subplan_elimination [Boolean]
1060
+ # Will try to cache branching subplans that occur on self-joins or unions.
1061
+ # @param allow_streaming [Boolean]
1062
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
1063
+ #
1064
+ # @return [Array]
1065
+ def collect_all(
1066
+ lazy_frames,
1067
+ type_coercion: true,
1068
+ predicate_pushdown: true,
1069
+ projection_pushdown: true,
1070
+ simplify_expression: true,
1071
+ string_cache: false,
1072
+ no_optimization: false,
1073
+ slice_pushdown: true,
1074
+ common_subplan_elimination: true,
1075
+ allow_streaming: false
1076
+ )
1077
+ if no_optimization
1078
+ predicate_pushdown = false
1079
+ projection_pushdown = false
1080
+ slice_pushdown = false
1081
+ common_subplan_elimination = false
1082
+ end
1083
+
1084
+ prepared = []
1085
+
1086
+ lazy_frames.each do |lf|
1087
+ ldf = lf._ldf.optimization_toggle(
1088
+ type_coercion,
1089
+ predicate_pushdown,
1090
+ projection_pushdown,
1091
+ simplify_expression,
1092
+ slice_pushdown,
1093
+ common_subplan_elimination,
1094
+ allow_streaming,
1095
+ false
1096
+ )
1097
+ prepared << ldf
1098
+ end
1099
+
1100
+ out = Plr.collect_all(prepared)
1101
+
1102
+ # wrap the rbdataframes in Ruby DataFrame objects
1103
+ result = out.map { |rbdf| Utils.wrap_df(rbdf) }
1104
+
1105
+ result
1106
+ end
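`collect_all` has no example; a minimal sketch collecting two independent lazy queries together:

  lf1 = Polars::DataFrame.new({"a" => [1, 2, 3]}).lazy.select(Polars.col("a").sum)
  lf2 = Polars::DataFrame.new({"b" => [4, 5, 6]}).lazy.select(Polars.col("b").mean)
  # Both computation graphs are optimized and collected together; returns an Array of DataFrames.
  dfs = Polars.collect_all([lf1, lf2])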
1107
+
1108
+ # Run polars expressions without a context.
1109
+ #
1110
+ # This is syntactic sugar for running `df.select` on an empty DataFrame.
1111
+ #
1112
+ # @param exprs [Array]
1113
+ # Column(s) to select, specified as positional arguments.
1114
+ # Accepts expression input. Strings are parsed as column names,
1115
+ # other non-expression inputs are parsed as literals.
1116
+ # @param named_exprs [Hash]
1117
+ # Additional columns to select, specified as keyword arguments.
1118
+ # The columns will be renamed to the keyword used.
1119
+ #
1120
+ # @return [DataFrame]
1121
+ #
1122
+ # @example
1123
+ # foo = Polars::Series.new("foo", [1, 2, 3])
1124
+ # bar = Polars::Series.new("bar", [3, 2, 1])
1125
+ # Polars.select(min: Polars.min_horizontal(foo, bar))
1126
+ # # =>
1127
+ # # shape: (3, 1)
1128
+ # # ┌─────┐
1129
+ # # │ min │
1130
+ # # │ --- │
1131
+ # # │ i64 │
1132
+ # # ╞═════╡
1133
+ # # │ 1 │
1134
+ # # │ 2 │
1135
+ # # │ 1 │
1136
+ # # └─────┘
1137
+ def select(*exprs, **named_exprs)
1138
+ DataFrame.new([]).select(*exprs, **named_exprs)
1139
+ end
1140
+
1141
+ # Return indices where `condition` evaluates to `true`.
1142
+ #
1143
+ # @param condition [Expr]
1144
+ # Boolean expression to evaluate
1145
+ # @param eager [Boolean]
1146
+ # Whether to apply this function eagerly (as opposed to lazily).
1147
+ #
1148
+ # @return [Expr, Series]
1149
+ #
1150
+ # @example
1151
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
1152
+ # df.select(
1153
+ # [
1154
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
1155
+ # ]
1156
+ # ).to_series
1157
+ # # =>
1158
+ # # shape: (2,)
1159
+ # # Series: 'a' [u32]
1160
+ # # [
1161
+ # # 1
1162
+ # # 3
1163
+ # # ]
1164
+ def arg_where(condition, eager: false)
1165
+ if eager
1166
+ if !condition.is_a?(Series)
1167
+ raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
1168
+ end
1169
+ condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
1170
+ else
1171
+ condition = Utils.parse_into_expression(condition, str_as_lit: true)
1172
+ Utils.wrap_expr(Plr.arg_where(condition))
1173
+ end
1174
+ end
1175
+
1176
+ # Folds the columns from left to right, keeping the first non-null value.
1177
+ #
1178
+ # @param exprs [Array]
1179
+ # Columns to coalesce. Accepts expression input. Strings are parsed as column
1180
+ # names, other non-expression inputs are parsed as literals.
1181
+ # @param more_exprs [Array]
1182
+ # Additional columns to coalesce, specified as positional arguments.
1183
+ #
1184
+ # @return [Expr]
1185
+ #
1186
+ # @example
1187
+ # df = Polars::DataFrame.new(
1188
+ # {
1189
+ # "a" => [1, nil, nil, nil],
1190
+ # "b" => [1, 2, nil, nil],
1191
+ # "c" => [5, nil, 3, nil]
1192
+ # }
1193
+ # )
1194
+ # df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
1195
+ # # =>
1196
+ # # shape: (4, 4)
1197
+ # # ┌──────┬──────┬──────┬─────┐
1198
+ # # │ a ┆ b ┆ c ┆ d │
1199
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1200
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 │
1201
+ # # ╞══════╪══════╪══════╪═════╡
1202
+ # # │ 1 ┆ 1 ┆ 5 ┆ 1 │
1203
+ # # │ null ┆ 2 ┆ null ┆ 2 │
1204
+ # # │ null ┆ null ┆ 3 ┆ 3 │
1205
+ # # │ null ┆ null ┆ null ┆ 10 │
1206
+ # # └──────┴──────┴──────┴─────┘
1207
+ #
1208
+ # @example
1209
+ # df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
1210
+ # # =>
1211
+ # # shape: (4, 4)
1212
+ # # ┌──────┬──────┬──────┬──────┐
1213
+ # # │ a ┆ b ┆ c ┆ d │
1214
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1215
+ # # │ i64 ┆ i64 ┆ i64 ┆ f64 │
1216
+ # # ╞══════╪══════╪══════╪══════╡
1217
+ # # │ 1 ┆ 1 ┆ 5 ┆ 1.0 │
1218
+ # # │ null ┆ 2 ┆ null ┆ 2.0 │
1219
+ # # │ null ┆ null ┆ 3 ┆ 3.0 │
1220
+ # # │ null ┆ null ┆ null ┆ 10.0 │
1221
+ # # └──────┴──────┴──────┴──────┘
1222
+ def coalesce(exprs, *more_exprs)
1223
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
1224
+ Utils.wrap_expr(Plr.coalesce(exprs))
1225
+ end
1226
+
1227
+ # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1228
+ #
1229
+ # Depending on the `unit` provided, this function will return a different dtype:
1230
+ # - unit: "d" returns pl.Date
1231
+ # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1232
+ # - unit: "ms" returns pl.Datetime["ms"]
1233
+ # - unit: "us" returns pl.Datetime["us"]
1234
+ # - unit: "ns" returns pl.Datetime["ns"]
1235
+ #
1236
+ # @param column [Object]
1237
+ # Series or expression to parse integers to Polars::Datetime.
1238
+ # @param unit [String]
1239
+ # The unit of the timesteps since epoch time.
1240
+ # @param eager [Boolean]
1241
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
1242
+ #
1243
+ # @return [Object]
1244
+ #
1245
+ # @example
1246
+ # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1247
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1248
+ # # =>
1249
+ # # shape: (2, 1)
1250
+ # # ┌─────────────────────┐
1251
+ # # │ timestamp │
1252
+ # # │ --- │
1253
+ # # │ datetime[μs] │
1254
+ # # ╞═════════════════════╡
1255
+ # # │ 2022-10-25 07:31:17 │
1256
+ # # │ 2022-10-25 07:31:39 │
1257
+ # # └─────────────────────┘
1258
+ def from_epoch(column, unit: "s", eager: false)
1259
+ if Utils.strlike?(column)
1260
+ column = col(column)
1261
+ elsif !column.is_a?(Series) && !column.is_a?(Expr)
1262
+ column = Series.new(column)
1263
+ end
1264
+
1265
+ if unit == "d"
1266
+ expr = column.cast(Date)
1267
+ elsif unit == "s"
1268
+ expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
1269
+ elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
1270
+ expr = column.cast(Datetime.new(unit))
1271
+ else
1272
+ raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
1273
+ end
1274
+
1275
+ if eager
1276
+ if !column.is_a?(Series)
1277
+ raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
1278
+ else
1279
+ column.to_frame.select(expr).to_series
1280
+ end
1281
+ else
1282
+ expr
1283
+ end
1284
+ end
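The `unit: "d"` and `eager` paths above are not covered by the example; a minimal sketch using a Series input, which the eager branch requires:

  s = Polars::Series.new("ts", [18_000, 18_001])
  # Day-resolution epochs become a Date column; eager: true returns a Series directly.
  Polars.from_epoch(s, unit: "d", eager: true)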
1285
+
1286
+ # Parse one or more SQL expressions to polars expression(s).
1287
+ #
1288
+ # @param sql [Object]
1289
+ # One or more SQL expressions.
1290
+ #
1291
+ # @return [Expr]
1292
+ #
1293
+ # @example Parse a single SQL expression:
1294
+ # df = Polars::DataFrame.new({"a" => [2, 1]})
1295
+ # expr = Polars.sql_expr("MAX(a)")
1296
+ # df.select(expr)
1297
+ # # =>
1298
+ # # shape: (1, 1)
1299
+ # # ┌─────┐
1300
+ # # │ a │
1301
+ # # │ --- │
1302
+ # # │ i64 │
1303
+ # # ╞═════╡
1304
+ # # │ 2 │
1305
+ # # └─────┘
1306
+ #
1307
+ # @example Parse multiple SQL expressions:
1308
+ # df.with_columns(
1309
+ # *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
1310
+ # )
1311
+ # # =>
1312
+ # # shape: (2, 3)
1313
+ # # ┌─────┬─────┬───────┐
1314
+ # # │ a ┆ a_a ┆ a_txt │
1315
+ # # │ --- ┆ --- ┆ --- │
1316
+ # # │ i64 ┆ i64 ┆ str │
1317
+ # # ╞═════╪═════╪═══════╡
1318
+ # # │ 2 ┆ 4 ┆ 2 │
1319
+ # # │ 1 ┆ 1 ┆ 1 │
1320
+ # # └─────┴─────┴───────┘
1321
+ def sql_expr(sql)
1322
+ if sql.is_a?(::String)
1323
+ Utils.wrap_expr(Plr.sql_expr(sql))
1324
+ else
1325
+ sql.map { |q| Utils.wrap_expr(Plr.sql_expr(q)) }
1326
+ end
1327
+ end
1328
+ end
1329
+ end