polars-df 0.8.0-x86_64-linux → 0.10.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +3112 -1613
  6. data/LICENSE.txt +1 -1
  7. data/README.md +3 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +453 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/batched_csv_reader.rb +4 -2
  14. data/lib/polars/cat_expr.rb +24 -0
  15. data/lib/polars/cat_name_space.rb +75 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/data_frame.rb +306 -96
  18. data/lib/polars/data_types.rb +191 -28
  19. data/lib/polars/date_time_expr.rb +41 -18
  20. data/lib/polars/date_time_name_space.rb +9 -3
  21. data/lib/polars/exceptions.rb +12 -1
  22. data/lib/polars/expr.rb +898 -215
  23. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  24. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  25. data/lib/polars/functions/as_datatype.rb +248 -0
  26. data/lib/polars/functions/col.rb +47 -0
  27. data/lib/polars/functions/eager.rb +182 -0
  28. data/lib/polars/functions/lazy.rb +1280 -0
  29. data/lib/polars/functions/len.rb +49 -0
  30. data/lib/polars/functions/lit.rb +35 -0
  31. data/lib/polars/functions/random.rb +16 -0
  32. data/lib/polars/functions/range/date_range.rb +103 -0
  33. data/lib/polars/functions/range/int_range.rb +51 -0
  34. data/lib/polars/functions/repeat.rb +144 -0
  35. data/lib/polars/functions/whenthen.rb +96 -0
  36. data/lib/polars/functions.rb +29 -416
  37. data/lib/polars/group_by.rb +2 -2
  38. data/lib/polars/io.rb +36 -31
  39. data/lib/polars/lazy_frame.rb +405 -88
  40. data/lib/polars/list_expr.rb +158 -8
  41. data/lib/polars/list_name_space.rb +102 -0
  42. data/lib/polars/meta_expr.rb +175 -7
  43. data/lib/polars/series.rb +282 -41
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +413 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/testing.rb +507 -0
  48. data/lib/polars/utils.rb +106 -8
  49. data/lib/polars/version.rb +1 -1
  50. data/lib/polars/whenthen.rb +83 -0
  51. data/lib/polars.rb +16 -4
  52. metadata +34 -6
  53. data/lib/polars/lazy_functions.rb +0 -1181
  54. data/lib/polars/when.rb +0 -16
  55. data/lib/polars/when_then.rb +0 -19
@@ -0,0 +1,1280 @@
1
+ module Polars
2
+ module Functions
3
+ # Alias for an element being evaluated in an `eval` expression.
4
+ #
5
+ # @return [Expr]
6
+ #
7
+ # @example A horizontal rank computation by taking the elements of a list
8
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
9
+ # df.with_column(
10
+ # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
11
+ # )
12
+ # # =>
13
+ # # shape: (3, 3)
14
+ # # ┌─────┬─────┬────────────┐
15
+ # # │ a ┆ b ┆ rank │
16
+ # # │ --- ┆ --- ┆ --- │
17
+ # # │ i64 ┆ i64 ┆ list[f64] │
18
+ # # ╞═════╪═════╪════════════╡
19
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
20
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
21
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
22
+ # # └─────┴─────┴────────────┘
23
def element
  # The empty column name stands for the element currently being evaluated.
  placeholder = ""
  col(placeholder)
end
26
+
27
+ # Return the number of non-null values in the column.
28
+ #
29
+ # This function is syntactic sugar for `col(columns).count`.
30
+ #
31
+ # Calling this function without any arguments returns the number of rows in the
32
+ # context. **This way of using the function is deprecated.** Please use `len`
33
+ # instead.
34
+ #
35
+ # @param columns [Array]
36
+ # One or more column names.
37
+ #
38
+ # @return [Expr]
39
+ #
40
+ # @example
41
+ # df = Polars::DataFrame.new(
42
+ # {
43
+ # "a" => [1, 2, nil],
44
+ # "b" => [3, nil, nil],
45
+ # "c" => ["foo", "bar", "foo"]
46
+ # }
47
+ # )
48
+ # df.select(Polars.count("a"))
49
+ # # =>
50
+ # # shape: (1, 1)
51
+ # # ┌─────┐
52
+ # # │ a │
53
+ # # │ --- │
54
+ # # │ u32 │
55
+ # # ╞═════╡
56
+ # # │ 2 │
57
+ # # └─────┘
58
+ #
59
+ # @example Return the number of non-null values in multiple columns.
60
+ # df.select(Polars.count("b", "c"))
61
+ # # =>
62
+ # # shape: (1, 2)
63
+ # # ┌─────┬─────┐
64
+ # # │ b ┆ c │
65
+ # # │ --- ┆ --- │
66
+ # # │ u32 ┆ u32 │
67
+ # # ╞═════╪═════╡
68
+ # # │ 1 ┆ 3 │
69
+ # # └─────┴─────┘
70
def count(*columns)
  # Only the argument-less form (row count of the context) is deprecated;
  # counting non-null values of named columns remains supported.
  if columns.empty?
    warn "Calling `Polars.count` without arguments is deprecated. Use `Polars.len` instead."
    # Keep the historical output column name "count" for the row count.
    return Utils.wrap_expr(Plr.len._alias("count"))
  end

  col(*columns).count
end
78
+
79
+ # Return the cumulative count of the non-null values in the column.
80
+ #
81
+ # This function is syntactic sugar for `col(columns).cum_count`.
82
+ #
83
+ # If no arguments are passed, returns the cumulative count of a context.
84
+ # Rows containing null values count towards the result.
85
+ #
86
+ # @param columns [Array]
87
+ # Name(s) of the columns to use.
88
+ # @param reverse [Boolean]
89
+ # Reverse the operation.
90
+ #
91
+ # @return [Expr]
92
+ #
93
+ # @example
94
+ # df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
95
+ # df.select(Polars.cum_count("a"))
96
+ # # =>
97
+ # # shape: (3, 1)
98
+ # # ┌─────┐
99
+ # # │ a │
100
+ # # │ --- │
101
+ # # │ u32 │
102
+ # # ╞═════╡
103
+ # # │ 1 │
104
+ # # │ 2 │
105
+ # # │ 2 │
106
+ # # └─────┘
107
def cum_count(*columns, reverse: false)
  # Build the column selection first, then delegate to the expression method.
  selected = col(*columns)
  selected.cum_count(reverse: reverse)
end
110
+
111
+ # Aggregate all column values into a list.
112
+ #
113
+ # This function is syntactic sugar for `col(columns).implode`.
114
+ #
115
+ # @param columns [Array]
116
+ # One or more column names.
117
+ #
118
+ # @return [Expr]
119
+ #
120
+ # @example
121
+ # df = Polars::DataFrame.new(
122
+ # {
123
+ # "a" => [1, 2, 3],
124
+ # "b" => [9, 8, 7],
125
+ # "c" => ["foo", "bar", "foo"]
126
+ # }
127
+ # )
128
+ # df.select(Polars.implode("a"))
129
+ # # =>
130
+ # # shape: (1, 1)
131
+ # # ┌───────────┐
132
+ # # │ a │
133
+ # # │ --- │
134
+ # # │ list[i64] │
135
+ # # ╞═══════════╡
136
+ # # │ [1, 2, 3] │
137
+ # # └───────────┘
138
+ #
139
+ # @example
140
+ # df.select(Polars.implode("b", "c"))
141
+ # # =>
142
+ # # shape: (1, 2)
143
+ # # ┌───────────┬───────────────────────┐
144
+ # # │ b ┆ c │
145
+ # # │ --- ┆ --- │
146
+ # # │ list[i64] ┆ list[str] │
147
+ # # ╞═══════════╪═══════════════════════╡
148
+ # # │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
149
+ # # └───────────┴───────────────────────┘
150
def implode(*columns)
  # Delegate to the expression-level `implode` on the selected columns.
  selected = col(*columns)
  selected.implode
end
153
+
154
+ # Get the standard deviation.
155
+ #
156
+ # This function is syntactic sugar for `col(column).std(ddof: ddof)`.
157
+ #
158
+ # @param column [Object]
159
+ # Column name.
160
+ # @param ddof [Integer]
161
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
162
+ # where N represents the number of elements.
163
+ # By default ddof is 1.
164
+ #
165
+ # @return [Expr]
166
+ #
167
+ # @example
168
+ # df = Polars::DataFrame.new(
169
+ # {
170
+ # "a" => [1, 8, 3],
171
+ # "b" => [4, 5, 2],
172
+ # "c" => ["foo", "bar", "foo"]
173
+ # }
174
+ # )
175
+ # df.select(Polars.std("a"))
176
+ # # =>
177
+ # # shape: (1, 1)
178
+ # # ┌──────────┐
179
+ # # │ a │
180
+ # # │ --- │
181
+ # # │ f64 │
182
+ # # ╞══════════╡
183
+ # # │ 3.605551 │
184
+ # # └──────────┘
185
+ #
186
+ # @example
187
+ # df["a"].std
188
+ # # => 3.605551275463989
189
def std(column, ddof: 1)
  # Delegate to the expression-level standard deviation of the column.
  target = col(column)
  target.std(ddof: ddof)
end
192
+
193
+ # Get the variance.
194
+ #
195
+ # This function is syntactic sugar for `col(column).var(ddof: ddof)`.
196
+ #
197
+ # @param column [Object]
198
+ # Column name.
199
+ # @param ddof [Integer]
200
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
201
+ # where N represents the number of elements.
202
+ # By default ddof is 1.
203
+ #
204
+ # @return [Expr]
205
+ #
206
+ # @example
207
+ # df = Polars::DataFrame.new(
208
+ # {
209
+ # "a" => [1, 8, 3],
210
+ # "b" => [4, 5, 2],
211
+ # "c" => ["foo", "bar", "foo"]
212
+ # }
213
+ # )
214
+ # df.select(Polars.var("a"))
215
+ # # =>
216
+ # # shape: (1, 1)
217
+ # # ┌──────┐
218
+ # # │ a │
219
+ # # │ --- │
220
+ # # │ f64 │
221
+ # # ╞══════╡
222
+ # # │ 13.0 │
223
+ # # └──────┘
224
+ #
225
+ # @example
226
+ # df["a"].var
227
+ # # => 13.0
228
def var(column, ddof: 1)
  # Delegate to the expression-level variance of the column.
  target = col(column)
  target.var(ddof: ddof)
end
231
+
232
+
233
+ # Get the mean value.
234
+ #
235
+ # This function is syntactic sugar for `col(columns).mean`.
236
+ #
237
+ # @param columns [Array]
238
+ # One or more column names.
239
+ #
240
+ # @return [Expr]
241
+ #
242
+ # @example
243
+ # df = Polars::DataFrame.new(
244
+ # {
245
+ # "a" => [1, 8, 3],
246
+ # "b" => [4, 5, 2],
247
+ # "c" => ["foo", "bar", "foo"]
248
+ # }
249
+ # )
250
+ # df.select(Polars.mean("a"))
251
+ # # =>
252
+ # # shape: (1, 1)
253
+ # # ┌─────┐
254
+ # # │ a │
255
+ # # │ --- │
256
+ # # │ f64 │
257
+ # # ╞═════╡
258
+ # # │ 4.0 │
259
+ # # └─────┘
260
+ #
261
+ # @example
262
+ # df.select(Polars.mean("a", "b"))
263
+ # # =>
264
+ # # shape: (1, 2)
265
+ # # ┌─────┬──────────┐
266
+ # # │ a ┆ b │
267
+ # # │ --- ┆ --- │
268
+ # # │ f64 ┆ f64 │
269
+ # # ╞═════╪══════════╡
270
+ # # │ 4.0 ┆ 3.666667 │
271
+ # # └─────┴──────────┘
272
def mean(*columns)
  # Delegate to the expression-level mean of the selected columns.
  selected = col(*columns)
  selected.mean
end
275
+ alias_method :avg, :mean
276
+
277
+ # Get the median value.
278
+ #
279
+ # This function is syntactic sugar for `col(columns).median`.
280
+ #
281
+ # @param columns [Array]
282
+ # One or more column names.
283
+ #
284
+ # @return [Expr]
285
+ #
286
+ # @example
287
+ # df = Polars::DataFrame.new(
288
+ # {
289
+ # "a" => [1, 8, 3],
290
+ # "b" => [4, 5, 2],
291
+ # "c" => ["foo", "bar", "foo"]
292
+ # }
293
+ # )
294
+ # df.select(Polars.median("a"))
295
+ # # =>
296
+ # # shape: (1, 1)
297
+ # # ┌─────┐
298
+ # # │ a │
299
+ # # │ --- │
300
+ # # │ f64 │
301
+ # # ╞═════╡
302
+ # # │ 3.0 │
303
+ # # └─────┘
304
+ #
305
+ # @example
306
+ # df.select(Polars.median("a", "b"))
307
+ # # =>
308
+ # # shape: (1, 2)
309
+ # # ┌─────┬─────┐
310
+ # # │ a ┆ b │
311
+ # # │ --- ┆ --- │
312
+ # # │ f64 ┆ f64 │
313
+ # # ╞═════╪═════╡
314
+ # # │ 3.0 ┆ 4.0 │
315
+ # # └─────┴─────┘
316
def median(*columns)
  # Delegate to the expression-level median of the selected columns.
  selected = col(*columns)
  selected.median
end
319
+
320
+ # Count unique values.
321
+ #
322
+ # This function is syntactic sugar for `col(columns).n_unique`.
323
+ #
324
+ # @param columns [Array]
325
+ # One or more column names.
326
+ #
327
+ # @return [Expr]
328
+ #
329
+ # @example
330
+ # df = Polars::DataFrame.new(
331
+ # {
332
+ # "a" => [1, 8, 1],
333
+ # "b" => [4, 5, 2],
334
+ # "c" => ["foo", "bar", "foo"]
335
+ # }
336
+ # )
337
+ # df.select(Polars.n_unique("a"))
338
+ # # =>
339
+ # # shape: (1, 1)
340
+ # # ┌─────┐
341
+ # # │ a │
342
+ # # │ --- │
343
+ # # │ u32 │
344
+ # # ╞═════╡
345
+ # # │ 2 │
346
+ # # └─────┘
347
+ #
348
+ # @example
349
+ # df.select(Polars.n_unique("b", "c"))
350
+ # # =>
351
+ # # shape: (1, 2)
352
+ # # ┌─────┬─────┐
353
+ # # │ b ┆ c │
354
+ # # │ --- ┆ --- │
355
+ # # │ u32 ┆ u32 │
356
+ # # ╞═════╪═════╡
357
+ # # │ 3 ┆ 2 │
358
+ # # └─────┴─────┘
359
def n_unique(*columns)
  # Delegate to the expression-level unique count of the selected columns.
  selected = col(*columns)
  selected.n_unique
end
362
+
363
+ # Approximate count of unique values.
364
+ #
365
+ # This function is syntactic sugar for `col(columns).approx_n_unique`, and
366
+ # uses the HyperLogLog++ algorithm for cardinality estimation.
367
+ #
368
+ # @param columns [Array]
369
+ # One or more column names.
370
+ #
371
+ # @return [Expr]
372
+ #
373
+ # @example
374
+ # df = Polars::DataFrame.new(
375
+ # {
376
+ # "a" => [1, 8, 1],
377
+ # "b" => [4, 5, 2],
378
+ # "c" => ["foo", "bar", "foo"]
379
+ # }
380
+ # )
381
+ # df.select(Polars.approx_n_unique("a"))
382
+ # # =>
383
+ # # shape: (1, 1)
384
+ # # ┌─────┐
385
+ # # │ a │
386
+ # # │ --- │
387
+ # # │ u32 │
388
+ # # ╞═════╡
389
+ # # │ 2 │
390
+ # # └─────┘
391
+ #
392
+ # @example
393
+ # df.select(Polars.approx_n_unique("b", "c"))
394
+ # # =>
395
+ # # shape: (1, 2)
396
+ # # ┌─────┬─────┐
397
+ # # │ b ┆ c │
398
+ # # │ --- ┆ --- │
399
+ # # │ u32 ┆ u32 │
400
+ # # ╞═════╪═════╡
401
+ # # │ 3 ┆ 2 │
402
+ # # └─────┴─────┘
403
def approx_n_unique(*columns)
  # Delegate to the expression-level approximate unique count.
  selected = col(*columns)
  selected.approx_n_unique
end
406
+
407
+ # Get the first value.
408
+ #
409
+ # @param columns [Array]
410
+ # One or more column names. If not provided (default), returns an expression
411
+ # to take the first column of the context instead.
412
+ #
413
+ # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 8, 3],
419
+ # "b" => [4, 5, 2],
420
+ # "c" => ["foo", "bar", "baz"]
421
+ # }
422
+ # )
423
+ # df.select(Polars.first)
424
+ # # =>
425
+ # # shape: (3, 1)
426
+ # # ┌─────┐
427
+ # # │ a │
428
+ # # │ --- │
429
+ # # │ i64 │
430
+ # # ╞═════╡
431
+ # # │ 1 │
432
+ # # │ 8 │
433
+ # # │ 3 │
434
+ # # └─────┘
435
+ #
436
+ # @example
437
+ # df.select(Polars.first("b"))
438
+ # # =>
439
+ # # shape: (1, 1)
440
+ # # ┌─────┐
441
+ # # │ b │
442
+ # # │ --- │
443
+ # # │ i64 │
444
+ # # ╞═════╡
445
+ # # │ 4 │
446
+ # # └─────┘
447
+ #
448
+ # @example
449
+ # df.select(Polars.first("a", "c"))
450
+ # # =>
451
+ # # shape: (1, 2)
452
+ # # ┌─────┬─────┐
453
+ # # │ a ┆ c │
454
+ # # │ --- ┆ --- │
455
+ # # │ i64 ┆ str │
456
+ # # ╞═════╪═════╡
457
+ # # │ 1 ┆ foo │
458
+ # # └─────┴─────┘
459
def first(*columns)
  # Without column names, select the first column of the context.
  return Utils.wrap_expr(Plr.first) if columns.empty?

  # Otherwise take the first value of each named column.
  selected = col(*columns)
  selected.first
end
466
+
467
+ # Get the last value.
468
+ #
469
+ # @param columns [Array]
470
+ # One or more column names. If not provided (default), returns an expression
471
+ # to take the last column of the context instead.
472
+ #
473
+ # @return [Expr]
474
+ #
475
+ # @example
476
+ # df = Polars::DataFrame.new(
477
+ # {
478
+ # "a" => [1, 8, 3],
479
+ # "b" => [4, 5, 2],
480
+ # "c" => ["foo", "bar", "baz"]
481
+ # }
482
+ # )
483
+ # df.select(Polars.last)
484
+ # # =>
485
+ # # shape: (3, 1)
486
+ # # ┌─────┐
487
+ # # │ c │
488
+ # # │ --- │
489
+ # # │ str │
490
+ # # ╞═════╡
491
+ # # │ foo │
492
+ # # │ bar │
493
+ # # │ baz │
494
+ # # └─────┘
495
+ #
496
+ # @example
497
+ # df.select(Polars.last("a"))
498
+ # # =>
499
+ # # shape: (1, 1)
500
+ # # ┌─────┐
501
+ # # │ a │
502
+ # # │ --- │
503
+ # # │ i64 │
504
+ # # ╞═════╡
505
+ # # │ 3 │
506
+ # # └─────┘
507
+ #
508
+ # @example
509
+ # df.select(Polars.last("b", "c"))
510
+ # # =>
511
+ # # shape: (1, 2)
512
+ # # ┌─────┬─────┐
513
+ # # │ b ┆ c │
514
+ # # │ --- ┆ --- │
515
+ # # │ i64 ┆ str │
516
+ # # ╞═════╪═════╡
517
+ # # │ 2 ┆ baz │
518
+ # # └─────┴─────┘
519
def last(*columns)
  # Without column names, select the last column of the context.
  return Utils.wrap_expr(Plr.last) if columns.empty?

  # Otherwise take the last value of each named column.
  selected = col(*columns)
  selected.last
end
526
+
527
+ # Get the first `n` rows.
528
+ #
529
+ # This function is syntactic sugar for `col(column).head(n)`.
530
+ #
531
+ # @param column [Object]
532
+ # Column name.
533
+ # @param n [Integer]
534
+ # Number of rows to return.
535
+ #
536
+ # @return [Expr]
537
+ #
538
+ # @example
539
+ # df = Polars::DataFrame.new(
540
+ # {
541
+ # "a" => [1, 8, 3],
542
+ # "b" => [4, 5, 2],
543
+ # "c" => ["foo", "bar", "foo"]
544
+ # }
545
+ # )
546
+ # df.select(Polars.head("a"))
547
+ # # =>
548
+ # # shape: (3, 1)
549
+ # # ┌─────┐
550
+ # # │ a │
551
+ # # │ --- │
552
+ # # │ i64 │
553
+ # # ╞═════╡
554
+ # # │ 1 │
555
+ # # │ 8 │
556
+ # # │ 3 │
557
+ # # └─────┘
558
+ #
559
+ # @example
560
+ # df.select(Polars.head("a", 2))
561
+ # # =>
562
+ # # shape: (2, 1)
563
+ # # ┌─────┐
564
+ # # │ a │
565
+ # # │ --- │
566
+ # # │ i64 │
567
+ # # ╞═════╡
568
+ # # │ 1 │
569
+ # # │ 8 │
570
+ # # └─────┘
571
def head(column, n = 10)
  # Delegate to the expression-level `head` of the column.
  target = col(column)
  target.head(n)
end
574
+
575
+ # Get the last `n` rows.
576
+ #
577
+ # This function is syntactic sugar for `col(column).tail(n)`.
578
+ #
579
+ # @param column [Object]
580
+ # Column name.
581
+ # @param n [Integer]
582
+ # Number of rows to return.
583
+ #
584
+ # @return [Expr]
585
+ #
586
+ # @example
587
+ # df = Polars::DataFrame.new(
588
+ # {
589
+ # "a" => [1, 8, 3],
590
+ # "b" => [4, 5, 2],
591
+ # "c" => ["foo", "bar", "foo"]
592
+ # }
593
+ # )
594
+ # df.select(Polars.tail("a"))
595
+ # # =>
596
+ # # shape: (3, 1)
597
+ # # ┌─────┐
598
+ # # │ a │
599
+ # # │ --- │
600
+ # # │ i64 │
601
+ # # ╞═════╡
602
+ # # │ 1 │
603
+ # # │ 8 │
604
+ # # │ 3 │
605
+ # # └─────┘
606
+ #
607
+ # @example
608
+ # df.select(Polars.tail("a", 2))
609
+ # # =>
610
+ # # shape: (2, 1)
611
+ # # ┌─────┐
612
+ # # │ a │
613
+ # # │ --- │
614
+ # # │ i64 │
615
+ # # ╞═════╡
616
+ # # │ 8 │
617
+ # # │ 3 │
618
+ # # └─────┘
619
def tail(column, n = 10)
  # Delegate to the expression-level `tail` of the column.
  target = col(column)
  target.tail(n)
end
622
+
623
+ # Compute the Pearson's or Spearman rank correlation between two columns.
624
+ #
625
+ # @param a [Object]
626
+ # Column name or Expression.
627
+ # @param b [Object]
628
+ # Column name or Expression.
629
+ # @param ddof [Integer]
630
+ # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
631
+ # where N represents the number of elements.
632
+ # By default ddof is 1.
633
+ # @param method ["pearson", "spearman"]
634
+ # Correlation method.
635
+ # @param propagate_nans [Boolean]
636
+ # If `true` any `NaN` encountered will lead to `NaN` in the output.
637
+ # Defaults to `false` where `NaN` are regarded as larger than any finite number
638
+ # and thus lead to the highest rank.
639
+ #
640
+ # @return [Expr]
641
+ #
642
+ # @example Pearson's correlation:
643
+ # df = Polars::DataFrame.new(
644
+ # {
645
+ # "a" => [1, 8, 3],
646
+ # "b" => [4, 5, 2],
647
+ # "c" => ["foo", "bar", "foo"]
648
+ # }
649
+ # )
650
+ # df.select(Polars.corr("a", "b"))
651
+ # # =>
652
+ # # shape: (1, 1)
653
+ # # ┌──────────┐
654
+ # # │ a │
655
+ # # │ --- │
656
+ # # │ f64 │
657
+ # # ╞══════════╡
658
+ # # │ 0.544705 │
659
+ # # └──────────┘
660
+ #
661
+ # @example Spearman rank correlation:
662
+ # df = Polars::DataFrame.new(
663
+ # {
664
+ # "a" => [1, 8, 3],
665
+ # "b" => [4, 5, 2],
666
+ # "c" => ["foo", "bar", "foo"]
667
+ # }
668
+ # )
669
+ # df.select(Polars.corr("a", "b", method: "spearman"))
670
+ # # =>
671
+ # # shape: (1, 1)
672
+ # # ┌─────┐
673
+ # # │ a │
674
+ # # │ --- │
675
+ # # │ f64 │
676
+ # # ╞═════╡
677
+ # # │ 0.5 │
678
+ # # └─────┘
679
def corr(
  a,
  b,
  method: "pearson",
  ddof: 1,
  propagate_nans: false
)
  a = Utils.parse_as_expression(a)
  b = Utils.parse_as_expression(b)

  # Compare on the string form so that symbols (:pearson, :spearman) are
  # accepted in addition to strings.
  case method.to_s
  when "pearson"
    Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
  when "spearman"
    # `propagate_nans` is only forwarded to the Spearman implementation.
    Utils.wrap_expr(Plr.spearman_rank_corr(a, b, ddof, propagate_nans))
  else
    # Single braces: the doubled braces were a leftover Python f-string escape.
    raise ArgumentError, "method must be one of {'pearson', 'spearman'}, got #{method.inspect}"
  end
end
698
+
699
+ # Compute the covariance between two columns/expressions.
700
+ #
701
+ # @param a [Object]
702
+ # Column name or Expression.
703
+ # @param b [Object]
704
+ # Column name or Expression.
705
+ # @param ddof [Integer]
706
+ # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
707
+ # where N represents the number of elements.
708
+ # By default ddof is 1.
709
+ #
710
+ # @return [Expr]
711
+ #
712
+ # @example
713
+ # df = Polars::DataFrame.new(
714
+ # {
715
+ # "a" => [1, 8, 3],
716
+ # "b" => [4, 5, 2],
717
+ # "c" => ["foo", "bar", "foo"]
718
+ # }
719
+ # )
720
+ # df.select(Polars.cov("a", "b"))
721
+ # # =>
722
+ # # shape: (1, 1)
723
+ # # ┌─────┐
724
+ # # │ a │
725
+ # # │ --- │
726
+ # # │ f64 │
727
+ # # ╞═════╡
728
+ # # │ 3.0 │
729
+ # # └─────┘
730
def cov(a, b, ddof: 1)
  # Normalise both inputs to native expressions before calling into Plr.
  lhs = Utils.parse_as_expression(a)
  rhs = Utils.parse_as_expression(b)
  native = Plr.cov(lhs, rhs, ddof)
  Utils.wrap_expr(native)
end
735
+
736
+ # def map
737
+ # end
738
+
739
+ # def apply
740
+ # end
741
+
742
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
743
+ #
744
+ # @return [Expr]
745
def fold(acc, f, exprs)
  # Strings passed as the accumulator are treated as literals, not column names.
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  # Allow a single expression in place of a list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.fold(acc._rbexpr, f, rbexprs))
end
754
+
755
+ # def reduce
756
+ # end
757
+
758
+ # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
759
+ #
760
+ # Every cumulative result is added as a separate field in a Struct column.
761
+ #
762
+ # @param acc [Object]
763
+ # Accumulator Expression. This is the value that will be initialized when the fold
764
+ # starts. For a sum this could for instance be lit(0).
765
+ # @param f [Object]
766
+ # Function to apply over the accumulator and the value.
767
+ # Fn(acc, value) -> new_value
768
+ # @param exprs [Object]
769
+ # Expressions to aggregate over. May also be a wildcard expression.
770
+ # @param include_init [Boolean]
771
+ # Include the initial accumulator state as struct field.
772
+ #
773
+ # @return [Expr]
774
+ #
775
+ # @note
776
+ # If you simply want the first encountered expression as accumulator,
777
+ # consider using `cumreduce`.
778
def cum_fold(acc, f, exprs, include_init: false)
  # Strings passed as the accumulator are treated as literals, not column names.
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  # Allow a single expression in place of a list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.cum_fold(acc._rbexpr, f, rbexprs, include_init))
end
787
+ alias_method :cumfold, :cum_fold
788
+
789
+ # def cum_reduce
790
+ # end
791
+
792
+ # Compute two argument arctan in radians.
793
+ #
794
+ # Returns the angle (in radians) in the plane between the
795
+ # positive x-axis and the ray from the origin to (x,y).
796
+ #
797
+ # @param y [Object]
798
+ # Column name or Expression.
799
+ # @param x [Object]
800
+ # Column name or Expression.
801
+ #
802
+ # @return [Expr]
803
+ #
804
+ # @example
805
+ # twoRootTwo = Math.sqrt(2) / 2
806
+ # df = Polars::DataFrame.new(
807
+ # {
808
+ # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
809
+ # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
810
+ # }
811
+ # )
812
+ # df.select(
813
+ # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
814
+ # )
815
+ # # =>
816
+ # # shape: (4, 2)
817
+ # # ┌────────┬───────────┐
818
+ # # │ atan2d ┆ atan2 │
819
+ # # │ --- ┆ --- │
820
+ # # │ f64 ┆ f64 │
821
+ # # ╞════════╪═══════════╡
822
+ # # │ 45.0 ┆ 0.785398 │
823
+ # # │ -45.0 ┆ -0.785398 │
824
+ # # │ 135.0 ┆ 2.356194 │
825
+ # # │ -135.0 ┆ -2.356194 │
826
+ # # └────────┴───────────┘
827
def arctan2(y, x)
  # String-like arguments are interpreted as column names.
  y = col(y) if Utils.strlike?(y)
  x = col(x) if Utils.strlike?(x)

  Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
end
836
+
837
+ # Compute two argument arctan in degrees.
838
+ #
839
+ # Returns the angle (in degrees) in the plane between the positive x-axis
840
+ # and the ray from the origin to (x,y).
841
+ #
842
+ # @param y [Object]
843
+ # Column name or Expression.
844
+ # @param x [Object]
845
+ # Column name or Expression.
846
+ #
847
+ # @return [Expr]
848
+ #
849
+ # @example
850
+ # twoRootTwo = Math.sqrt(2) / 2
851
+ # df = Polars::DataFrame.new(
852
+ # {
853
+ # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
854
+ # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
855
+ # }
856
+ # )
857
+ # df.select(
858
+ # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
859
+ # )
860
+ # # =>
861
+ # # shape: (4, 2)
862
+ # # ┌────────┬───────────┐
863
+ # # │ atan2d ┆ atan2 │
864
+ # # │ --- ┆ --- │
865
+ # # │ f64 ┆ f64 │
866
+ # # ╞════════╪═══════════╡
867
+ # # │ 45.0 ┆ 0.785398 │
868
+ # # │ -45.0 ┆ -0.785398 │
869
+ # # │ 135.0 ┆ 2.356194 │
870
+ # # │ -135.0 ┆ -2.356194 │
871
+ # # └────────┴───────────┘
872
def arctan2d(y, x)
  # String-like arguments are interpreted as column names.
  y = col(y) if Utils.strlike?(y)
  x = col(x) if Utils.strlike?(x)

  Utils.wrap_expr(Plr.arctan2d(y._rbexpr, x._rbexpr))
end
881
+
882
+ # Exclude certain columns from a wildcard/regex selection.
883
+ #
884
+ # @param columns [Object]
885
+ # Column(s) to exclude from selection
886
+ # This can be:
887
+ #
888
+ # - a column name, or multiple column names
889
+ # - a regular expression starting with `^` and ending with `$`
890
+ # - a dtype or multiple dtypes
891
+ #
892
+ # @return [Object]
893
+ #
894
+ # @example
895
+ # df = Polars::DataFrame.new(
896
+ # {
897
+ # "aa" => [1, 2, 3],
898
+ # "ba" => ["a", "b", nil],
899
+ # "cc" => [nil, 2.5, 1.5]
900
+ # }
901
+ # )
902
+ # # =>
903
+ # # shape: (3, 3)
904
+ # # ┌─────┬──────┬──────┐
905
+ # # │ aa ┆ ba ┆ cc │
906
+ # # │ --- ┆ --- ┆ --- │
907
+ # # │ i64 ┆ str ┆ f64 │
908
+ # # ╞═════╪══════╪══════╡
909
+ # # │ 1 ┆ a ┆ null │
910
+ # # │ 2 ┆ b ┆ 2.5 │
911
+ # # │ 3 ┆ null ┆ 1.5 │
912
+ # # └─────┴──────┴──────┘
913
+ #
914
+ # @example Exclude by column name(s):
915
+ # df.select(Polars.exclude("ba"))
916
+ # # =>
917
+ # # shape: (3, 2)
918
+ # # ┌─────┬──────┐
919
+ # # │ aa ┆ cc │
920
+ # # │ --- ┆ --- │
921
+ # # │ i64 ┆ f64 │
922
+ # # ╞═════╪══════╡
923
+ # # │ 1 ┆ null │
924
+ # # │ 2 ┆ 2.5 │
925
+ # # │ 3 ┆ 1.5 │
926
+ # # └─────┴──────┘
927
+ #
928
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
929
+ # df.select(Polars.exclude("^.*a$"))
930
+ # # =>
931
+ # # shape: (3, 1)
932
+ # # ┌──────┐
933
+ # # │ cc │
934
+ # # │ --- │
935
+ # # │ f64 │
936
+ # # ╞══════╡
937
+ # # │ null │
938
+ # # │ 2.5 │
939
+ # # │ 1.5 │
940
+ # # └──────┘
941
def exclude(columns)
  # Start from the wildcard selection and remove the requested columns.
  wildcard = col("*")
  wildcard.exclude(columns)
end
944
+
945
+ # Syntactic sugar for `Polars.col("foo").agg_groups`.
946
+ #
947
+ # @return [Object]
948
def groups(column)
  # Delegate to the expression-level `agg_groups` of the column.
  target = col(column)
  target.agg_groups
end
951
+
952
+ # Syntactic sugar for `Polars.col("foo").quantile(...)`.
953
+ #
954
+ # @param column [String]
955
+ # Column name.
956
+ # @param quantile [Float]
957
+ # Quantile between 0.0 and 1.0.
958
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
959
+ # Interpolation method.
960
+ #
961
+ # @return [Expr]
962
def quantile(column, quantile, interpolation: "nearest")
  # Delegate to the expression-level `quantile` of the column.
  target = col(column)
  target.quantile(quantile, interpolation: interpolation)
end
965
+
966
+ # Find the indexes that would sort the columns.
967
+ #
968
+ # Argsort by multiple columns. The first column will be used for the ordering.
969
+ # If there are duplicates in the first column, the second column will be used to
970
+ # determine the ordering and so on.
971
+ #
972
+ # @param exprs [Object]
973
+ # Columns used to determine the ordering.
974
+ # @param reverse [Boolean]
975
+ # Default is ascending.
976
+ #
977
+ # @return [Expr]
978
def arg_sort_by(exprs, reverse: false)
  # Accept a single sort key as well as an array of them.
  exprs = [exprs] unless exprs.is_a?(::Array)
  # A single boolean is expanded to one flag per sort key.
  reverse = ::Array.new(exprs.length, reverse) if reverse == true || reverse == false

  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.arg_sort_by(rbexprs, reverse))
end
988
+ alias_method :argsort_by, :arg_sort_by
989
+
990
+ # Collect multiple LazyFrames at the same time.
991
+ #
992
+ # This runs all the computation graphs in parallel on Polars threadpool.
993
+ #
994
+ # @param lazy_frames [Array]
995
+ # A list of LazyFrames to collect.
996
+ # @param type_coercion [Boolean]
997
+ # Do type coercion optimization.
998
+ # @param predicate_pushdown [Boolean]
999
+ # Do predicate pushdown optimization.
1000
+ # @param projection_pushdown [Boolean]
1001
+ # Do projection pushdown optimization.
1002
+ # @param simplify_expression [Boolean]
1003
+ # Run simplify expressions optimization.
1004
+ # @param string_cache [Boolean]
1005
+ # This argument is deprecated and will be ignored
1006
+ # @param no_optimization [Boolean]
1007
+ # Turn off optimizations.
1008
+ # @param slice_pushdown [Boolean]
1009
+ # Slice pushdown optimization.
1010
+ # @param common_subplan_elimination [Boolean]
1011
+ # Will try to cache branching subplans that occur on self-joins or unions.
1012
+ # @param allow_streaming [Boolean]
1013
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
1014
+ #
1015
+ # @return [Array]
1016
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # `string_cache` is accepted for backward compatibility only; it is never read.

  # `no_optimization` switches off the pushdown and subplan optimizations;
  # type coercion and expression simplification keep their given values.
  if no_optimization
    predicate_pushdown = false
    projection_pushdown = false
    slice_pushdown = false
    common_subplan_elimination = false
  end

  # Apply the same optimization flags to every frame's native lazy frame.
  prepared = lazy_frames.map do |lf|
    lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming,
      false # NOTE(review): meaning of this trailing positional flag is not visible here — confirm
    )
  end

  # Collect all plans in one native call, then wrap each native frame.
  Plr.collect_all(prepared).map { |rbdf| Utils.wrap_df(rbdf) }
end
1058
+
1059
+ # Run polars expressions without a context.
1060
+ #
1061
+ # This is syntactic sugar for running `df.select` on an empty DataFrame.
1062
+ #
1063
+ # @param exprs [Array]
1064
+ # Column(s) to select, specified as positional arguments.
1065
+ # Accepts expression input. Strings are parsed as column names,
1066
+ # other non-expression inputs are parsed as literals.
1067
+ # @param named_exprs [Hash]
1068
+ # Additional columns to select, specified as keyword arguments.
1069
+ # The columns will be renamed to the keyword used.
1070
+ #
1071
+ # @return [DataFrame]
1072
+ #
1073
+ # @example
1074
+ # foo = Polars::Series.new("foo", [1, 2, 3])
1075
+ # bar = Polars::Series.new("bar", [3, 2, 1])
1076
+ # Polars.select(min: Polars.min_horizontal(foo, bar))
1077
+ # # =>
1078
+ # # shape: (3, 1)
1079
+ # # ┌─────┐
1080
+ # # │ min │
1081
+ # # │ --- │
1082
+ # # │ i64 │
1083
+ # # ╞═════╡
1084
+ # # │ 1 │
1085
+ # # │ 2 │
1086
+ # # │ 1 │
1087
+ # # └─────┘
1088
def select(*exprs, **named_exprs)
  # Evaluate the expressions against an empty frame, i.e. without a context.
  empty_frame = DataFrame.new([])
  empty_frame.select(*exprs, **named_exprs)
end
1091
+
1092
+ # Return indices where `condition` evaluates `true`.
1093
+ #
1094
+ # @param condition [Expr]
1095
+ # Boolean expression to evaluate
1096
+ # @param eager [Boolean]
1097
+ # Whether to apply this function eagerly (as opposed to lazily).
1098
+ #
1099
+ # @return [Expr, Series]
1100
+ #
1101
+ # @example
1102
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
1103
+ # df.select(
1104
+ # [
1105
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
1106
+ # ]
1107
+ # ).to_series
1108
+ # # =>
1109
+ # # shape: (2,)
1110
+ # # Series: 'a' [u32]
1111
+ # # [
1112
+ # # 1
1113
+ # # 3
1114
+ # # ]
1115
def arg_where(condition, eager: false)
  # Lazy path: build an expression from the condition.
  unless eager
    condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    return Utils.wrap_expr(Plr.arg_where(condition._rbexpr))
  end

  # Eager path: only a Series can be evaluated immediately.
  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end

  # Run the lazy form against a one-column frame built from the Series.
  frame = condition.to_frame
  frame.select(arg_where(Polars.col(condition.name))).to_series
end
1126
+
1127
# Folds the columns from left to right, keeping the first non-null value.
#
# @param exprs [Array]
#   Columns to coalesce. Accepts expression input. Strings are parsed as column
#   names, other non-expression inputs are parsed as literals.
# @param more_exprs [Array]
#   Additional columns to coalesce, specified as positional arguments.
#
# @return [Expr]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "a" => [1, nil, nil, nil],
#       "b" => [1, 2, nil, nil],
#       "c" => [5, nil, 3, nil]
#     }
#   )
#   df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
#   # =>
#   # shape: (4, 4)
#   # ┌──────┬──────┬──────┬─────┐
#   # │ a    ┆ b    ┆ c    ┆ d   │
#   # │ ---  ┆ ---  ┆ ---  ┆ --- │
#   # │ i64  ┆ i64  ┆ i64  ┆ i64 │
#   # ╞══════╪══════╪══════╪═════╡
#   # │ 1    ┆ 1    ┆ 5    ┆ 1   │
#   # │ null ┆ 2    ┆ null ┆ 2   │
#   # │ null ┆ null ┆ 3    ┆ 3   │
#   # │ null ┆ null ┆ null ┆ 10  │
#   # └──────┴──────┴──────┴─────┘
#
# @example
#   df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
#   # =>
#   # shape: (4, 4)
#   # ┌──────┬──────┬──────┬──────┐
#   # │ a    ┆ b    ┆ c    ┆ d    │
#   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
#   # │ i64  ┆ i64  ┆ i64  ┆ f64  │
#   # ╞══════╪══════╪══════╪══════╡
#   # │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
#   # │ null ┆ 2    ┆ null ┆ 2.0  │
#   # │ null ┆ null ┆ 3    ┆ 3.0  │
#   # │ null ┆ null ┆ null ┆ 10.0 │
#   # └──────┴──────┴──────┴──────┘
def coalesce(exprs, *more_exprs)
  # Merge the first argument and any extra positional arguments into one
  # list of parsed expressions before handing them to the native coalesce.
  parsed = Utils.parse_as_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.coalesce(parsed))
end
1177
+
1178
# Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
#
# Depending on the `unit` provided, this function will return a different dtype:
# - unit: "d" returns Date
# - unit: "s" returns Datetime with "us" precision (the input is cast to Int64
#   and multiplied by 1_000_000 first)
# - unit: "ms" returns Datetime with "ms" precision
# - unit: "us" returns Datetime with "us" precision
# - unit: "ns" returns Datetime with "ns" precision
#
# @param column [Object]
#   Column to convert. A string-like value is treated as a column name, an
#   Expr or Series is used as is, and anything else is passed to `Series.new`.
# @param unit [String]
#   The unit of the time steps since epoch time.
# @param eager [Boolean]
#   If eager evaluation is `true`, a Series is returned instead of an Expr.
#
# @return [Expr, Series]
#   The result of casting `column`; with `eager: true` the cast is evaluated
#   against the input Series and a Series is returned.
#
# @raise [ArgumentError]
#   If `unit` is not one of the supported units, or if `eager: true` and the
#   input did not resolve to a Series.
#
# @example
#   df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
#   df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
#   # =>
#   # shape: (2, 1)
#   # ┌─────────────────────┐
#   # │ timestamp           │
#   # │ ---                 │
#   # │ datetime[μs]        │
#   # ╞═════════════════════╡
#   # │ 2022-10-25 07:31:17 │
#   # │ 2022-10-25 07:31:39 │
#   # └─────────────────────┘
def from_epoch(column, unit: "s", eager: false)
  # Normalise the input: names become column expressions, raw values become Series.
  if Utils.strlike?(column)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  expr =
    if unit == "d"
      column.cast(Date)
    elsif unit == "s"
      # Seconds are scaled to microseconds before casting to Datetime("us").
      (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
    elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
      column.cast(Datetime.new(unit))
    else
      raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
    end

  return expr unless eager

  # Eager evaluation needs concrete data, so an Expr input is rejected here.
  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end
1236
+
1237
# Parse one or more SQL expressions to polars expression(s).
#
# @param sql [Object]
#   A single SQL expression as a String, or a collection of SQL expressions.
#
# @return [Expr, Array<Expr>]
#   A single Expr for String input; otherwise the result of mapping each
#   element to an Expr.
#
# @example Parse a single SQL expression:
#   df = Polars::DataFrame.new({"a" => [2, 1]})
#   expr = Polars.sql_expr("MAX(a)")
#   df.select(expr)
#   # =>
#   # shape: (1, 1)
#   # ┌─────┐
#   # │ a   │
#   # │ --- │
#   # │ i64 │
#   # ╞═════╡
#   # │ 2   │
#   # └─────┘
#
# @example Parse multiple SQL expressions:
#   df.with_columns(
#     *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
#   )
#   # =>
#   # shape: (2, 3)
#   # ┌─────┬─────┬───────┐
#   # │ a   ┆ a_a ┆ a_txt │
#   # │ --- ┆ --- ┆ ---   │
#   # │ i64 ┆ i64 ┆ str   │
#   # ╞═════╪═════╪═══════╡
#   # │ 2   ┆ 4   ┆ 2     │
#   # │ 1   ┆ 1   ┆ 1     │
#   # └─────┴─────┴───────┘
def sql_expr(sql)
  to_expr = ->(query) { Utils.wrap_expr(Plr.sql_expr(query)) }

  # `::String` is written with the leading `::` as in the original code.
  return to_expr.call(sql) if sql.is_a?(::String)

  sql.map { |query| to_expr.call(query) }
end
1279
+ end
1280
+ end