polars-df 0.13.0-aarch64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39059 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,1329 @@
1
+ module Polars
2
+ module Functions
3
+ # Alias for an element being evaluated in an `eval` expression.
4
+ #
5
+ # @return [Expr]
6
+ #
7
+ # @example A horizontal rank computation by taking the elements of a list
8
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
9
+ # df.with_column(
10
+ # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
11
+ # )
12
+ # # =>
13
+ # # shape: (3, 3)
14
+ # # ┌─────┬─────┬────────────┐
15
+ # # │ a ┆ b ┆ rank │
16
+ # # │ --- ┆ --- ┆ --- │
17
+ # # │ i64 ┆ i64 ┆ list[f64] │
18
+ # # ╞═════╪═════╪════════════╡
19
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
20
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
21
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
22
+ # # └─────┴─────┴────────────┘
23
def element
  # The element alias is expressed as a column with an empty name.
  element_name = ""
  col(element_name)
end
26
+
27
+ # Return the number of non-null values in the column.
28
+ #
29
+ # This function is syntactic sugar for `col(columns).count`.
30
+ #
31
+ # Calling this function without any arguments returns the number of rows in the
32
+ # context. **This way of using the function is deprecated.** Please use `len`
33
+ # instead.
34
+ #
35
+ # @param columns [Array]
36
+ # One or more column names.
37
+ #
38
+ # @return [Expr]
39
+ #
40
+ # @example
41
+ # df = Polars::DataFrame.new(
42
+ # {
43
+ # "a" => [1, 2, nil],
44
+ # "b" => [3, nil, nil],
45
+ # "c" => ["foo", "bar", "foo"]
46
+ # }
47
+ # )
48
+ # df.select(Polars.count("a"))
49
+ # # =>
50
+ # # shape: (1, 1)
51
+ # # ┌─────┐
52
+ # # │ a │
53
+ # # │ --- │
54
+ # # │ u32 │
55
+ # # ╞═════╡
56
+ # # │ 2 │
57
+ # # └─────┘
58
+ #
59
+ # @example Return the number of non-null values in multiple columns.
60
+ # df.select(Polars.count("b", "c"))
61
+ # # =>
62
+ # # shape: (1, 2)
63
+ # # ┌─────┬─────┐
64
+ # # │ b ┆ c │
65
+ # # │ --- ┆ --- │
66
+ # # │ u32 ┆ u32 │
67
+ # # ╞═════╪═════╡
68
+ # # │ 1 ┆ 3 │
69
+ # # └─────┴─────┘
70
def count(*columns)
  # With explicit columns this is plain sugar for `col(...).count`.
  return col(*columns).count unless columns.empty?

  # No columns given: deprecated form, emits a warning and returns the
  # length expression aliased to "count".
  warn "`Polars.count` is deprecated. Use `Polars.length` instead."
  Utils.wrap_expr(Plr.len._alias("count"))
end
78
+
79
+ # Return the cumulative count of the non-null values in the column.
80
+ #
81
+ # This function is syntactic sugar for `col(columns).cum_count`.
82
+ #
83
+ # If no arguments are passed, returns the cumulative count of a context.
84
+ # Rows containing null values count towards the result.
85
+ #
86
+ # @param columns [Array]
87
+ # Name(s) of the columns to use.
88
+ # @param reverse [Boolean]
89
+ # Reverse the operation.
90
+ #
91
+ # @return [Expr]
92
+ #
93
+ # @example
94
+ # df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
95
+ # df.select(Polars.cum_count("a"))
96
+ # # =>
97
+ # # shape: (3, 1)
98
+ # # ┌─────┐
99
+ # # │ a │
100
+ # # │ --- │
101
+ # # │ u32 │
102
+ # # ╞═════╡
103
+ # # │ 1 │
104
+ # # │ 2 │
105
+ # # │ 2 │
106
+ # # └─────┘
107
def cum_count(*columns, reverse: false)
  # Build the column selection first, then apply the cumulative count to it.
  selection = col(*columns)
  selection.cum_count(reverse: reverse)
end
110
+
111
+ # Aggregate all column values into a list.
112
+ #
113
+ # This function is syntactic sugar for `col(columns).implode`.
114
+ #
115
+ # @param columns [Array]
116
+ # One or more column names.
117
+ #
118
+ # @return [Expr]
119
+ #
120
+ # @example
121
+ # df = Polars::DataFrame.new(
122
+ # {
123
+ # "a" => [1, 2, 3],
124
+ # "b" => [9, 8, 7],
125
+ # "c" => ["foo", "bar", "foo"]
126
+ # }
127
+ # )
128
+ # df.select(Polars.implode("a"))
129
+ # # =>
130
+ # # shape: (1, 1)
131
+ # # ┌───────────┐
132
+ # # │ a │
133
+ # # │ --- │
134
+ # # │ list[i64] │
135
+ # # ╞═══════════╡
136
+ # # │ [1, 2, 3] │
137
+ # # └───────────┘
138
+ #
139
+ # @example
140
+ # df.select(Polars.implode("b", "c"))
141
+ # # =>
142
+ # # shape: (1, 2)
143
+ # # ┌───────────┬───────────────────────┐
144
+ # # │ b ┆ c │
145
+ # # │ --- ┆ --- │
146
+ # # │ list[i64] ┆ list[str] │
147
+ # # ╞═══════════╪═══════════════════════╡
148
+ # # │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
149
+ # # └───────────┴───────────────────────┘
150
def implode(*columns)
  # Select the requested columns, then aggregate each into a list.
  selection = col(*columns)
  selection.implode
end
153
+
154
+ # Get the standard deviation.
155
+ #
156
+ # This function is syntactic sugar for `col(column).std(ddof: ddof)`.
157
+ #
158
+ # @param column [Object]
159
+ # Column name.
160
+ # @param ddof [Integer]
161
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
162
+ # where N represents the number of elements.
163
+ # By default ddof is 1.
164
+ #
165
+ # @return [Expr]
166
+ #
167
+ # @example
168
+ # df = Polars::DataFrame.new(
169
+ # {
170
+ # "a" => [1, 8, 3],
171
+ # "b" => [4, 5, 2],
172
+ # "c" => ["foo", "bar", "foo"]
173
+ # }
174
+ # )
175
+ # df.select(Polars.std("a"))
176
+ # # =>
177
+ # # shape: (1, 1)
178
+ # # ┌──────────┐
179
+ # # │ a │
180
+ # # │ --- │
181
+ # # │ f64 │
182
+ # # ╞══════════╡
183
+ # # │ 3.605551 │
184
+ # # └──────────┘
185
+ #
186
+ # @example
187
+ # df["a"].std
188
+ # # => 3.605551275463989
189
def std(column, ddof: 1)
  # Delegates to the column expression's `std`, forwarding `ddof` unchanged.
  target = col(column)
  target.std(ddof: ddof)
end
192
+
193
+ # Get the variance.
194
+ #
195
+ # This function is syntactic sugar for `col(column).var(ddof: ddof)`.
196
+ #
197
+ # @param column [Object]
198
+ # Column name.
199
+ # @param ddof [Integer]
200
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
201
+ # where N represents the number of elements.
202
+ # By default ddof is 1.
203
+ #
204
+ # @return [Expr]
205
+ #
206
+ # @example
207
+ # df = Polars::DataFrame.new(
208
+ # {
209
+ # "a" => [1, 8, 3],
210
+ # "b" => [4, 5, 2],
211
+ # "c" => ["foo", "bar", "foo"]
212
+ # }
213
+ # )
214
+ # df.select(Polars.var("a"))
215
+ # # =>
216
+ # # shape: (1, 1)
217
+ # # ┌──────┐
218
+ # # │ a │
219
+ # # │ --- │
220
+ # # │ f64 │
221
+ # # ╞══════╡
222
+ # # │ 13.0 │
223
+ # # └──────┘
224
+ #
225
+ # @example
226
+ # df["a"].var
227
+ # # => 13.0
228
def var(column, ddof: 1)
  # Delegates to the column expression's `var`, forwarding `ddof` unchanged.
  target = col(column)
  target.var(ddof: ddof)
end
231
+
232
+
233
+ # Get the mean value.
234
+ #
235
+ # This function is syntactic sugar for `col(columns).mean`.
236
+ #
237
+ # @param columns [Array]
238
+ # One or more column names.
239
+ #
240
+ # @return [Expr]
241
+ #
242
+ # @example
243
+ # df = Polars::DataFrame.new(
244
+ # {
245
+ # "a" => [1, 8, 3],
246
+ # "b" => [4, 5, 2],
247
+ # "c" => ["foo", "bar", "foo"]
248
+ # }
249
+ # )
250
+ # df.select(Polars.mean("a"))
251
+ # # =>
252
+ # # shape: (1, 1)
253
+ # # ┌─────┐
254
+ # # │ a │
255
+ # # │ --- │
256
+ # # │ f64 │
257
+ # # ╞═════╡
258
+ # # │ 4.0 │
259
+ # # └─────┘
260
+ #
261
+ # @example
262
+ # df.select(Polars.mean("a", "b"))
263
+ # # =>
264
+ # # shape: (1, 2)
265
+ # # ┌─────┬──────────┐
266
+ # # │ a ┆ b │
267
+ # # │ --- ┆ --- │
268
+ # # │ f64 ┆ f64 │
269
+ # # ╞═════╪══════════╡
270
+ # # │ 4.0 ┆ 3.666667 │
271
+ # # └─────┴──────────┘
272
def mean(*columns)
  # Select the requested columns, then take the mean of each.
  selection = col(*columns)
  selection.mean
end
275
+ alias_method :avg, :mean
276
+
277
+ # Get the median value.
278
+ #
279
+ # This function is syntactic sugar for `col(columns).median`.
280
+ #
281
+ # @param columns [Array]
282
+ # One or more column names.
283
+ #
284
+ # @return [Expr]
285
+ #
286
+ # @example
287
+ # df = Polars::DataFrame.new(
288
+ # {
289
+ # "a" => [1, 8, 3],
290
+ # "b" => [4, 5, 2],
291
+ # "c" => ["foo", "bar", "foo"]
292
+ # }
293
+ # )
294
+ # df.select(Polars.median("a"))
295
+ # # =>
296
+ # # shape: (1, 1)
297
+ # # ┌─────┐
298
+ # # │ a │
299
+ # # │ --- │
300
+ # # │ f64 │
301
+ # # ╞═════╡
302
+ # # │ 3.0 │
303
+ # # └─────┘
304
+ #
305
+ # @example
306
+ # df.select(Polars.median("a", "b"))
307
+ # # =>
308
+ # # shape: (1, 2)
309
+ # # ┌─────┬─────┐
310
+ # # │ a ┆ b │
311
+ # # │ --- ┆ --- │
312
+ # # │ f64 ┆ f64 │
313
+ # # ╞═════╪═════╡
314
+ # # │ 3.0 ┆ 4.0 │
315
+ # # └─────┴─────┘
316
def median(*columns)
  # Select the requested columns, then take the median of each.
  selection = col(*columns)
  selection.median
end
319
+
320
+ # Count unique values.
321
+ #
322
+ # This function is syntactic sugar for `col(columns).n_unique`.
323
+ #
324
+ # @param columns [Array]
325
+ # One or more column names.
326
+ #
327
+ # @return [Expr]
328
+ #
329
+ # @example
330
+ # df = Polars::DataFrame.new(
331
+ # {
332
+ # "a" => [1, 8, 1],
333
+ # "b" => [4, 5, 2],
334
+ # "c" => ["foo", "bar", "foo"]
335
+ # }
336
+ # )
337
+ # df.select(Polars.n_unique("a"))
338
+ # # =>
339
+ # # shape: (1, 1)
340
+ # # ┌─────┐
341
+ # # │ a │
342
+ # # │ --- │
343
+ # # │ u32 │
344
+ # # ╞═════╡
345
+ # # │ 2 │
346
+ # # └─────┘
347
+ #
348
+ # @example
349
+ # df.select(Polars.n_unique("b", "c"))
350
+ # # =>
351
+ # # shape: (1, 2)
352
+ # # ┌─────┬─────┐
353
+ # # │ b ┆ c │
354
+ # # │ --- ┆ --- │
355
+ # # │ u32 ┆ u32 │
356
+ # # ╞═════╪═════╡
357
+ # # │ 3 ┆ 2 │
358
+ # # └─────┴─────┘
359
def n_unique(*columns)
  # Select the requested columns, then count the unique values of each.
  selection = col(*columns)
  selection.n_unique
end
362
+
363
+ # Approximate count of unique values.
364
+ #
365
+ # This function is syntactic sugar for `col(columns).approx_n_unique`, and
366
+ # uses the HyperLogLog++ algorithm for cardinality estimation.
367
+ #
368
+ # @param columns [Array]
369
+ # One or more column names.
370
+ #
371
+ # @return [Expr]
372
+ #
373
+ # @example
374
+ # df = Polars::DataFrame.new(
375
+ # {
376
+ # "a" => [1, 8, 1],
377
+ # "b" => [4, 5, 2],
378
+ # "c" => ["foo", "bar", "foo"]
379
+ # }
380
+ # )
381
+ # df.select(Polars.approx_n_unique("a"))
382
+ # # =>
383
+ # # shape: (1, 1)
384
+ # # ┌─────┐
385
+ # # │ a │
386
+ # # │ --- │
387
+ # # │ u32 │
388
+ # # ╞═════╡
389
+ # # │ 2 │
390
+ # # └─────┘
391
+ #
392
+ # @example
393
+ # df.select(Polars.approx_n_unique("b", "c"))
394
+ # # =>
395
+ # # shape: (1, 2)
396
+ # # ┌─────┬─────┐
397
+ # # │ b ┆ c │
398
+ # # │ --- ┆ --- │
399
+ # # │ u32 ┆ u32 │
400
+ # # ╞═════╪═════╡
401
+ # # │ 3 ┆ 2 │
402
+ # # └─────┴─────┘
403
def approx_n_unique(*columns)
  # Select the requested columns, then apply the approximate unique count.
  selection = col(*columns)
  selection.approx_n_unique
end
406
+
407
+ # Get the first value.
408
+ #
409
+ # @param columns [Array]
410
+ # One or more column names. If not provided (default), returns an expression
411
+ # to take the first column of the context instead.
412
+ #
413
+ # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 8, 3],
419
+ # "b" => [4, 5, 2],
420
+ # "c" => ["foo", "bar", "baz"]
421
+ # }
422
+ # )
423
+ # df.select(Polars.first)
424
+ # # =>
425
+ # # shape: (3, 1)
426
+ # # ┌─────┐
427
+ # # │ a │
428
+ # # │ --- │
429
+ # # │ i64 │
430
+ # # ╞═════╡
431
+ # # │ 1 │
432
+ # # │ 8 │
433
+ # # │ 3 │
434
+ # # └─────┘
435
+ #
436
+ # @example
437
+ # df.select(Polars.first("b"))
438
+ # # =>
439
+ # # shape: (1, 1)
440
+ # # ┌─────┐
441
+ # # │ b │
442
+ # # │ --- │
443
+ # # │ i64 │
444
+ # # ╞═════╡
445
+ # # │ 4 │
446
+ # # └─────┘
447
+ #
448
+ # @example
449
+ # df.select(Polars.first("a", "c"))
450
+ # # =>
451
+ # # shape: (1, 2)
452
+ # # ┌─────┬─────┐
453
+ # # │ a ┆ c │
454
+ # # │ --- ┆ --- │
455
+ # # │ i64 ┆ str │
456
+ # # ╞═════╪═════╡
457
+ # # │ 1 ┆ foo │
458
+ # # └─────┴─────┘
459
def first(*columns)
  if columns.empty?
    # No names given: use the context-level `Plr.first` expression.
    Utils.wrap_expr(Plr.first)
  else
    # Names given: first value of each selected column.
    col(*columns).first
  end
end
466
+
467
+ # Get the last value.
468
+ #
469
+ # @param columns [Array]
470
+ # One or more column names. If not provided (default), returns an expression
471
+ # to take the last column of the context instead.
472
+ #
473
+ # @return [Expr]
474
+ #
475
+ # @example
476
+ # df = Polars::DataFrame.new(
477
+ # {
478
+ # "a" => [1, 8, 3],
479
+ # "b" => [4, 5, 2],
480
+ # "c" => ["foo", "bar", "baz"]
481
+ # }
482
+ # )
483
+ # df.select(Polars.last)
484
+ # # =>
485
+ # # shape: (3, 1)
486
+ # # ┌─────┐
487
+ # # │ c │
488
+ # # │ --- │
489
+ # # │ str │
490
+ # # ╞═════╡
491
+ # # │ foo │
492
+ # # │ bar │
493
+ # # │ baz │
494
+ # # └─────┘
495
+ #
496
+ # @example
497
+ # df.select(Polars.last("a"))
498
+ # # =>
499
+ # # shape: (1, 1)
500
+ # # ┌─────┐
501
+ # # │ a │
502
+ # # │ --- │
503
+ # # │ i64 │
504
+ # # ╞═════╡
505
+ # # │ 3 │
506
+ # # └─────┘
507
+ #
508
+ # @example
509
+ # df.select(Polars.last("b", "c"))
510
+ # # =>
511
+ # # shape: (1, 2)
512
+ # # ┌─────┬─────┐
513
+ # # │ b ┆ c │
514
+ # # │ --- ┆ --- │
515
+ # # │ i64 ┆ str │
516
+ # # ╞═════╪═════╡
517
+ # # │ 2 ┆ baz │
518
+ # # └─────┴─────┘
519
def last(*columns)
  if columns.empty?
    # No names given: use the context-level `Plr.last` expression.
    Utils.wrap_expr(Plr.last)
  else
    # Names given: last value of each selected column.
    col(*columns).last
  end
end
526
+
527
+ # Get the nth column(s) of the context.
528
+ #
529
+ # @param indices [Array]
530
+ # One or more indices representing the columns to retrieve.
531
+ #
532
+ # @return [Expr]
533
+ #
534
+ # @example
535
+ # df = Polars::DataFrame.new(
536
+ # {
537
+ # "a" => [1, 8, 3],
538
+ # "b" => [4, 5, 2],
539
+ # "c" => ["foo", "bar", "baz"]
540
+ # }
541
+ # )
542
+ # df.select(Polars.nth(1))
543
+ # # =>
544
+ # # shape: (3, 1)
545
+ # # ┌─────┐
546
+ # # │ b │
547
+ # # │ --- │
548
+ # # │ i64 │
549
+ # # ╞═════╡
550
+ # # │ 4 │
551
+ # # │ 5 │
552
+ # # │ 2 │
553
+ # # └─────┘
554
+ #
555
+ # @example
556
+ # df.select(Polars.nth(2, 0))
557
+ # # =>
558
+ # # shape: (3, 2)
559
+ # # ┌─────┬─────┐
560
+ # # │ c ┆ a │
561
+ # # │ --- ┆ --- │
562
+ # # │ str ┆ i64 │
563
+ # # ╞═════╪═════╡
564
+ # # │ foo ┆ 1 │
565
+ # # │ bar ┆ 8 │
566
+ # # │ baz ┆ 3 │
567
+ # # └─────┴─────┘
568
def nth(*indices)
  # Accepts either separate positional indices (`nth(2, 0)`) or a single
  # array of indices (`nth([2, 0])`); the latter is unwrapped here.
  #
  # `::Array` is spelled with the leading `::` so the core Array class is
  # matched even when the enclosing namespace defines its own `Array`
  # constant (the same spelling `arg_sort_by` uses).
  if indices.length == 1 && indices[0].is_a?(::Array)
    indices = indices[0]
  end

  # Hand the flat index list to the native layer and wrap the result.
  Utils.wrap_expr(Plr.index_cols(indices))
end
575
+
576
+ # Get the first `n` rows.
577
+ #
578
+ # This function is syntactic sugar for `col(column).head(n)`.
579
+ #
580
+ # @param column [Object]
581
+ # Column name.
582
+ # @param n [Integer]
583
+ # Number of rows to return.
584
+ #
585
+ # @return [Expr]
586
+ #
587
+ # @example
588
+ # df = Polars::DataFrame.new(
589
+ # {
590
+ # "a" => [1, 8, 3],
591
+ # "b" => [4, 5, 2],
592
+ # "c" => ["foo", "bar", "foo"]
593
+ # }
594
+ # )
595
+ # df.select(Polars.head("a"))
596
+ # # =>
597
+ # # shape: (3, 1)
598
+ # # ┌─────┐
599
+ # # │ a │
600
+ # # │ --- │
601
+ # # │ i64 │
602
+ # # ╞═════╡
603
+ # # │ 1 │
604
+ # # │ 8 │
605
+ # # │ 3 │
606
+ # # └─────┘
607
+ #
608
+ # @example
609
+ # df.select(Polars.head("a", 2))
610
+ # # =>
611
+ # # shape: (2, 1)
612
+ # # ┌─────┐
613
+ # # │ a │
614
+ # # │ --- │
615
+ # # │ i64 │
616
+ # # ╞═════╡
617
+ # # │ 1 │
618
+ # # │ 8 │
619
+ # # └─────┘
620
def head(column, n = 10)
  # Delegates to the column expression's `head` with the requested row count.
  target = col(column)
  target.head(n)
end
623
+
624
+ # Get the last `n` rows.
625
+ #
626
+ # This function is syntactic sugar for `col(column).tail(n)`.
627
+ #
628
+ # @param column [Object]
629
+ # Column name.
630
+ # @param n [Integer]
631
+ # Number of rows to return.
632
+ #
633
+ # @return [Expr]
634
+ #
635
+ # @example
636
+ # df = Polars::DataFrame.new(
637
+ # {
638
+ # "a" => [1, 8, 3],
639
+ # "b" => [4, 5, 2],
640
+ # "c" => ["foo", "bar", "foo"]
641
+ # }
642
+ # )
643
+ # df.select(Polars.tail("a"))
644
+ # # =>
645
+ # # shape: (3, 1)
646
+ # # ┌─────┐
647
+ # # │ a │
648
+ # # │ --- │
649
+ # # │ i64 │
650
+ # # ╞═════╡
651
+ # # │ 1 │
652
+ # # │ 8 │
653
+ # # │ 3 │
654
+ # # └─────┘
655
+ #
656
+ # @example
657
+ # df.select(Polars.tail("a", 2))
658
+ # # =>
659
+ # # shape: (2, 1)
660
+ # # ┌─────┐
661
+ # # │ a │
662
+ # # │ --- │
663
+ # # │ i64 │
664
+ # # ╞═════╡
665
+ # # │ 8 │
666
+ # # │ 3 │
667
+ # # └─────┘
668
def tail(column, n = 10)
  # Delegates to the column expression's `tail` with the requested row count.
  target = col(column)
  target.tail(n)
end
671
+
672
+ # Compute the Pearson or Spearman rank correlation between two columns.
673
+ #
674
+ # @param a [Object]
675
+ # Column name or Expression.
676
+ # @param b [Object]
677
+ # Column name or Expression.
678
+ # @param ddof [Integer]
679
+ # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
680
+ # where N represents the number of elements.
681
+ # By default ddof is 1.
682
+ # @param method ["pearson", "spearman"]
683
+ # Correlation method.
684
+ # @param propagate_nans [Boolean]
685
+ # If `true` any `NaN` encountered will lead to `NaN` in the output.
686
+ # Defaults to `false`, where `NaN` values are regarded as larger than any finite number
687
+ # and thus lead to the highest rank.
688
+ #
689
+ # @return [Expr]
690
+ #
691
+ # @example Pearson's correlation:
692
+ # df = Polars::DataFrame.new(
693
+ # {
694
+ # "a" => [1, 8, 3],
695
+ # "b" => [4, 5, 2],
696
+ # "c" => ["foo", "bar", "foo"]
697
+ # }
698
+ # )
699
+ # df.select(Polars.corr("a", "b"))
700
+ # # =>
701
+ # # shape: (1, 1)
702
+ # # ┌──────────┐
703
+ # # │ a │
704
+ # # │ --- │
705
+ # # │ f64 │
706
+ # # ╞══════════╡
707
+ # # │ 0.544705 │
708
+ # # └──────────┘
709
+ #
710
+ # @example Spearman rank correlation:
711
+ # df = Polars::DataFrame.new(
712
+ # {
713
+ # "a" => [1, 8, 3],
714
+ # "b" => [4, 5, 2],
715
+ # "c" => ["foo", "bar", "foo"]
716
+ # }
717
+ # )
718
+ # df.select(Polars.corr("a", "b", method: "spearman"))
719
+ # # =>
720
+ # # shape: (1, 1)
721
+ # # ┌─────┐
722
+ # # │ a │
723
+ # # │ --- │
724
+ # # │ f64 │
725
+ # # ╞═════╡
726
+ # # │ 0.5 │
727
+ # # └─────┘
728
def corr(
  a,
  b,
  method: "pearson",
  ddof: 1,
  propagate_nans: false
)
  # Both operands are parsed into expressions before dispatching on `method`.
  # `propagate_nans` is only forwarded to the Spearman implementation.
  #
  # Raises ArgumentError when `method` is neither "pearson" nor "spearman".
  lhs = Utils.parse_into_expression(a)
  rhs = Utils.parse_into_expression(b)

  if method == "pearson"
    Utils.wrap_expr(Plr.pearson_corr(lhs, rhs, ddof))
  elsif method == "spearman"
    Utils.wrap_expr(Plr.spearman_rank_corr(lhs, rhs, ddof, propagate_nans))
  else
    # Single braces: a doubled `{{ }}` is a format-string escape in other
    # languages but is printed literally by Ruby. `inspect` keeps nil and
    # symbol arguments visible in the message.
    msg = "method must be one of {'pearson', 'spearman'}, got #{method.inspect}"
    raise ArgumentError, msg
  end
end
747
+
748
+ # Compute the covariance between two columns/expressions.
749
+ #
750
+ # @param a [Object]
751
+ # Column name or Expression.
752
+ # @param b [Object]
753
+ # Column name or Expression.
754
+ # @param ddof [Integer]
755
+ # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
756
+ # where N represents the number of elements.
757
+ # By default ddof is 1.
758
+ #
759
+ # @return [Expr]
760
+ #
761
+ # @example
762
+ # df = Polars::DataFrame.new(
763
+ # {
764
+ # "a" => [1, 8, 3],
765
+ # "b" => [4, 5, 2],
766
+ # "c" => ["foo", "bar", "foo"]
767
+ # }
768
+ # )
769
+ # df.select(Polars.cov("a", "b"))
770
+ # # =>
771
+ # # shape: (1, 1)
772
+ # # ┌─────┐
773
+ # # │ a │
774
+ # # │ --- │
775
+ # # │ f64 │
776
+ # # ╞═════╡
777
+ # # │ 3.0 │
778
+ # # └─────┘
779
def cov(a, b, ddof: 1)
  # Parse both operands (in order) into expressions, then delegate to the
  # native covariance and wrap the result.
  lhs = Utils.parse_into_expression(a)
  rhs = Utils.parse_into_expression(b)
  native = Plr.cov(lhs, rhs, ddof)
  Utils.wrap_expr(native)
end
784
+
785
+ # def map
786
+ # end
787
+
788
+ # def apply
789
+ # end
790
+
791
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
792
+ #
793
+ # @return [Expr]
794
def fold(acc, f, exprs)
  # The accumulator is parsed with `str_as_lit: true`.
  init = Utils.parse_into_expression(acc, str_as_lit: true)

  # A lone expression is treated as a one-element list.
  inputs = exprs.is_a?(Expr) ? [exprs] : exprs
  parsed = Utils.parse_into_list_of_expressions(inputs)

  Utils.wrap_expr(Plr.fold(init, f, parsed))
end
803
+
804
+ # def reduce
805
+ # end
806
+
807
+ # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
808
+ #
809
+ # Every cumulative result is added as a separate field in a Struct column.
810
+ #
811
+ # @param acc [Object]
812
+ # Accumulator Expression. This is the value that will be initialized when the fold
813
+ # starts. For a sum this could for instance be lit(0).
814
+ # @param f [Object]
815
+ # Function to apply over the accumulator and the value.
816
+ # Fn(acc, value) -> new_value
817
+ # @param exprs [Object]
818
+ # Expressions to aggregate over. May also be a wildcard expression.
819
+ # @param include_init [Boolean]
820
+ # Include the initial accumulator state as struct field.
821
+ #
822
+ # @return [Expr]
823
+ #
824
+ # @note
825
+ # If you simply want the first encountered expression as accumulator,
826
+ # consider using `cumreduce`.
827
def cum_fold(acc, f, exprs, include_init: false)
  # The accumulator is parsed with `str_as_lit: true`.
  init = Utils.parse_into_expression(acc, str_as_lit: true)

  # A lone expression is treated as a one-element list.
  inputs = exprs.is_a?(Expr) ? [exprs] : exprs
  parsed = Utils.parse_into_list_of_expressions(inputs)

  Utils.wrap_expr(Plr.cum_fold(init, f, parsed, include_init))
end
836
+ alias_method :cumfold, :cum_fold
837
+
838
+ # def cum_reduce
839
+ # end
840
+
841
+ # Compute two argument arctan in radians.
842
+ #
843
+ # Returns the angle (in radians) in the plane between the
844
+ # positive x-axis and the ray from the origin to (x,y).
845
+ #
846
+ # @param y [Object]
847
+ # Column name or Expression.
848
+ # @param x [Object]
849
+ # Column name or Expression.
850
+ #
851
+ # @return [Expr]
852
+ #
853
+ # @example
854
+ # twoRootTwo = Math.sqrt(2) / 2
855
+ # df = Polars::DataFrame.new(
856
+ # {
857
+ # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
858
+ # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
859
+ # }
860
+ # )
861
+ # df.select(
862
+ # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
863
+ # )
864
+ # # =>
865
+ # # shape: (4, 2)
866
+ # # ┌────────┬───────────┐
867
+ # # │ atan2d ┆ atan2 │
868
+ # # │ --- ┆ --- │
869
+ # # │ f64 ┆ f64 │
870
+ # # ╞════════╪═══════════╡
871
+ # # │ 45.0 ┆ 0.785398 │
872
+ # # │ -45.0 ┆ -0.785398 │
873
+ # # │ 135.0 ┆ 2.356194 │
874
+ # # │ -135.0 ┆ -2.356194 │
875
+ # # └────────┴───────────┘
876
def arctan2(y, x)
  # String-like arguments are treated as column names; anything else is
  # expected to already respond to `_rbexpr`.
  y = col(y) if Utils.strlike?(y)
  x = col(x) if Utils.strlike?(x)

  Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
end
885
+
886
+ # Compute two argument arctan in degrees.
887
+ #
888
+ # Returns the angle (in degrees) in the plane between the positive x-axis
889
+ # and the ray from the origin to (x,y).
890
+ #
891
+ # @param y [Object]
892
+ # Column name or Expression.
893
+ # @param x [Object]
894
+ # Column name or Expression.
895
+ #
896
+ # @return [Expr]
897
+ #
898
+ # @example
899
+ # twoRootTwo = Math.sqrt(2) / 2
900
+ # df = Polars::DataFrame.new(
901
+ # {
902
+ # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
903
+ # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
904
+ # }
905
+ # )
906
+ # df.select(
907
+ # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
908
+ # )
909
+ # # =>
910
+ # # shape: (4, 2)
911
+ # # ┌────────┬───────────┐
912
+ # # │ atan2d ┆ atan2 │
913
+ # # │ --- ┆ --- │
914
+ # # │ f64 ┆ f64 │
915
+ # # ╞════════╪═══════════╡
916
+ # # │ 45.0 ┆ 0.785398 │
917
+ # # │ -45.0 ┆ -0.785398 │
918
+ # # │ 135.0 ┆ 2.356194 │
919
+ # # │ -135.0 ┆ -2.356194 │
920
+ # # └────────┴───────────┘
921
def arctan2d(y, x)
  # String-like arguments are treated as column names; anything else is
  # expected to already respond to `_rbexpr`.
  y = col(y) if Utils.strlike?(y)
  x = col(x) if Utils.strlike?(x)

  Utils.wrap_expr(Plr.arctan2d(y._rbexpr, x._rbexpr))
end
930
+
931
+ # Exclude certain columns from a wildcard/regex selection.
932
+ #
933
+ # @param columns [Object]
934
+ # Column(s) to exclude from selection
935
+ # This can be:
936
+ #
937
+ # - a column name, or multiple column names
938
+ # - a regular expression starting with `^` and ending with `$`
939
+ # - a dtype or multiple dtypes
940
+ #
941
+ # @return [Object]
942
+ #
943
+ # @example
944
+ # df = Polars::DataFrame.new(
945
+ # {
946
+ # "aa" => [1, 2, 3],
947
+ # "ba" => ["a", "b", nil],
948
+ # "cc" => [nil, 2.5, 1.5]
949
+ # }
950
+ # )
951
+ # # =>
952
+ # # shape: (3, 3)
953
+ # # ┌─────┬──────┬──────┐
954
+ # # │ aa ┆ ba ┆ cc │
955
+ # # │ --- ┆ --- ┆ --- │
956
+ # # │ i64 ┆ str ┆ f64 │
957
+ # # ╞═════╪══════╪══════╡
958
+ # # │ 1 ┆ a ┆ null │
959
+ # # │ 2 ┆ b ┆ 2.5 │
960
+ # # │ 3 ┆ null ┆ 1.5 │
961
+ # # └─────┴──────┴──────┘
962
+ #
963
+ # @example Exclude by column name(s):
964
+ # df.select(Polars.exclude("ba"))
965
+ # # =>
966
+ # # shape: (3, 2)
967
+ # # ┌─────┬──────┐
968
+ # # │ aa ┆ cc │
969
+ # # │ --- ┆ --- │
970
+ # # │ i64 ┆ f64 │
971
+ # # ╞═════╪══════╡
972
+ # # │ 1 ┆ null │
973
+ # # │ 2 ┆ 2.5 │
974
+ # # │ 3 ┆ 1.5 │
975
+ # # └─────┴──────┘
976
+ #
977
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
978
+ # df.select(Polars.exclude("^.*a$"))
979
+ # # =>
980
+ # # shape: (3, 1)
981
+ # # ┌──────┐
982
+ # # │ cc │
983
+ # # │ --- │
984
+ # # │ f64 │
985
+ # # ╞══════╡
986
+ # # │ null │
987
+ # # │ 2.5 │
988
+ # # │ 1.5 │
989
+ # # └──────┘
990
def exclude(columns)
  # Start from the wildcard selection and remove the given columns from it.
  everything = col("*")
  everything.exclude(columns)
end
993
+
994
+ # Syntactic sugar for `Polars.col("foo").agg_groups`.
995
+ #
996
+ # @return [Object]
997
def groups(column)
  # Delegates to the column expression's `agg_groups`.
  target = col(column)
  target.agg_groups
end
1000
+
1001
+ # Syntactic sugar for `Polars.col("foo").quantile(...)`.
1002
+ #
1003
+ # @param column [String]
1004
+ # Column name.
1005
+ # @param quantile [Float]
1006
+ # Quantile between 0.0 and 1.0.
1007
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
1008
+ # Interpolation method.
1009
+ #
1010
+ # @return [Expr]
1011
def quantile(column, quantile, interpolation: "nearest")
  # Delegates to the column expression's `quantile`, forwarding both options.
  target = col(column)
  target.quantile(quantile, interpolation: interpolation)
end
1014
+
1015
+ # Find the indexes that would sort the columns.
1016
+ #
1017
+ # Argsort by multiple columns. The first column will be used for the ordering.
1018
+ # If there are duplicates in the first column, the second column will be used to
1019
+ # determine the ordering and so on.
1020
+ #
1021
+ # @param exprs [Object]
1022
+ # Columns used to determine the ordering.
1023
+ # @param reverse [Boolean]
1024
+ # Default is ascending.
1025
+ #
1026
+ # @return [Expr]
1027
def arg_sort_by(exprs, reverse: false)
  # Normalise a single sort key into a one-element list.
  exprs = [exprs] unless exprs.is_a?(::Array)

  # A single boolean applies to every sort key; anything else is passed on
  # as given.
  if reverse == true || reverse == false
    reverse = ::Array.new(exprs.length, reverse)
  end

  parsed = Utils.parse_into_list_of_expressions(exprs)
  Utils.wrap_expr(Plr.arg_sort_by(parsed, reverse))
end
1037
+ alias_method :argsort_by, :arg_sort_by
1038
+
1039
+ # Collect multiple LazyFrames at the same time.
1040
+ #
1041
+ # This runs all the computation graphs in parallel on Polars threadpool.
1042
+ #
1043
+ # @param lazy_frames [Array]
1044
+ # A list of LazyFrames to collect.
1045
+ # @param type_coercion [Boolean]
1046
+ # Do type coercion optimization.
1047
+ # @param predicate_pushdown [Boolean]
1048
+ # Do predicate pushdown optimization.
1049
+ # @param projection_pushdown [Boolean]
1050
+ # Do projection pushdown optimization.
1051
+ # @param simplify_expression [Boolean]
1052
+ # Run simplify expressions optimization.
1053
+ # @param string_cache [Boolean]
1054
+ # This argument is deprecated and will be ignored
1055
+ # @param no_optimization [Boolean]
1056
+ # Turn off optimizations.
1057
+ # @param slice_pushdown [Boolean]
1058
+ # Slice pushdown optimization.
1059
+ # @param common_subplan_elimination [Boolean]
1060
+ # Will try to cache branching subplans that occur on self-joins or unions.
1061
+ # @param allow_streaming [Boolean]
1062
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
1063
+ #
1064
+ # @return [Array]
1065
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # `no_optimization` switches off the four pushdown/elimination passes;
  # `type_coercion` and `simplify_expression` are left as passed.
  # `string_cache` is accepted but not used.
  if no_optimization
    predicate_pushdown = projection_pushdown = slice_pushdown = common_subplan_elimination = false
  end

  # Apply the optimisation toggles to every frame's underlying `_ldf`.
  # NOTE(review): the meaning of the trailing positional `false` is not
  # visible from here; confirm against `optimization_toggle`.
  prepared = lazy_frames.map do |lf|
    lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming,
      false
    )
  end

  # Collect everything in one native call, then wrap each returned frame.
  Plr.collect_all(prepared).map { |rbdf| Utils.wrap_df(rbdf) }
end
1107
+
1108
+ # Run polars expressions without a context.
1109
+ #
1110
+ # This is syntactic sugar for running `df.select` on an empty DataFrame.
1111
+ #
1112
+ # @param exprs [Array]
1113
+ # Column(s) to select, specified as positional arguments.
1114
+ # Accepts expression input. Strings are parsed as column names,
1115
+ # other non-expression inputs are parsed as literals.
1116
+ # @param named_exprs [Hash]
1117
+ # Additional columns to select, specified as keyword arguments.
1118
+ # The columns will be renamed to the keyword used.
1119
+ #
1120
+ # @return [DataFrame]
1121
+ #
1122
+ # @example
1123
+ # foo = Polars::Series.new("foo", [1, 2, 3])
1124
+ # bar = Polars::Series.new("bar", [3, 2, 1])
1125
+ # Polars.select(min: Polars.min_horizontal(foo, bar))
1126
+ # # =>
1127
+ # # shape: (3, 1)
1128
+ # # ┌─────┐
1129
+ # # │ min │
1130
+ # # │ --- │
1131
+ # # │ i64 │
1132
+ # # ╞═════╡
1133
+ # # │ 1 │
1134
+ # # │ 2 │
1135
+ # # │ 1 │
1136
+ # # └─────┘
1137
def select(*exprs, **named_exprs)
  # Evaluate the expressions against a frame built from an empty array,
  # so they run without any surrounding context.
  empty_frame = DataFrame.new([])
  empty_frame.select(*exprs, **named_exprs)
end
1140
+
1141
+ # Return indices where `condition` evaluates `true`.
1142
+ #
1143
+ # @param condition [Expr]
1144
+ # Boolean expression to evaluate
1145
+ # @param eager [Boolean]
1146
+ # Whether to apply this function eagerly (as opposed to lazily).
1147
+ #
1148
+ # @return [Expr, Series]
1149
+ #
1150
+ # @example
1151
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
1152
+ # df.select(
1153
+ # [
1154
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
1155
+ # ]
1156
+ # ).to_series
1157
+ # # =>
1158
+ # # shape: (2,)
1159
+ # # Series: 'a' [u32]
1160
+ # # [
1161
+ # # 1
1162
+ # # 3
1163
+ # # ]
1164
def arg_where(condition, eager: false)
  # Lazy path: parse the input (strings are treated as literals) and wrap
  # the native expression.
  unless eager
    parsed = Utils.parse_into_expression(condition, str_as_lit: true)
    return Utils.wrap_expr(Plr.arg_where(parsed))
  end

  # Eager path only accepts a Series.
  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end

  # Run the lazy variant over a one-column frame built from the Series,
  # then pull the single result column back out.
  frame = condition.to_frame
  frame.select(arg_where(Polars.col(condition.name))).to_series
end
1175
+
1176
+ # Folds the columns from left to right, keeping the first non-null value.
1177
+ #
1178
+ # @param exprs [Array]
1179
+ # Columns to coalesce. Accepts expression input. Strings are parsed as column
1180
+ # names, other non-expression inputs are parsed as literals.
1181
+ # @param more_exprs [Array]
1182
+ # Additional columns to coalesce, specified as positional arguments.
1183
+ #
1184
+ # @return [Expr]
1185
+ #
1186
+ # @example
1187
+ # df = Polars::DataFrame.new(
1188
+ # {
1189
+ # "a" => [1, nil, nil, nil],
1190
+ # "b" => [1, 2, nil, nil],
1191
+ # "c" => [5, nil, 3, nil]
1192
+ # }
1193
+ # )
1194
+ # df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
1195
+ # # =>
1196
+ # # shape: (4, 4)
1197
+ # # ┌──────┬──────┬──────┬─────┐
1198
+ # # │ a ┆ b ┆ c ┆ d │
1199
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1200
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 │
1201
+ # # ╞══════╪══════╪══════╪═════╡
1202
+ # # │ 1 ┆ 1 ┆ 5 ┆ 1 │
1203
+ # # │ null ┆ 2 ┆ null ┆ 2 │
1204
+ # # │ null ┆ null ┆ 3 ┆ 3 │
1205
+ # # │ null ┆ null ┆ null ┆ 10 │
1206
+ # # └──────┴──────┴──────┴─────┘
1207
+ #
1208
+ # @example
1209
+ # df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
1210
+ # # =>
1211
+ # # shape: (4, 4)
1212
+ # # ┌──────┬──────┬──────┬──────┐
1213
+ # # │ a ┆ b ┆ c ┆ d │
1214
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1215
+ # # │ i64 ┆ i64 ┆ i64 ┆ f64 │
1216
+ # # ╞══════╪══════╪══════╪══════╡
1217
+ # # │ 1 ┆ 1 ┆ 5 ┆ 1.0 │
1218
+ # # │ null ┆ 2 ┆ null ┆ 2.0 │
1219
+ # # │ null ┆ null ┆ 3 ┆ 3.0 │
1220
+ # # │ null ┆ null ┆ null ┆ 10.0 │
1221
+ # # └──────┴──────┴──────┴──────┘
1222
def coalesce(exprs, *more_exprs)
  # Normalise the first argument and any extra positional arguments into
  # one list of expressions before handing it to the native coalesce.
  parsed = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.coalesce(parsed))
end
1226
+
1227
+ # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1228
+ #
1229
+ # Depending on the `unit` provided, this function will return a different dtype:
1230
+ # - unit: "d" returns pl.Date
1231
+ # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1232
+ # - unit: "ms" returns pl.Datetime["ms"]
1233
+ # - unit: "us" returns pl.Datetime["us"]
1234
+ # - unit: "ns" returns pl.Datetime["ns"]
1235
+ #
1236
+ # @param column [Object]
1237
+ # Series or expression to parse integers to pl.Datetime.
1238
+ # @param unit [String]
1239
+ # The unit of the timesteps since epoch time.
1240
+ # @param eager [Boolean]
1241
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
1242
+ #
1243
+ # @return [Expr, Series]
1244
+ #
1245
+ # @example
1246
+ # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1247
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1248
+ # # =>
1249
+ # # shape: (2, 1)
1250
+ # # ┌─────────────────────┐
1251
+ # # │ timestamp │
1252
+ # # │ --- │
1253
+ # # │ datetime[μs] │
1254
+ # # ╞═════════════════════╡
1255
+ # # │ 2022-10-25 07:31:17 │
1256
+ # # │ 2022-10-25 07:31:39 │
1257
+ # # └─────────────────────┘
1258
def from_epoch(column, unit: "s", eager: false)
  # Normalise the input: string-like values become column references,
  # anything that is neither a Series nor an Expr is handed to Series.new.
  if Utils.strlike?(column)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  expr =
    case unit
    when "d"
      column.cast(Date)
    when "s"
      # Seconds are scaled to microseconds before casting to Datetime("us").
      (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
    else
      unless Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
        # Single braces: the doubled `{{ }}` was a Python f-string escape
        # that Ruby printed literally.
        raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
      end
      column.cast(Datetime.new(unit))
    end

  return expr unless eager

  # Eager evaluation needs a Series to build the frame from.
  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end
1285
+
1286
+ # Parse one or more SQL expressions to polars expression(s).
1287
+ #
1288
+ # @param sql [Object]
1289
+ # One or more SQL expressions.
1290
+ #
1291
+ # @return [Expr, Array<Expr>]
1292
+ #
1293
+ # @example Parse a single SQL expression:
1294
+ # df = Polars::DataFrame.new({"a" => [2, 1]})
1295
+ # expr = Polars.sql_expr("MAX(a)")
1296
+ # df.select(expr)
1297
+ # # =>
1298
+ # # shape: (1, 1)
1299
+ # # ┌─────┐
1300
+ # # │ a │
1301
+ # # │ --- │
1302
+ # # │ i64 │
1303
+ # # ╞═════╡
1304
+ # # │ 2 │
1305
+ # # └─────┘
1306
+ #
1307
+ # @example Parse multiple SQL expressions:
1308
+ # df.with_columns(
1309
+ # *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
1310
+ # )
1311
+ # # =>
1312
+ # # shape: (2, 3)
1313
+ # # ┌─────┬─────┬───────┐
1314
+ # # │ a ┆ a_a ┆ a_txt │
1315
+ # # │ --- ┆ --- ┆ --- │
1316
+ # # │ i64 ┆ i64 ┆ str │
1317
+ # # ╞═════╪═════╪═══════╡
1318
+ # # │ 2 ┆ 4 ┆ 2 │
1319
+ # # │ 1 ┆ 1 ┆ 1 │
1320
+ # # └─────┴─────┴───────┘
1321
def sql_expr(sql)
  # A single SQL string yields a single wrapped expression.
  return Utils.wrap_expr(Plr.sql_expr(sql)) if sql.is_a?(::String)

  # Anything else is mapped element by element, one expression per entry.
  sql.map do |statement|
    Utils.wrap_expr(Plr.sql_expr(statement))
  end
end
1328
+ end
1329
+ end