polars-df 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
data/lib/polars/expr.rb CHANGED
@@ -11,106 +11,366 @@ module Polars
11
11
  expr
12
12
  end
13
13
 
14
+ # Returns a string representing the Expr.
15
+ #
16
+ # @return [String]
14
17
  def to_s
15
18
  _rbexpr.to_str
16
19
  end
17
20
  alias_method :inspect, :to_s
18
21
 
22
+ # Bitwise XOR.
23
+ #
24
+ # @return [Expr]
19
25
  def ^(other)
20
26
  wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
21
27
  end
22
28
 
29
+ # Bitwise AND.
30
+ #
31
+ # @return [Expr]
23
32
  def &(other)
24
33
  wrap_expr(_rbexpr._and(_to_rbexpr(other)))
25
34
  end
26
35
 
36
+ # Bitwise OR.
37
+ #
38
+ # @return [Expr]
27
39
  def |(other)
28
40
  wrap_expr(_rbexpr._or(_to_rbexpr(other)))
29
41
  end
30
42
 
43
+ # Performs addition.
44
+ #
45
+ # @return [Expr]
31
46
  def +(other)
32
47
  wrap_expr(_rbexpr + _to_rbexpr(other))
33
48
  end
34
49
 
50
+ # Performs subtraction.
51
+ #
52
+ # @return [Expr]
35
53
  def -(other)
36
54
  wrap_expr(_rbexpr - _to_rbexpr(other))
37
55
  end
38
56
 
57
+ # Performs multiplication.
58
+ #
59
+ # @return [Expr]
39
60
  def *(other)
40
61
  wrap_expr(_rbexpr * _to_rbexpr(other))
41
62
  end
42
63
 
64
+ # Performs division.
65
+ #
66
+ # @return [Expr]
43
67
  def /(other)
44
68
  wrap_expr(_rbexpr / _to_rbexpr(other))
45
69
  end
46
70
 
71
+ # Performs floor division.
72
+ #
73
+ # @return [Expr]
74
+ def floordiv(other)
75
+ wrap_expr(_rbexpr.floordiv(_to_rbexpr(other)))
76
+ end
77
+
78
+ # Returns the modulo.
79
+ #
80
+ # @return [Expr]
47
81
  def %(other)
48
82
  wrap_expr(_rbexpr % _to_rbexpr(other))
49
83
  end
50
84
 
85
+ # Raises the expression to the given power.
86
+ #
87
+ # @return [Expr]
51
88
  def **(power)
52
89
  pow(power)
53
90
  end
54
91
 
92
+ # Greater than or equal.
93
+ #
94
+ # @return [Expr]
55
95
  def >=(other)
56
96
  wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
57
97
  end
58
98
 
99
+ # Less than or equal.
100
+ #
101
+ # @return [Expr]
59
102
  def <=(other)
60
103
  wrap_expr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
61
104
  end
62
105
 
106
+ # Equal.
107
+ #
108
+ # @return [Expr]
63
109
  def ==(other)
64
110
  wrap_expr(_rbexpr.eq(_to_expr(other)._rbexpr))
65
111
  end
66
112
 
113
+ # Not equal.
114
+ #
115
+ # @return [Expr]
67
116
  def !=(other)
68
117
  wrap_expr(_rbexpr.neq(_to_expr(other)._rbexpr))
69
118
  end
70
119
 
120
+ # Less than.
121
+ #
122
+ # @return [Expr]
71
123
  def <(other)
72
124
  wrap_expr(_rbexpr.lt(_to_expr(other)._rbexpr))
73
125
  end
74
126
 
127
+ # Greater than.
128
+ #
129
+ # @return [Expr]
75
130
  def >(other)
76
131
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
77
132
  end
78
133
 
134
+ # Performs negation.
135
+ #
136
+ # @return [Expr]
79
137
  def -@
80
138
  Utils.lit(0) - self
81
139
  end
82
140
 
83
- # def to_physical
84
- # end
85
-
141
+ # Cast to physical representation of the logical dtype.
142
+ #
143
+ # - `:date` -> `:i32`
144
+ # - `:datetime` -> `:i64`
145
+ # - `:time` -> `:i64`
146
+ # - `:duration` -> `:i64`
147
+ # - `:cat` -> `:u32`
148
+ # - Other data types will be left unchanged.
149
+ #
150
+ # @return [Expr]
86
151
  #
152
+ # @example
153
+ # Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
154
+ # [
155
+ # Polars.col("vals").cast(:cat),
156
+ # Polars.col("vals")
157
+ # .cast(:cat)
158
+ # .to_physical
159
+ # .alias("vals_physical")
160
+ # ]
161
+ # )
162
+ # # =>
163
+ # # shape: (4, 2)
164
+ # # ┌──────┬───────────────┐
165
+ # # │ vals ┆ vals_physical │
166
+ # # │ --- ┆ --- │
167
+ # # │ cat ┆ u32 │
168
+ # # ╞══════╪═══════════════╡
169
+ # # │ a ┆ 0 │
170
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
171
+ # # │ x ┆ 1 │
172
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
173
+ # # │ null ┆ null │
174
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
175
+ # # │ a ┆ 0 │
176
+ # # └──────┴───────────────┘
177
+ def to_physical
178
+ wrap_expr(_rbexpr.to_physical)
179
+ end
180
+
181
+ # Check if any boolean value in a Boolean column is `true`.
182
+ #
183
+ # @return [Expr]
184
+ #
185
+ # @example
186
+ # df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
187
+ # df.select(Polars.all.any)
188
+ # # =>
189
+ # # shape: (1, 2)
190
+ # # ┌──────┬───────┐
191
+ # # │ TF ┆ FF │
192
+ # # │ --- ┆ --- │
193
+ # # │ bool ┆ bool │
194
+ # # ╞══════╪═══════╡
195
+ # # │ true ┆ false │
196
+ # # └──────┴───────┘
87
197
  def any
88
198
  wrap_expr(_rbexpr.any)
89
199
  end
90
200
 
201
+ # Check if all boolean values in a Boolean column are `true`.
202
+ #
203
+ # This method is an expression - not to be confused with
204
+ # `Polars.all` which is a function to select all columns.
205
+ #
206
+ # @return [Expr]
207
+ #
208
+ # @example
209
+ # df = Polars::DataFrame.new(
210
+ # {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
211
+ # )
212
+ # df.select(Polars.col("*").all)
213
+ # # =>
214
+ # # shape: (1, 3)
215
+ # # ┌──────┬───────┬───────┐
216
+ # # │ TT ┆ TF ┆ FF │
217
+ # # │ --- ┆ --- ┆ --- │
218
+ # # │ bool ┆ bool ┆ bool │
219
+ # # ╞══════╪═══════╪═══════╡
220
+ # # │ true ┆ false ┆ false │
221
+ # # └──────┴───────┴───────┘
91
222
  def all
92
223
  wrap_expr(_rbexpr.all)
93
224
  end
94
225
 
226
+ # Compute the square root of the elements.
227
+ #
228
+ # @return [Expr]
229
+ #
230
+ # @example
231
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
232
+ # df.select(Polars.col("values").sqrt)
233
+ # # =>
234
+ # # shape: (3, 1)
235
+ # # ┌──────────┐
236
+ # # │ values │
237
+ # # │ --- │
238
+ # # │ f64 │
239
+ # # ╞══════════╡
240
+ # # │ 1.0 │
241
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
242
+ # # │ 1.414214 │
243
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
244
+ # # │ 2.0 │
245
+ # # └──────────┘
95
246
  def sqrt
96
- self ** 0.5
247
+ self**0.5
97
248
  end
98
249
 
250
+ # Compute the base 10 logarithm of the input array, element-wise.
251
+ #
252
+ # @return [Expr]
253
+ #
254
+ # @example
255
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
256
+ # df.select(Polars.col("values").log10)
257
+ # # =>
258
+ # # shape: (3, 1)
259
+ # # ┌─────────┐
260
+ # # │ values │
261
+ # # │ --- │
262
+ # # │ f64 │
263
+ # # ╞═════════╡
264
+ # # │ 0.0 │
265
+ # # ├╌╌╌╌╌╌╌╌╌┤
266
+ # # │ 0.30103 │
267
+ # # ├╌╌╌╌╌╌╌╌╌┤
268
+ # # │ 0.60206 │
269
+ # # └─────────┘
99
270
  def log10
100
271
  log(10)
101
272
  end
102
273
 
274
+ # Compute the exponential, element-wise.
275
+ #
276
+ # @return [Expr]
277
+ #
278
+ # @example
279
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
280
+ # df.select(Polars.col("values").exp)
281
+ # # =>
282
+ # # shape: (3, 1)
283
+ # # ┌──────────┐
284
+ # # │ values │
285
+ # # │ --- │
286
+ # # │ f64 │
287
+ # # ╞══════════╡
288
+ # # │ 2.718282 │
289
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
290
+ # # │ 7.389056 │
291
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
292
+ # # │ 54.59815 │
293
+ # # └──────────┘
103
294
  def exp
104
295
  wrap_expr(_rbexpr.exp)
105
296
  end
106
297
 
298
+ # Rename the output of an expression.
299
+ #
300
+ # @param name [String]
301
+ # New name.
302
+ #
303
+ # @return [Expr]
304
+ #
305
+ # @example
306
+ # df = Polars::DataFrame.new(
307
+ # {
308
+ # "a" => [1, 2, 3],
309
+ # "b" => ["a", "b", nil]
310
+ # }
311
+ # )
312
+ # df.select(
313
+ # [
314
+ # Polars.col("a").alias("bar"),
315
+ # Polars.col("b").alias("foo")
316
+ # ]
317
+ # )
318
+ # # =>
319
+ # # shape: (3, 2)
320
+ # # ┌─────┬──────┐
321
+ # # │ bar ┆ foo │
322
+ # # │ --- ┆ --- │
323
+ # # │ i64 ┆ str │
324
+ # # ╞═════╪══════╡
325
+ # # │ 1 ┆ a │
326
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
327
+ # # │ 2 ┆ b │
328
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
329
+ # # │ 3 ┆ null │
330
+ # # └─────┴──────┘
107
331
  def alias(name)
108
332
  wrap_expr(_rbexpr._alias(name))
109
333
  end
110
334
 
111
335
  # TODO support symbols for exclude
112
336
 
337
+ # Exclude certain columns from a wildcard/regex selection.
338
+ #
339
+ # You may also use regexes in the exclude list. They must start with `^` and end
340
+ # with `$`.
341
+ #
342
+ # @param columns [Object]
343
+ # Column(s) to exclude from selection.
344
+ # This can be:
113
345
  #
346
+ # - a column name, or multiple column names
347
+ # - a regular expression starting with `^` and ending with `$`
348
+ # - a dtype or multiple dtypes
349
+ #
350
+ # @return [Expr]
351
+ #
352
+ # @example
353
+ # df = Polars::DataFrame.new(
354
+ # {
355
+ # "aa" => [1, 2, 3],
356
+ # "ba" => ["a", "b", nil],
357
+ # "cc" => [nil, 2.5, 1.5]
358
+ # }
359
+ # )
360
+ # df.select(Polars.all.exclude("ba"))
361
+ # # =>
362
+ # # shape: (3, 2)
363
+ # # ┌─────┬──────┐
364
+ # # │ aa ┆ cc │
365
+ # # │ --- ┆ --- │
366
+ # # │ i64 ┆ f64 │
367
+ # # ╞═════╪══════╡
368
+ # # │ 1 ┆ null │
369
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
370
+ # # │ 2 ┆ 2.5 │
371
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
372
+ # # │ 3 ┆ 1.5 │
373
+ # # └─────┴──────┘
114
374
  def exclude(columns)
115
375
  if columns.is_a?(String)
116
376
  columns = [columns]
@@ -131,14 +391,43 @@ module Polars
131
391
  end
132
392
  end
133
393
 
394
+ # Keep the original root name of the expression.
395
+ #
396
+ # @return [Expr]
397
+ #
398
+ # @example
399
+ # df = Polars::DataFrame.new(
400
+ # {
401
+ # "a" => [1, 2],
402
+ # "b" => [3, 4]
403
+ # }
404
+ # )
405
+ # df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
406
+ # # =>
407
+ # # shape: (2, 2)
408
+ # # ┌─────┬─────┐
409
+ # # │ a ┆ b │
410
+ # # │ --- ┆ --- │
411
+ # # │ i64 ┆ i64 │
412
+ # # ╞═════╪═════╡
413
+ # # │ 9 ┆ 3 │
414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
415
+ # # │ 18 ┆ 4 │
416
+ # # └─────┴─────┘
134
417
  def keep_name
135
418
  wrap_expr(_rbexpr.keep_name)
136
419
  end
137
420
 
421
+ # Add a prefix to the root column name of the expression.
422
+ #
423
+ # @return [Expr]
138
424
  def prefix(prefix)
139
425
  wrap_expr(_rbexpr.prefix(prefix))
140
426
  end
141
427
 
428
+ # Add a suffix to the root column name of the expression.
429
+ #
430
+ # @return [Expr]
142
431
  def suffix(suffix)
143
432
  wrap_expr(_rbexpr.suffix(suffix))
144
433
  end
@@ -146,47 +435,351 @@ module Polars
146
435
  # def map_alias
147
436
  # end
148
437
 
438
+ # Negate a boolean expression.
439
+ #
440
+ # @return [Expr]
441
+ #
442
+ # @example
443
+ # df = Polars::DataFrame.new(
444
+ # {
445
+ # "a" => [true, false, false],
446
+ # "b" => ["a", "b", nil]
447
+ # }
448
+ # )
449
+ # # =>
450
+ # # shape: (3, 2)
451
+ # # ┌───────┬──────┐
452
+ # # │ a ┆ b │
453
+ # # │ --- ┆ --- │
454
+ # # │ bool ┆ str │
455
+ # # ╞═══════╪══════╡
456
+ # # │ true ┆ a │
457
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
458
+ # # │ false ┆ b │
459
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
460
+ # # │ false ┆ null │
461
+ # # └───────┴──────┘
149
462
  #
463
+ # @example
464
+ # df.select(Polars.col("a").is_not)
465
+ # # =>
466
+ # # shape: (3, 1)
467
+ # # ┌───────┐
468
+ # # │ a │
469
+ # # │ --- │
470
+ # # │ bool │
471
+ # # ╞═══════╡
472
+ # # │ false │
473
+ # # ├╌╌╌╌╌╌╌┤
474
+ # # │ true │
475
+ # # ├╌╌╌╌╌╌╌┤
476
+ # # │ true │
477
+ # # └───────┘
150
478
  def is_not
151
479
  wrap_expr(_rbexpr.is_not)
152
480
  end
153
481
 
482
+ # Returns a boolean Series indicating which values are null.
483
+ #
484
+ # @return [Expr]
485
+ #
486
+ # @example
487
+ # df = Polars::DataFrame.new(
488
+ # {
489
+ # "a" => [1, 2, nil, 1, 5],
490
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
491
+ # }
492
+ # )
493
+ # df.with_column(Polars.all.is_null.suffix("_isnull"))
494
+ # # =>
495
+ # # shape: (5, 4)
496
+ # # ┌──────┬─────┬──────────┬──────────┐
497
+ # # │ a ┆ b ┆ a_isnull ┆ b_isnull │
498
+ # # │ --- ┆ --- ┆ --- ┆ --- │
499
+ # # │ i64 ┆ f64 ┆ bool ┆ bool │
500
+ # # ╞══════╪═════╪══════════╪══════════╡
501
+ # # │ 1 ┆ 1.0 ┆ false ┆ false │
502
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
503
+ # # │ 2 ┆ 2.0 ┆ false ┆ false │
504
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
505
+ # # │ null ┆ NaN ┆ true ┆ false │
506
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
507
+ # # │ 1 ┆ 1.0 ┆ false ┆ false │
508
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
509
+ # # │ 5 ┆ 5.0 ┆ false ┆ false │
510
+ # # └──────┴─────┴──────────┴──────────┘
154
511
  def is_null
155
512
  wrap_expr(_rbexpr.is_null)
156
513
  end
157
514
 
515
+ # Returns a boolean Series indicating which values are not null.
516
+ #
517
+ # @return [Expr]
518
+ #
519
+ # @example
520
+ # df = Polars::DataFrame.new(
521
+ # {
522
+ # "a" => [1, 2, nil, 1, 5],
523
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
524
+ # }
525
+ # )
526
+ # df.with_column(Polars.all.is_not_null.suffix("_not_null"))
527
+ # # =>
528
+ # # shape: (5, 4)
529
+ # # ┌──────┬─────┬────────────┬────────────┐
530
+ # # │ a ┆ b ┆ a_not_null ┆ b_not_null │
531
+ # # │ --- ┆ --- ┆ --- ┆ --- │
532
+ # # │ i64 ┆ f64 ┆ bool ┆ bool │
533
+ # # ╞══════╪═════╪════════════╪════════════╡
534
+ # # │ 1 ┆ 1.0 ┆ true ┆ true │
535
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
536
+ # # │ 2 ┆ 2.0 ┆ true ┆ true │
537
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
538
+ # # │ null ┆ NaN ┆ false ┆ true │
539
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
540
+ # # │ 1 ┆ 1.0 ┆ true ┆ true │
541
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
542
+ # # │ 5 ┆ 5.0 ┆ true ┆ true │
543
+ # # └──────┴─────┴────────────┴────────────┘
158
544
  def is_not_null
159
545
  wrap_expr(_rbexpr.is_not_null)
160
546
  end
161
547
 
548
+ # Returns a boolean Series indicating which values are finite.
549
+ #
550
+ # @return [Expr]
551
+ #
552
+ # @example
553
+ # df = Polars::DataFrame.new(
554
+ # {
555
+ # "A" => [1.0, 2],
556
+ # "B" => [3.0, Float::INFINITY]
557
+ # }
558
+ # )
559
+ # df.select(Polars.all.is_finite)
560
+ # # =>
561
+ # # shape: (2, 2)
562
+ # # ┌──────┬───────┐
563
+ # # │ A ┆ B │
564
+ # # │ --- ┆ --- │
565
+ # # │ bool ┆ bool │
566
+ # # ╞══════╪═══════╡
567
+ # # │ true ┆ true │
568
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
569
+ # # │ true ┆ false │
570
+ # # └──────┴───────┘
162
571
  def is_finite
163
572
  wrap_expr(_rbexpr.is_finite)
164
573
  end
165
574
 
575
+ # Returns a boolean Series indicating which values are infinite.
576
+ #
577
+ # @return [Expr]
578
+ #
579
+ # @example
580
+ # df = Polars::DataFrame.new(
581
+ # {
582
+ # "A" => [1.0, 2],
583
+ # "B" => [3.0, Float::INFINITY]
584
+ # }
585
+ # )
586
+ # df.select(Polars.all.is_infinite)
587
+ # # =>
588
+ # # shape: (2, 2)
589
+ # # ┌───────┬───────┐
590
+ # # │ A ┆ B │
591
+ # # │ --- ┆ --- │
592
+ # # │ bool ┆ bool │
593
+ # # ╞═══════╪═══════╡
594
+ # # │ false ┆ false │
595
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
596
+ # # │ false ┆ true │
597
+ # # └───────┴───────┘
166
598
  def is_infinite
167
599
  wrap_expr(_rbexpr.is_infinite)
168
600
  end
169
601
 
602
+ # Returns a boolean Series indicating which values are NaN.
603
+ #
604
+ # @note
605
+ # Floating point `NaN` (Not A Number) should not be confused
606
+ # with missing data represented as `nil`.
607
+ #
608
+ # @return [Expr]
609
+ #
610
+ # @example
611
+ # df = Polars::DataFrame.new(
612
+ # {
613
+ # "a" => [1, 2, nil, 1, 5],
614
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
615
+ # }
616
+ # )
617
+ # df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
618
+ # # =>
619
+ # # shape: (5, 3)
620
+ # # ┌──────┬─────┬─────────┐
621
+ # # │ a ┆ b ┆ b_isnan │
622
+ # # │ --- ┆ --- ┆ --- │
623
+ # # │ i64 ┆ f64 ┆ bool │
624
+ # # ╞══════╪═════╪═════════╡
625
+ # # │ 1 ┆ 1.0 ┆ false │
626
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
627
+ # # │ 2 ┆ 2.0 ┆ false │
628
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
629
+ # # │ null ┆ NaN ┆ true │
630
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
631
+ # # │ 1 ┆ 1.0 ┆ false │
632
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
633
+ # # │ 5 ┆ 5.0 ┆ false │
634
+ # # └──────┴─────┴─────────┘
170
635
  def is_nan
171
636
  wrap_expr(_rbexpr.is_nan)
172
637
  end
173
638
 
639
+ # Returns a boolean Series indicating which values are not NaN.
640
+ #
641
+ # @note
642
+ # Floating point `NaN` (Not A Number) should not be confused
643
+ # with missing data represented as `nil`.
644
+ #
645
+ # @return [Expr]
646
+ #
647
+ # @example
648
+ # df = Polars::DataFrame.new(
649
+ # {
650
+ # "a" => [1, 2, nil, 1, 5],
651
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
652
+ # }
653
+ # )
654
+ # df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
655
+ # # =>
656
+ # # shape: (5, 3)
657
+ # # ┌──────┬─────┬──────────────┐
658
+ # # │ a ┆ b ┆ b_is_not_nan │
659
+ # # │ --- ┆ --- ┆ --- │
660
+ # # │ i64 ┆ f64 ┆ bool │
661
+ # # ╞══════╪═════╪══════════════╡
662
+ # # │ 1 ┆ 1.0 ┆ true │
663
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
664
+ # # │ 2 ┆ 2.0 ┆ true │
665
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
666
+ # # │ null ┆ NaN ┆ false │
667
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
668
+ # # │ 1 ┆ 1.0 ┆ true │
669
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
670
+ # # │ 5 ┆ 5.0 ┆ true │
671
+ # # └──────┴─────┴──────────────┘
174
672
  def is_not_nan
175
673
  wrap_expr(_rbexpr.is_not_nan)
176
674
  end
177
675
 
676
+ # Get the group indexes of the group by operation.
677
+ #
678
+ # Should be used in aggregation context only.
679
+ #
680
+ # @return [Expr]
681
+ #
682
+ # @example
683
+ # df = Polars::DataFrame.new(
684
+ # {
685
+ # "group" => [
686
+ # "one",
687
+ # "one",
688
+ # "one",
689
+ # "two",
690
+ # "two",
691
+ # "two"
692
+ # ],
693
+ # "value" => [94, 95, 96, 97, 97, 99]
694
+ # }
695
+ # )
696
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
697
+ # # =>
698
+ # # shape: (2, 2)
699
+ # # ┌───────┬───────────┐
700
+ # # │ group ┆ value │
701
+ # # │ --- ┆ --- │
702
+ # # │ str ┆ list[u32] │
703
+ # # ╞═══════╪═══════════╡
704
+ # # │ one ┆ [0, 1, 2] │
705
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
706
+ # # │ two ┆ [3, 4, 5] │
707
+ # # └───────┴───────────┘
178
708
  def agg_groups
179
709
  wrap_expr(_rbexpr.agg_groups)
180
710
  end
181
711
 
712
+ # Count the number of values in this expression.
713
+ #
714
+ # @return [Expr]
715
+ #
716
+ # @example
717
+ # df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
718
+ # df.select(Polars.all.count)
719
+ # # =>
720
+ # # shape: (1, 2)
721
+ # # ┌─────┬─────┐
722
+ # # │ a ┆ b │
723
+ # # │ --- ┆ --- │
724
+ # # │ u32 ┆ u32 │
725
+ # # ╞═════╪═════╡
726
+ # # │ 3 ┆ 3 │
727
+ # # └─────┴─────┘
182
728
  def count
183
729
  wrap_expr(_rbexpr.count)
184
730
  end
185
731
 
732
+ # Count the number of values in this expression.
733
+ #
734
+ # Alias for {#count}.
735
+ #
736
+ # @return [Expr]
737
+ #
738
+ # @example
739
+ # df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
740
+ # df.select(Polars.all.len)
741
+ # # =>
742
+ # # shape: (1, 2)
743
+ # # ┌─────┬─────┐
744
+ # # │ a ┆ b │
745
+ # # │ --- ┆ --- │
746
+ # # │ u32 ┆ u32 │
747
+ # # ╞═════╪═════╡
748
+ # # │ 3 ┆ 3 │
749
+ # # └─────┴─────┘
186
750
  def len
187
751
  count
188
752
  end
189
753
 
754
+ # Get a slice of this expression.
755
+ #
756
+ # @param offset [Integer]
757
+ # Start index. Negative indexing is supported.
758
+ # @param length [Integer, nil]
759
+ # Length of the slice. If set to `nil`, all rows starting at the offset
760
+ # will be selected.
761
+ #
762
+ # @return [Expr]
763
+ #
764
+ # @example
765
+ # df = Polars::DataFrame.new(
766
+ # {
767
+ # "a" => [8, 9, 10, 11],
768
+ # "b" => [nil, 4, 4, 4]
769
+ # }
770
+ # )
771
+ # df.select(Polars.all.slice(1, 2))
772
+ # # =>
773
+ # # shape: (2, 2)
774
+ # # ┌─────┬─────┐
775
+ # # │ a ┆ b │
776
+ # # │ --- ┆ --- │
777
+ # # │ i64 ┆ i64 │
778
+ # # ╞═════╪═════╡
779
+ # # │ 9 ┆ 4 │
780
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
781
+ # # │ 10 ┆ 4 │
782
+ # # └─────┴─────┘
190
783
  def slice(offset, length = nil)
191
784
  if !offset.is_a?(Expr)
192
785
  offset = Polars.lit(offset)
@@ -197,94 +790,785 @@ module Polars
197
790
  wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
198
791
  end
199
792
 
793
+ # Append expressions.
794
+ #
795
+ # This is done by adding the chunks of `other` to this `Series`.
796
+ #
797
+ # @param other [Expr]
798
+ # Expression to append.
799
+ # @param upcast [Boolean]
800
+ # Cast both `Series` to the same supertype.
801
+ #
802
+ # @return [Expr]
803
+ #
804
+ # @example
805
+ # df = Polars::DataFrame.new(
806
+ # {
807
+ # "a" => [8, 9, 10],
808
+ # "b" => [nil, 4, 4]
809
+ # }
810
+ # )
811
+ # df.select(Polars.all.head(1).append(Polars.all.tail(1)))
812
+ # # =>
813
+ # # shape: (2, 2)
814
+ # # ┌─────┬──────┐
815
+ # # │ a ┆ b │
816
+ # # │ --- ┆ --- │
817
+ # # │ i64 ┆ i64 │
818
+ # # ╞═════╪══════╡
819
+ # # │ 8 ┆ null │
820
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
821
+ # # │ 10 ┆ 4 │
822
+ # # └─────┴──────┘
200
823
  def append(other, upcast: true)
201
824
  other = Utils.expr_to_lit_or_expr(other)
202
825
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
203
826
  end
204
827
 
828
+ # Create a single chunk of memory for this Series.
829
+ #
830
+ # @return [Expr]
831
+ #
832
+ # @example Create a Series with 3 nulls, append column a then rechunk
833
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
834
+ # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
835
+ # # =>
836
+ # # shape: (6, 1)
837
+ # # ┌─────────┐
838
+ # # │ literal │
839
+ # # │ --- │
840
+ # # │ i64 │
841
+ # # ╞═════════╡
842
+ # # │ null │
843
+ # # ├╌╌╌╌╌╌╌╌╌┤
844
+ # # │ null │
845
+ # # ├╌╌╌╌╌╌╌╌╌┤
846
+ # # │ null │
847
+ # # ├╌╌╌╌╌╌╌╌╌┤
848
+ # # │ 1 │
849
+ # # ├╌╌╌╌╌╌╌╌╌┤
850
+ # # │ 1 │
851
+ # # ├╌╌╌╌╌╌╌╌╌┤
852
+ # # │ 2 │
853
+ # # └─────────┘
205
854
  def rechunk
206
855
  wrap_expr(_rbexpr.rechunk)
207
856
  end
208
857
 
858
+ # Drop null values.
859
+ #
860
+ # @return [Expr]
861
+ #
862
+ # @example
863
+ # df = Polars::DataFrame.new(
864
+ # {
865
+ # "a" => [8, 9, 10, 11],
866
+ # "b" => [nil, 4.0, 4.0, Float::NAN]
867
+ # }
868
+ # )
869
+ # df.select(Polars.col("b").drop_nulls)
870
+ # # =>
871
+ # # shape: (3, 1)
872
+ # # ┌─────┐
873
+ # # │ b │
874
+ # # │ --- │
875
+ # # │ f64 │
876
+ # # ╞═════╡
877
+ # # │ 4.0 │
878
+ # # ├╌╌╌╌╌┤
879
+ # # │ 4.0 │
880
+ # # ├╌╌╌╌╌┤
881
+ # # │ NaN │
882
+ # # └─────┘
209
883
  def drop_nulls
210
884
  wrap_expr(_rbexpr.drop_nulls)
211
885
  end
212
886
 
887
+ # Drop floating point NaN values.
888
+ #
889
+ # @return [Expr]
890
+ #
891
+ # @example
892
+ # df = Polars::DataFrame.new(
893
+ # {
894
+ # "a" => [8, 9, 10, 11],
895
+ # "b" => [nil, 4.0, 4.0, Float::NAN]
896
+ # }
897
+ # )
898
+ # df.select(Polars.col("b").drop_nans)
899
+ # # =>
900
+ # # shape: (3, 1)
901
+ # # ┌──────┐
902
+ # # │ b │
903
+ # # │ --- │
904
+ # # │ f64 │
905
+ # # ╞══════╡
906
+ # # │ null │
907
+ # # ├╌╌╌╌╌╌┤
908
+ # # │ 4.0 │
909
+ # # ├╌╌╌╌╌╌┤
910
+ # # │ 4.0 │
911
+ # # └──────┘
213
912
  def drop_nans
214
913
  wrap_expr(_rbexpr.drop_nans)
215
914
  end
216
915
 
916
+ # Get an array with the cumulative sum computed at every element.
917
+ #
918
+ # @param reverse [Boolean]
919
+ # Reverse the operation.
920
+ #
921
+ # @return [Expr]
922
+ #
923
+ # @note
924
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
925
+ # `:i64` before summing to prevent overflow issues.
926
+ #
927
+ # @example
928
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
929
+ # df.select(
930
+ # [
931
+ # Polars.col("a").cumsum,
932
+ # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
933
+ # ]
934
+ # )
935
+ # # =>
936
+ # # shape: (4, 2)
937
+ # # ┌─────┬───────────┐
938
+ # # │ a ┆ a_reverse │
939
+ # # │ --- ┆ --- │
940
+ # # │ i64 ┆ i64 │
941
+ # # ╞═════╪═══════════╡
942
+ # # │ 1 ┆ 10 │
943
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
944
+ # # │ 3 ┆ 9 │
945
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
946
+ # # │ 6 ┆ 7 │
947
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
948
+ # # │ 10 ┆ 4 │
949
+ # # └─────┴───────────┘
217
950
  def cumsum(reverse: false)
218
951
  wrap_expr(_rbexpr.cumsum(reverse))
219
952
  end
220
953
 
954
+ # Get an array with the cumulative product computed at every element.
955
+ #
956
+ # @param reverse [Boolean]
957
+ # Reverse the operation.
958
+ #
959
+ # @return [Expr]
960
+ #
961
+ # @note
962
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
963
+ # `:i64` before multiplying to prevent overflow issues.
964
+ #
965
+ # @example
966
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
967
+ # df.select(
968
+ # [
969
+ # Polars.col("a").cumprod,
970
+ # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
971
+ # ]
972
+ # )
973
+ # # =>
974
+ # # shape: (4, 2)
975
+ # # ┌─────┬───────────┐
976
+ # # │ a ┆ a_reverse │
977
+ # # │ --- ┆ --- │
978
+ # # │ i64 ┆ i64 │
979
+ # # ╞═════╪═══════════╡
980
+ # # │ 1 ┆ 24 │
981
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
982
+ # # │ 2 ┆ 24 │
983
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
984
+ # # │ 6 ┆ 12 │
985
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
986
+ # # │ 24 ┆ 4 │
987
+ # # └─────┴───────────┘
221
988
  def cumprod(reverse: false)
222
989
  wrap_expr(_rbexpr.cumprod(reverse))
223
990
  end
224
991
 
992
+ # Get an array with the cumulative min computed at every element.
993
+ #
994
+ # @param reverse [Boolean]
995
+ # Reverse the operation.
996
+ #
997
+ # @return [Expr]
998
+ #
999
+ # @example
1000
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1001
+ # df.select(
1002
+ # [
1003
+ # Polars.col("a").cummin,
1004
+ # Polars.col("a").cummin(reverse: true).alias("a_reverse")
1005
+ # ]
1006
+ # )
1007
+ # # =>
1008
+ # # shape: (4, 2)
1009
+ # # ┌─────┬───────────┐
1010
+ # # │ a ┆ a_reverse │
1011
+ # # │ --- ┆ --- │
1012
+ # # │ i64 ┆ i64 │
1013
+ # # ╞═════╪═══════════╡
1014
+ # # │ 1 ┆ 1 │
1015
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1016
+ # # │ 1 ┆ 2 │
1017
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1018
+ # # │ 1 ┆ 3 │
1019
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1020
+ # # │ 1 ┆ 4 │
1021
+ # # └─────┴───────────┘
225
1022
  def cummin(reverse: false)
226
1023
  wrap_expr(_rbexpr.cummin(reverse))
227
1024
  end
228
1025
 
1026
+ # Get an array with the cumulative max computed at every element.
1027
+ #
1028
+ # @param reverse [Boolean]
1029
+ # Reverse the operation.
1030
+ #
1031
+ # @return [Expr]
1032
+ #
1033
+ # @example
1034
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1035
+ # df.select(
1036
+ # [
1037
+ # Polars.col("a").cummax,
1038
+ # Polars.col("a").cummax(reverse: true).alias("a_reverse")
1039
+ # ]
1040
+ # )
1041
+ # # =>
1042
+ # # shape: (4, 2)
1043
+ # # ┌─────┬───────────┐
1044
+ # # │ a ┆ a_reverse │
1045
+ # # │ --- ┆ --- │
1046
+ # # │ i64 ┆ i64 │
1047
+ # # ╞═════╪═══════════╡
1048
+ # # │ 1 ┆ 4 │
1049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1050
+ # # │ 2 ┆ 4 │
1051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1052
+ # # │ 3 ┆ 4 │
1053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1054
+ # # │ 4 ┆ 4 │
1055
+ # # └─────┴───────────┘
229
1056
  def cummax(reverse: false)
230
1057
  wrap_expr(_rbexpr.cummax(reverse))
231
1058
  end
232
1059
 
1060
+ # Get an array with the cumulative count computed at every element.
1061
+ #
1062
+ # Counting from 0 to len - 1.
1063
+ #
1064
+ # @param reverse [Boolean]
1065
+ # Reverse the operation.
1066
+ #
1067
+ # @return [Expr]
1068
+ #
1069
+ # @example
1070
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
1071
+ # df.select(
1072
+ # [
1073
+ # Polars.col("a").cumcount,
1074
+ # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
1075
+ # ]
1076
+ # )
1077
+ # # =>
1078
+ # # shape: (4, 2)
1079
+ # # ┌─────┬───────────┐
1080
+ # # │ a ┆ a_reverse │
1081
+ # # │ --- ┆ --- │
1082
+ # # │ u32 ┆ u32 │
1083
+ # # ╞═════╪═══════════╡
1084
+ # # │ 0 ┆ 3 │
1085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1086
+ # # │ 1 ┆ 2 │
1087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1088
+ # # │ 2 ┆ 1 │
1089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1090
+ # # │ 3 ┆ 0 │
1091
+ # # └─────┴───────────┘
233
1092
  def cumcount(reverse: false)
234
1093
  wrap_expr(_rbexpr.cumcount(reverse))
235
1094
  end
236
1095
 
1096
+ # Rounds down to the nearest integer value.
1097
+ #
1098
+ # Only works on floating point Series.
1099
+ #
1100
+ # @return [Expr]
1101
+ #
1102
+ # @example
1103
+ # df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
1104
+ # df.select(Polars.col("a").floor)
1105
+ # # =>
1106
+ # # shape: (4, 1)
1107
+ # # ┌─────┐
1108
+ # # │ a │
1109
+ # # │ --- │
1110
+ # # │ f64 │
1111
+ # # ╞═════╡
1112
+ # # │ 0.0 │
1113
+ # # ├╌╌╌╌╌┤
1114
+ # # │ 0.0 │
1115
+ # # ├╌╌╌╌╌┤
1116
+ # # │ 1.0 │
1117
+ # # ├╌╌╌╌╌┤
1118
+ # # │ 1.0 │
1119
+ # # └─────┘
237
1120
  def floor
238
1121
  wrap_expr(_rbexpr.floor)
239
1122
  end
240
1123
 
1124
+ # Rounds up to the nearest integer value.
1125
+ #
1126
+ # Only works on floating point Series.
1127
+ #
1128
+ # @return [Expr]
1129
+ #
1130
+ # @example
1131
+ # df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
1132
+ # df.select(Polars.col("a").ceil)
1133
+ # # =>
1134
+ # # shape: (4, 1)
1135
+ # # ┌─────┐
1136
+ # # │ a │
1137
+ # # │ --- │
1138
+ # # │ f64 │
1139
+ # # ╞═════╡
1140
+ # # │ 1.0 │
1141
+ # # ├╌╌╌╌╌┤
1142
+ # # │ 1.0 │
1143
+ # # ├╌╌╌╌╌┤
1144
+ # # │ 1.0 │
1145
+ # # ├╌╌╌╌╌┤
1146
+ # # │ 2.0 │
1147
+ # # └─────┘
241
1148
  def ceil
242
1149
  wrap_expr(_rbexpr.ceil)
243
1150
  end
244
1151
 
1152
+ # Round underlying floating point data by `decimals` digits.
1153
+ #
1154
+ # @param decimals [Integer]
1155
+ # Number of decimals to round by.
1156
+ #
1157
+ # @return [Expr]
1158
+ #
1159
+ # @example
1160
+ # df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
1161
+ # df.select(Polars.col("a").round(1))
1162
+ # # =>
1163
+ # # shape: (4, 1)
1164
+ # # ┌─────┐
1165
+ # # │ a │
1166
+ # # │ --- │
1167
+ # # │ f64 │
1168
+ # # ╞═════╡
1169
+ # # │ 0.3 │
1170
+ # # ├╌╌╌╌╌┤
1171
+ # # │ 0.5 │
1172
+ # # ├╌╌╌╌╌┤
1173
+ # # │ 1.0 │
1174
+ # # ├╌╌╌╌╌┤
1175
+ # # │ 1.2 │
1176
+ # # └─────┘
245
1177
  def round(decimals = 0)
246
1178
  wrap_expr(_rbexpr.round(decimals))
247
1179
  end
248
1180
 
1181
+ # Compute the dot/inner product between two Expressions.
1182
+ #
1183
+ # @param other [Expr]
1184
+ # Expression to compute dot product with.
1185
+ #
1186
+ # @return [Expr]
1187
+ #
1188
+ # @example
1189
+ # df = Polars::DataFrame.new(
1190
+ # {
1191
+ # "a" => [1, 3, 5],
1192
+ # "b" => [2, 4, 6]
1193
+ # }
1194
+ # )
1195
+ # df.select(Polars.col("a").dot(Polars.col("b")))
1196
+ # # =>
1197
+ # # shape: (1, 1)
1198
+ # # ┌─────┐
1199
+ # # │ a │
1200
+ # # │ --- │
1201
+ # # │ i64 │
1202
+ # # ╞═════╡
1203
+ # # │ 44 │
1204
+ # # └─────┘
249
1205
  def dot(other)
250
1206
  other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
251
1207
  wrap_expr(_rbexpr.dot(other._rbexpr))
252
1208
  end
253
1209
 
1210
+ # Compute the most occurring value(s).
1211
+ #
1212
+ # Can return multiple values.
1213
+ #
1214
+ # @return [Expr]
1215
+ #
1216
+ # @example
1217
+ # df = Polars::DataFrame.new(
1218
+ # {
1219
+ # "a" => [1, 1, 2, 3],
1220
+ # "b" => [1, 1, 2, 2]
1221
+ # }
1222
+ # )
1223
+ # df.select(Polars.all.mode)
1224
+ # # =>
1225
+ # # shape: (2, 2)
1226
+ # # ┌─────┬─────┐
1227
+ # # │ a ┆ b │
1228
+ # # │ --- ┆ --- │
1229
+ # # │ i64 ┆ i64 │
1230
+ # # ╞═════╪═════╡
1231
+ # # │ 1 ┆ 1 │
1232
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1233
+ # # │ 1 ┆ 2 │
1234
+ # # └─────┴─────┘
254
1235
  def mode
255
1236
  wrap_expr(_rbexpr.mode)
256
1237
  end
257
1238
 
1239
+ # Cast between data types.
1240
+ #
1241
+ # @param dtype [Symbol]
1242
+ # DataType to cast to.
1243
+ # @param strict [Boolean]
1244
+ # Throw an error if a cast could not be done.
1245
+ # For instance, due to an overflow.
1246
+ #
1247
+ # @return [Expr]
1248
+ #
1249
+ # @example
1250
+ # df = Polars::DataFrame.new(
1251
+ # {
1252
+ # "a" => [1, 2, 3],
1253
+ # "b" => ["4", "5", "6"]
1254
+ # }
1255
+ # )
1256
+ # df.with_columns(
1257
+ # [
1258
+ # Polars.col("a").cast(:f64),
1259
+ # Polars.col("b").cast(:i32)
1260
+ # ]
1261
+ # )
1262
+ # # =>
1263
+ # # shape: (3, 2)
1264
+ # # ┌─────┬─────┐
1265
+ # # │ a ┆ b │
1266
+ # # │ --- ┆ --- │
1267
+ # # │ f64 ┆ i32 │
1268
+ # # ╞═════╪═════╡
1269
+ # # │ 1.0 ┆ 4 │
1270
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1271
+ # # │ 2.0 ┆ 5 │
1272
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1273
+ # # │ 3.0 ┆ 6 │
1274
+ # # └─────┴─────┘
258
1275
  def cast(dtype, strict: true)
259
1276
  dtype = Utils.rb_type_to_dtype(dtype)
260
1277
  wrap_expr(_rbexpr.cast(dtype, strict))
261
1278
  end
262
1279
 
1280
+ # Sort this column. In a projection/selection context, the whole column is sorted.
1281
+ #
1282
+ # If used in a groupby context, the groups are sorted.
1283
+ #
1284
+ # @param reverse [Boolean]
1285
+ # false -> order from small to large.
1286
+ # true -> order from large to small.
1287
+ # @param nulls_last [Boolean]
1288
+ # If true nulls are considered to be larger than any valid value.
1289
+ #
1290
+ # @return [Expr]
1291
+ #
1292
+ # @example
1293
+ # df = Polars::DataFrame.new(
1294
+ # {
1295
+ # "group" => [
1296
+ # "one",
1297
+ # "one",
1298
+ # "one",
1299
+ # "two",
1300
+ # "two",
1301
+ # "two"
1302
+ # ],
1303
+ # "value" => [1, 98, 2, 3, 99, 4]
1304
+ # }
1305
+ # )
1306
+ # df.select(Polars.col("value").sort)
1307
+ # # =>
1308
+ # # shape: (6, 1)
1309
+ # # ┌───────┐
1310
+ # # │ value │
1311
+ # # │ --- │
1312
+ # # │ i64 │
1313
+ # # ╞═══════╡
1314
+ # # │ 1 │
1315
+ # # ├╌╌╌╌╌╌╌┤
1316
+ # # │ 2 │
1317
+ # # ├╌╌╌╌╌╌╌┤
1318
+ # # │ 3 │
1319
+ # # ├╌╌╌╌╌╌╌┤
1320
+ # # │ 4 │
1321
+ # # ├╌╌╌╌╌╌╌┤
1322
+ # # │ 98 │
1323
+ # # ├╌╌╌╌╌╌╌┤
1324
+ # # │ 99 │
1325
+ # # └───────┘
1326
+ #
1327
+ # @example
1328
+ # df.select(Polars.col("value").sort)
1329
+ # # =>
1330
+ # # shape: (6, 1)
1331
+ # # ┌───────┐
1332
+ # # │ value │
1333
+ # # │ --- │
1334
+ # # │ i64 │
1335
+ # # ╞═══════╡
1336
+ # # │ 1 │
1337
+ # # ├╌╌╌╌╌╌╌┤
1338
+ # # │ 2 │
1339
+ # # ├╌╌╌╌╌╌╌┤
1340
+ # # │ 3 │
1341
+ # # ├╌╌╌╌╌╌╌┤
1342
+ # # │ 4 │
1343
+ # # ├╌╌╌╌╌╌╌┤
1344
+ # # │ 98 │
1345
+ # # ├╌╌╌╌╌╌╌┤
1346
+ # # │ 99 │
1347
+ # # └───────┘
1348
+ #
1349
+ # @example
1350
+ # df.groupby("group").agg(Polars.col("value").sort)
1351
+ # # =>
1352
+ # # shape: (2, 2)
1353
+ # # ┌───────┬────────────┐
1354
+ # # │ group ┆ value │
1355
+ # # │ --- ┆ --- │
1356
+ # # │ str ┆ list[i64] │
1357
+ # # ╞═══════╪════════════╡
1358
+ # # │ two ┆ [3, 4, 99] │
1359
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1360
+ # # │ one ┆ [1, 2, 98] │
1361
+ # # └───────┴────────────┘
263
1362
  def sort(reverse: false, nulls_last: false)
264
1363
  wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
265
1364
  end
266
1365
 
1366
+ # Return the `k` largest elements.
1367
+ #
1368
+ # If `reverse: true`, the smallest elements will be given.
1369
+ #
1370
+ # @param k [Integer]
1371
+ # Number of elements to return.
1372
+ # @param reverse [Boolean]
1373
+ # Return the smallest elements.
1374
+ #
1375
+ # @return [Expr]
1376
+ #
1377
+ # @example
1378
+ # df = Polars::DataFrame.new(
1379
+ # {
1380
+ # "value" => [1, 98, 2, 3, 99, 4]
1381
+ # }
1382
+ # )
1383
+ # df.select(
1384
+ # [
1385
+ # Polars.col("value").top_k.alias("top_k"),
1386
+ # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1387
+ # ]
1388
+ # )
1389
+ # # =>
1390
+ # # shape: (5, 2)
1391
+ # # ┌───────┬──────────┐
1392
+ # # │ top_k ┆ bottom_k │
1393
+ # # │ --- ┆ --- │
1394
+ # # │ i64 ┆ i64 │
1395
+ # # ╞═══════╪══════════╡
1396
+ # # │ 99 ┆ 1 │
1397
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1398
+ # # │ 98 ┆ 2 │
1399
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1400
+ # # │ 4 ┆ 3 │
1401
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1402
+ # # │ 3 ┆ 4 │
1403
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
1404
+ # # │ 2 ┆ 98 │
1405
+ # # └───────┴──────────┘
267
1406
  def top_k(k: 5, reverse: false)
268
1407
  wrap_expr(_rbexpr.top_k(k, reverse))
269
1408
  end
270
1409
 
1410
+ # Get the index values that would sort this column.
1411
+ #
1412
+ # @param reverse [Boolean]
1413
+ # Sort in reverse (descending) order.
1414
+ # @param nulls_last [Boolean]
1415
+ # Place null values last instead of first.
1416
+ #
1417
+ # @return [Expr]
1418
+ #
1419
+ # @example
1420
+ # df = Polars::DataFrame.new(
1421
+ # {
1422
+ # "a" => [20, 10, 30]
1423
+ # }
1424
+ # )
1425
+ # df.select(Polars.col("a").arg_sort)
1426
+ # # =>
1427
+ # # shape: (3, 1)
1428
+ # # ┌─────┐
1429
+ # # │ a │
1430
+ # # │ --- │
1431
+ # # │ u32 │
1432
+ # # ╞═════╡
1433
+ # # │ 1 │
1434
+ # # ├╌╌╌╌╌┤
1435
+ # # │ 0 │
1436
+ # # ├╌╌╌╌╌┤
1437
+ # # │ 2 │
1438
+ # # └─────┘
271
1439
  def arg_sort(reverse: false, nulls_last: false)
272
1440
  wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
273
1441
  end
274
1442
 
1443
+ # Get the index of the maximal value.
1444
+ #
1445
+ # @return [Expr]
1446
+ #
1447
+ # @example
1448
+ # df = Polars::DataFrame.new(
1449
+ # {
1450
+ # "a" => [20, 10, 30]
1451
+ # }
1452
+ # )
1453
+ # df.select(Polars.col("a").arg_max)
1454
+ # # =>
1455
+ # # shape: (1, 1)
1456
+ # # ┌─────┐
1457
+ # # │ a │
1458
+ # # │ --- │
1459
+ # # │ u32 │
1460
+ # # ╞═════╡
1461
+ # # │ 2 │
1462
+ # # └─────┘
275
1463
  def arg_max
276
1464
  wrap_expr(_rbexpr.arg_max)
277
1465
  end
278
1466
 
1467
+ # Get the index of the minimal value.
1468
+ #
1469
+ # @return [Expr]
1470
+ #
1471
+ # @example
1472
+ # df = Polars::DataFrame.new(
1473
+ # {
1474
+ # "a" => [20, 10, 30]
1475
+ # }
1476
+ # )
1477
+ # df.select(Polars.col("a").arg_min)
1478
+ # # =>
1479
+ # # shape: (1, 1)
1480
+ # # ┌─────┐
1481
+ # # │ a │
1482
+ # # │ --- │
1483
+ # # │ u32 │
1484
+ # # ╞═════╡
1485
+ # # │ 1 │
1486
+ # # └─────┘
279
1487
  def arg_min
280
1488
  wrap_expr(_rbexpr.arg_min)
281
1489
  end
282
1490
 
1491
+ # Find indices where elements should be inserted to maintain order.
1492
+ #
1493
+ # @param element [Object]
1494
+ # Expression or scalar value.
1495
+ #
1496
+ # @return [Expr]
1497
+ #
1498
+ # @example
1499
+ # df = Polars::DataFrame.new(
1500
+ # {
1501
+ # "values" => [1, 2, 3, 5]
1502
+ # }
1503
+ # )
1504
+ # df.select(
1505
+ # [
1506
+ # Polars.col("values").search_sorted(0).alias("zero"),
1507
+ # Polars.col("values").search_sorted(3).alias("three"),
1508
+ # Polars.col("values").search_sorted(6).alias("six")
1509
+ # ]
1510
+ # )
1511
+ # # =>
1512
+ # # shape: (1, 3)
1513
+ # # ┌──────┬───────┬─────┐
1514
+ # # │ zero ┆ three ┆ six │
1515
+ # # │ --- ┆ --- ┆ --- │
1516
+ # # │ u32 ┆ u32 ┆ u32 │
1517
+ # # ╞══════╪═══════╪═════╡
1518
+ # # │ 0 ┆ 2 ┆ 4 │
1519
+ # # └──────┴───────┴─────┘
283
1520
  def search_sorted(element)
284
1521
  element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
285
1522
  wrap_expr(_rbexpr.search_sorted(element._rbexpr))
286
1523
  end
287
1524
 
1525
+ # Sort this column by the ordering of another column, or multiple other columns.
1526
+ #
1527
+ # In a projection/selection context, the whole column is sorted.
1528
+ # If used in a groupby context, the groups are sorted.
1529
+ #
1530
+ # @param by [Object]
1531
+ # The column(s) used for sorting.
1532
+ # @param reverse [Boolean]
1533
+ # false -> order from small to large.
1534
+ # true -> order from large to small.
1535
+ #
1536
+ # @return [Expr]
1537
+ #
1538
+ # @example
1539
+ # df = Polars::DataFrame.new(
1540
+ # {
1541
+ # "group" => [
1542
+ # "one",
1543
+ # "one",
1544
+ # "one",
1545
+ # "two",
1546
+ # "two",
1547
+ # "two"
1548
+ # ],
1549
+ # "value" => [1, 98, 2, 3, 99, 4]
1550
+ # }
1551
+ # )
1552
+ # df.select(Polars.col("group").sort_by("value"))
1553
+ # # =>
1554
+ # # shape: (6, 1)
1555
+ # # ┌───────┐
1556
+ # # │ group │
1557
+ # # │ --- │
1558
+ # # │ str │
1559
+ # # ╞═══════╡
1560
+ # # │ one │
1561
+ # # ├╌╌╌╌╌╌╌┤
1562
+ # # │ one │
1563
+ # # ├╌╌╌╌╌╌╌┤
1564
+ # # │ two │
1565
+ # # ├╌╌╌╌╌╌╌┤
1566
+ # # │ two │
1567
+ # # ├╌╌╌╌╌╌╌┤
1568
+ # # │ one │
1569
+ # # ├╌╌╌╌╌╌╌┤
1570
+ # # │ two │
1571
+ # # └───────┘
288
1572
  def sort_by(by, reverse: false)
289
1573
  if !by.is_a?(Array)
290
1574
  by = [by]
@@ -297,19 +1581,176 @@ module Polars
297
1581
  wrap_expr(_rbexpr.sort_by(by, reverse))
298
1582
  end
299
1583
 
300
- # def take
301
- # end
1584
+ # Take values by index.
1585
+ #
1586
+ # @param indices [Expr, Array, Integer]
1587
+ # An expression that leads to a `:u32` dtyped Series.
1588
+ #
1589
+ # @return [Expr]
1590
+ #
1591
+ # @example
1592
+ # df = Polars::DataFrame.new(
1593
+ # {
1594
+ # "group" => [
1595
+ # "one",
1596
+ # "one",
1597
+ # "one",
1598
+ # "two",
1599
+ # "two",
1600
+ # "two"
1601
+ # ],
1602
+ # "value" => [1, 98, 2, 3, 99, 4]
1603
+ # }
1604
+ # )
1605
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
1606
+ # # =>
1607
+ # # shape: (2, 2)
1608
+ # # ┌───────┬───────┐
1609
+ # # │ group ┆ value │
1610
+ # # │ --- ┆ --- │
1611
+ # # │ str ┆ i64 │
1612
+ # # ╞═══════╪═══════╡
1613
+ # # │ one ┆ 98 │
1614
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1615
+ # # │ two ┆ 99 │
1616
+ # # └───────┴───────┘
1617
+ def take(indices)
1618
+ if indices.is_a?(Array)
1619
+ indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1620
+ else
1621
+ indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
1622
+ end
1623
+ wrap_expr(_rbexpr.take(indices_lit._rbexpr))
1624
+ end
302
1625
 
1626
+ # Shift the values by a given period.
1627
+ #
1628
+ # @param periods [Integer]
1629
+ # Number of places to shift (may be negative).
303
1630
  #
1631
+ # @return [Expr]
1632
+ #
1633
+ # @example
1634
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
1635
+ # df.select(Polars.col("foo").shift(1))
1636
+ # # =>
1637
+ # # shape: (4, 1)
1638
+ # # ┌──────┐
1639
+ # # │ foo │
1640
+ # # │ --- │
1641
+ # # │ i64 │
1642
+ # # ╞══════╡
1643
+ # # │ null │
1644
+ # # ├╌╌╌╌╌╌┤
1645
+ # # │ 1 │
1646
+ # # ├╌╌╌╌╌╌┤
1647
+ # # │ 2 │
1648
+ # # ├╌╌╌╌╌╌┤
1649
+ # # │ 3 │
1650
+ # # └──────┘
304
1651
  def shift(periods = 1)
305
1652
  wrap_expr(_rbexpr.shift(periods))
306
1653
  end
307
1654
 
1655
+ # Shift the values by a given period and fill the resulting null values.
1656
+ #
1657
+ # @param periods [Integer]
1658
+ # Number of places to shift (may be negative).
1659
+ # @param fill_value [Object]
1660
+ # Fill nil values with the result of this expression.
1661
+ #
1662
+ # @return [Expr]
1663
+ #
1664
+ # @example
1665
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
1666
+ # df.select(Polars.col("foo").shift_and_fill(1, "a"))
1667
+ # # =>
1668
+ # # shape: (4, 1)
1669
+ # # ┌─────┐
1670
+ # # │ foo │
1671
+ # # │ --- │
1672
+ # # │ str │
1673
+ # # ╞═════╡
1674
+ # # │ a │
1675
+ # # ├╌╌╌╌╌┤
1676
+ # # │ 1 │
1677
+ # # ├╌╌╌╌╌┤
1678
+ # # │ 2 │
1679
+ # # ├╌╌╌╌╌┤
1680
+ # # │ 3 │
1681
+ # # └─────┘
308
1682
  def shift_and_fill(periods, fill_value)
309
1683
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
310
1684
  wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
311
1685
  end
312
1686
 
1687
+ # Fill null values using the specified value or strategy.
1688
+ #
1689
+ # To interpolate over null values, see `interpolate`.
1690
+ #
1691
+ # @param value [Object]
1692
+ # Value used to fill null values.
1693
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1694
+ # Strategy used to fill null values.
1695
+ # @param limit [Integer]
1696
+ # Number of consecutive null values to fill when using the 'forward' or
1697
+ # 'backward' strategy.
1698
+ #
1699
+ # @return [Expr]
1700
+ #
1701
+ # @example
1702
+ # df = Polars::DataFrame.new(
1703
+ # {
1704
+ # "a" => [1, 2, nil],
1705
+ # "b" => [4, nil, 6]
1706
+ # }
1707
+ # )
1708
+ # df.fill_null(strategy: "zero")
1709
+ # # =>
1710
+ # # shape: (3, 2)
1711
+ # # ┌─────┬─────┐
1712
+ # # │ a ┆ b │
1713
+ # # │ --- ┆ --- │
1714
+ # # │ i64 ┆ i64 │
1715
+ # # ╞═════╪═════╡
1716
+ # # │ 1 ┆ 4 │
1717
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1718
+ # # │ 2 ┆ 0 │
1719
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1720
+ # # │ 0 ┆ 6 │
1721
+ # # └─────┴─────┘
1722
+ #
1723
+ # @example
1724
+ # df.fill_null(99)
1725
+ # # =>
1726
+ # # shape: (3, 2)
1727
+ # # ┌─────┬─────┐
1728
+ # # │ a ┆ b │
1729
+ # # │ --- ┆ --- │
1730
+ # # │ i64 ┆ i64 │
1731
+ # # ╞═════╪═════╡
1732
+ # # │ 1 ┆ 4 │
1733
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1734
+ # # │ 2 ┆ 99 │
1735
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1736
+ # # │ 99 ┆ 6 │
1737
+ # # └─────┴─────┘
1738
+ #
1739
+ # @example
1740
+ # df.fill_null(strategy: "forward")
1741
+ # # =>
1742
+ # # shape: (3, 2)
1743
+ # # ┌─────┬─────┐
1744
+ # # │ a ┆ b │
1745
+ # # │ --- ┆ --- │
1746
+ # # │ i64 ┆ i64 │
1747
+ # # ╞═════╪═════╡
1748
+ # # │ 1 ┆ 4 │
1749
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1750
+ # # │ 2 ┆ 4 │
1751
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1752
+ # # │ 2 ┆ 6 │
1753
+ # # └─────┴─────┘
313
1754
  def fill_null(value = nil, strategy: nil, limit: nil)
314
1755
  if !value.nil? && !strategy.nil?
315
1756
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -327,75 +1768,426 @@ module Polars
327
1768
  end
328
1769
  end
329
1770
 
1771
+ # Fill floating point NaN value with a fill value.
1772
+ #
1773
+ # @return [Expr]
1774
+ #
1775
+ # @example
1776
+ # df = Polars::DataFrame.new(
1777
+ # {
1778
+ # "a" => [1.0, nil, Float::NAN],
1779
+ # "b" => [4.0, Float::NAN, 6]
1780
+ # }
1781
+ # )
1782
+ # df.fill_nan("zero")
1783
+ # # =>
1784
+ # # shape: (3, 2)
1785
+ # # ┌──────┬──────┐
1786
+ # # │ a ┆ b │
1787
+ # # │ --- ┆ --- │
1788
+ # # │ str ┆ str │
1789
+ # # ╞══════╪══════╡
1790
+ # # │ 1.0 ┆ 4.0 │
1791
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1792
+ # # │ null ┆ zero │
1793
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1794
+ # # │ zero ┆ 6.0 │
1795
+ # # └──────┴──────┘
330
1796
  def fill_nan(fill_value)
331
1797
  fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
332
1798
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
333
1799
  end
334
1800
 
1801
+ # Fill missing values with the latest seen values.
1802
+ #
1803
+ # @param limit [Integer]
1804
+ # The number of consecutive null values to forward fill.
1805
+ #
1806
+ # @return [Expr]
1807
+ #
1808
+ # @example
1809
+ # df = Polars::DataFrame.new(
1810
+ # {
1811
+ # "a" => [1, 2, nil],
1812
+ # "b" => [4, nil, 6]
1813
+ # }
1814
+ # )
1815
+ # df.select(Polars.all.forward_fill)
1816
+ # # =>
1817
+ # # shape: (3, 2)
1818
+ # # ┌─────┬─────┐
1819
+ # # │ a ┆ b │
1820
+ # # │ --- ┆ --- │
1821
+ # # │ i64 ┆ i64 │
1822
+ # # ╞═════╪═════╡
1823
+ # # │ 1 ┆ 4 │
1824
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1825
+ # # │ 2 ┆ 4 │
1826
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1827
+ # # │ 2 ┆ 6 │
1828
+ # # └─────┴─────┘
335
1829
  def forward_fill(limit: nil)
336
1830
  wrap_expr(_rbexpr.forward_fill(limit))
337
1831
  end
338
1832
 
1833
+ # Fill missing values with the next to be seen values.
1834
+ #
1835
+ # @param limit [Integer]
1836
+ # The number of consecutive null values to backward fill.
1837
+ #
1838
+ # @return [Expr]
1839
+ #
1840
+ # @example
1841
+ # df = Polars::DataFrame.new(
1842
+ # {
1843
+ # "a" => [1, 2, nil],
1844
+ # "b" => [4, nil, 6]
1845
+ # }
1846
+ # )
1847
+ # df.select(Polars.all.backward_fill)
1848
+ # # =>
1849
+ # # shape: (3, 2)
1850
+ # # ┌──────┬─────┐
1851
+ # # │ a ┆ b │
1852
+ # # │ --- ┆ --- │
1853
+ # # │ i64 ┆ i64 │
1854
+ # # ╞══════╪═════╡
1855
+ # # │ 1 ┆ 4 │
1856
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1857
+ # # │ 2 ┆ 6 │
1858
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1859
+ # # │ null ┆ 6 │
1860
+ # # └──────┴─────┘
339
1861
  def backward_fill(limit: nil)
340
1862
  wrap_expr(_rbexpr.backward_fill(limit))
341
1863
  end
342
1864
 
1865
+ # Reverse the selection.
1866
+ #
1867
+ # @return [Expr]
343
1868
  def reverse
344
1869
  wrap_expr(_rbexpr.reverse)
345
1870
  end
346
1871
 
1872
+ # Get standard deviation.
1873
+ #
1874
+ # @param ddof [Integer]
1875
+ # Degrees of freedom.
1876
+ #
1877
+ # @return [Expr]
1878
+ #
1879
+ # @example
1880
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1881
+ # df.select(Polars.col("a").std)
1882
+ # # =>
1883
+ # # shape: (1, 1)
1884
+ # # ┌─────┐
1885
+ # # │ a │
1886
+ # # │ --- │
1887
+ # # │ f64 │
1888
+ # # ╞═════╡
1889
+ # # │ 1.0 │
1890
+ # # └─────┘
347
1891
  def std(ddof: 1)
348
1892
  wrap_expr(_rbexpr.std(ddof))
349
1893
  end
350
1894
 
1895
+ # Get variance.
1896
+ #
1897
+ # @param ddof [Integer]
1898
+ # Degrees of freedom.
1899
+ #
1900
+ # @return [Expr]
1901
+ #
1902
+ # @example
1903
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
1904
+ # df.select(Polars.col("a").var)
1905
+ # # =>
1906
+ # # shape: (1, 1)
1907
+ # # ┌─────┐
1908
+ # # │ a │
1909
+ # # │ --- │
1910
+ # # │ f64 │
1911
+ # # ╞═════╡
1912
+ # # │ 1.0 │
1913
+ # # └─────┘
351
1914
  def var(ddof: 1)
352
1915
  wrap_expr(_rbexpr.var(ddof))
353
1916
  end
354
1917
 
1918
+ # Get maximum value.
1919
+ #
1920
+ # @return [Expr]
1921
+ #
1922
+ # @example
1923
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1924
+ # df.select(Polars.col("a").max)
1925
+ # # =>
1926
+ # # shape: (1, 1)
1927
+ # # ┌─────┐
1928
+ # # │ a │
1929
+ # # │ --- │
1930
+ # # │ f64 │
1931
+ # # ╞═════╡
1932
+ # # │ 1.0 │
1933
+ # # └─────┘
355
1934
  def max
356
1935
  wrap_expr(_rbexpr.max)
357
1936
  end
358
1937
 
1938
+ # Get minimum value.
1939
+ #
1940
+ # @return [Expr]
1941
+ #
1942
+ # @example
1943
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
1944
+ # df.select(Polars.col("a").min)
1945
+ # # =>
1946
+ # # shape: (1, 1)
1947
+ # # ┌──────┐
1948
+ # # │ a │
1949
+ # # │ --- │
1950
+ # # │ f64 │
1951
+ # # ╞══════╡
1952
+ # # │ -1.0 │
1953
+ # # └──────┘
359
1954
  def min
360
1955
  wrap_expr(_rbexpr.min)
361
1956
  end
362
1957
 
1958
+ # Get maximum value, but propagate/poison encountered NaN values.
1959
+ #
1960
+ # @return [Expr]
1961
+ #
1962
+ # @example
1963
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1964
+ # df.select(Polars.col("a").nan_max)
1965
+ # # =>
1966
+ # # shape: (1, 1)
1967
+ # # ┌─────┐
1968
+ # # │ a │
1969
+ # # │ --- │
1970
+ # # │ f64 │
1971
+ # # ╞═════╡
1972
+ # # │ NaN │
1973
+ # # └─────┘
363
1974
  def nan_max
364
1975
  wrap_expr(_rbexpr.nan_max)
365
1976
  end
366
1977
 
1978
+ # Get minimum value, but propagate/poison encountered NaN values.
1979
+ #
1980
+ # @return [Expr]
1981
+ #
1982
+ # @example
1983
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
1984
+ # df.select(Polars.col("a").nan_min)
1985
+ # # =>
1986
+ # # shape: (1, 1)
1987
+ # # ┌─────┐
1988
+ # # │ a │
1989
+ # # │ --- │
1990
+ # # │ f64 │
1991
+ # # ╞═════╡
1992
+ # # │ NaN │
1993
+ # # └─────┘
367
1994
  def nan_min
368
1995
  wrap_expr(_rbexpr.nan_min)
369
1996
  end
370
1997
 
1998
+ # Get sum value.
1999
+ #
2000
+ # @return [Expr]
2001
+ #
2002
+ # @note
2003
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
2004
+ # `:i64` before summing to prevent overflow issues.
2005
+ #
2006
+ # @example
2007
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2008
+ # df.select(Polars.col("a").sum)
2009
+ # # =>
2010
+ # # shape: (1, 1)
2011
+ # # ┌─────┐
2012
+ # # │ a │
2013
+ # # │ --- │
2014
+ # # │ i64 │
2015
+ # # ╞═════╡
2016
+ # # │ 0 │
2017
+ # # └─────┘
371
2018
  def sum
372
2019
  wrap_expr(_rbexpr.sum)
373
2020
  end
374
2021
 
2022
+ # Get mean value.
2023
+ #
2024
+ # @return [Expr]
2025
+ #
2026
+ # @example
2027
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2028
+ # df.select(Polars.col("a").mean)
2029
+ # # =>
2030
+ # # shape: (1, 1)
2031
+ # # ┌─────┐
2032
+ # # │ a │
2033
+ # # │ --- │
2034
+ # # │ f64 │
2035
+ # # ╞═════╡
2036
+ # # │ 0.0 │
2037
+ # # └─────┘
375
2038
  def mean
376
2039
  wrap_expr(_rbexpr.mean)
377
2040
  end
378
2041
 
2042
+ # Get median value using linear interpolation.
2043
+ #
2044
+ # @return [Expr]
2045
+ #
2046
+ # @example
2047
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
2048
+ # df.select(Polars.col("a").median)
2049
+ # # =>
2050
+ # # shape: (1, 1)
2051
+ # # ┌─────┐
2052
+ # # │ a │
2053
+ # # │ --- │
2054
+ # # │ f64 │
2055
+ # # ╞═════╡
2056
+ # # │ 0.0 │
2057
+ # # └─────┘
379
2058
  def median
380
2059
  wrap_expr(_rbexpr.median)
381
2060
  end
382
2061
 
2062
+ # Compute the product of an expression.
2063
+ #
2064
+ # @return [Expr]
2065
+ #
2066
+ # @example
2067
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
2068
+ # df.select(Polars.col("a").product)
2069
+ # # =>
2070
+ # # shape: (1, 1)
2071
+ # # ┌─────┐
2072
+ # # │ a │
2073
+ # # │ --- │
2074
+ # # │ i64 │
2075
+ # # ╞═════╡
2076
+ # # │ 6 │
2077
+ # # └─────┘
383
2078
  def product
384
2079
  wrap_expr(_rbexpr.product)
385
2080
  end
386
2081
 
2082
+ # Count unique values.
2083
+ #
2084
+ # @return [Expr]
2085
+ #
2086
+ # @example
2087
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2088
+ # df.select(Polars.col("a").n_unique)
2089
+ # # =>
2090
+ # # shape: (1, 1)
2091
+ # # ┌─────┐
2092
+ # # │ a │
2093
+ # # │ --- │
2094
+ # # │ u32 │
2095
+ # # ╞═════╡
2096
+ # # │ 2 │
2097
+ # # └─────┘
387
2098
  def n_unique
388
2099
  wrap_expr(_rbexpr.n_unique)
389
2100
  end
390
2101
 
2102
+ # Count null values.
2103
+ #
2104
+ # @return [Expr]
2105
+ #
2106
+ # @example
2107
+ # df = Polars::DataFrame.new(
2108
+ # {
2109
+ # "a" => [nil, 1, nil],
2110
+ # "b" => [1, 2, 3]
2111
+ # }
2112
+ # )
2113
+ # df.select(Polars.all.null_count)
2114
+ # # =>
2115
+ # # shape: (1, 2)
2116
+ # # ┌─────┬─────┐
2117
+ # # │ a ┆ b │
2118
+ # # │ --- ┆ --- │
2119
+ # # │ u32 ┆ u32 │
2120
+ # # ╞═════╪═════╡
2121
+ # # │ 2 ┆ 0 │
2122
+ # # └─────┴─────┘
391
2123
  def null_count
392
2124
  wrap_expr(_rbexpr.null_count)
393
2125
  end
394
2126
 
2127
+ # Get index of first unique value.
2128
+ #
2129
+ # @return [Expr]
2130
+ #
2131
+ # @example
2132
+ # df = Polars::DataFrame.new(
2133
+ # {
2134
+ # "a" => [8, 9, 10],
2135
+ # "b" => [nil, 4, 4]
2136
+ # }
2137
+ # )
2138
+ # df.select(Polars.col("a").arg_unique)
2139
+ # # =>
2140
+ # # shape: (3, 1)
2141
+ # # ┌─────┐
2142
+ # # │ a │
2143
+ # # │ --- │
2144
+ # # │ u32 │
2145
+ # # ╞═════╡
2146
+ # # │ 0 │
2147
+ # # ├╌╌╌╌╌┤
2148
+ # # │ 1 │
2149
+ # # ├╌╌╌╌╌┤
2150
+ # # │ 2 │
2151
+ # # └─────┘
2152
+ #
2153
+ # @example
2154
+ # df.select(Polars.col("b").arg_unique)
2155
+ # # =>
2156
+ # # shape: (2, 1)
2157
+ # # ┌─────┐
2158
+ # # │ b │
2159
+ # # │ --- │
2160
+ # # │ u32 │
2161
+ # # ╞═════╡
2162
+ # # │ 0 │
2163
+ # # ├╌╌╌╌╌┤
2164
+ # # │ 1 │
2165
+ # # └─────┘
395
2166
  def arg_unique
396
2167
  wrap_expr(_rbexpr.arg_unique)
397
2168
  end
398
2169
 
2170
+ # Get unique values of this expression.
2171
+ #
2172
+ # @param maintain_order [Boolean]
2173
+ # Maintain order of data. This requires more work.
2174
+ #
2175
+ # @return [Expr]
2176
+ #
2177
+ # @example
2178
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2179
+ # df.select(Polars.col("a").unique(maintain_order: true))
2180
+ # # =>
2181
+ # # shape: (2, 1)
2182
+ # # ┌─────┐
2183
+ # # │ a │
2184
+ # # │ --- │
2185
+ # # │ i64 │
2186
+ # # ╞═════╡
2187
+ # # │ 1 │
2188
+ # # ├╌╌╌╌╌┤
2189
+ # # │ 2 │
2190
+ # # └─────┘
399
2191
  def unique(maintain_order: false)
400
2192
  if maintain_order
401
2193
  wrap_expr(_rbexpr.unique_stable)
@@ -404,95 +2196,743 @@ module Polars
404
2196
  end
405
2197
  end
406
2198
 
2199
+ # Get the first value.
2200
+ #
2201
+ # @return [Expr]
2202
+ #
2203
+ # @example
2204
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2205
+ # df.select(Polars.col("a").first)
2206
+ # # =>
2207
+ # # shape: (1, 1)
2208
+ # # ┌─────┐
2209
+ # # │ a │
2210
+ # # │ --- │
2211
+ # # │ i64 │
2212
+ # # ╞═════╡
2213
+ # # │ 1 │
2214
+ # # └─────┘
407
2215
  def first
408
2216
  wrap_expr(_rbexpr.first)
409
2217
  end
410
2218
 
2219
+ # Get the last value.
2220
+ #
2221
+ # @return [Expr]
2222
+ #
2223
+ # @example
2224
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2225
+ # df.select(Polars.col("a").last)
2226
+ # # =>
2227
+ # # shape: (1, 1)
2228
+ # # ┌─────┐
2229
+ # # │ a │
2230
+ # # │ --- │
2231
+ # # │ i64 │
2232
+ # # ╞═════╡
2233
+ # # │ 2 │
2234
+ # # └─────┘
411
2235
  def last
412
2236
  wrap_expr(_rbexpr.last)
413
2237
  end
414
2238
 
2239
+ # Apply window function over a subgroup.
2240
+ #
2241
+ # This is similar to a groupby + aggregation + self join.
2242
+ # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
2243
+ #
2244
+ # @param expr [Object]
2245
+ # Column(s) to group by.
2246
+ #
2247
+ # @return [Expr]
2248
+ #
2249
+ # @example
2250
+ # df = Polars::DataFrame.new(
2251
+ # {
2252
+ # "groups" => ["g1", "g1", "g2"],
2253
+ # "values" => [1, 2, 3]
2254
+ # }
2255
+ # )
2256
+ # df.with_column(
2257
+ # Polars.col("values").max.over("groups").alias("max_by_group")
2258
+ # )
2259
+ # # =>
2260
+ # # shape: (3, 3)
2261
+ # # ┌────────┬────────┬──────────────┐
2262
+ # # │ groups ┆ values ┆ max_by_group │
2263
+ # # │ --- ┆ --- ┆ --- │
2264
+ # # │ str ┆ i64 ┆ i64 │
2265
+ # # ╞════════╪════════╪══════════════╡
2266
+ # # │ g1 ┆ 1 ┆ 2 │
2267
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2268
+ # # │ g1 ┆ 2 ┆ 2 │
2269
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2270
+ # # │ g2 ┆ 3 ┆ 3 │
2271
+ # # └────────┴────────┴──────────────┘
2272
+ #
2273
+ # @example
2274
+ # df = Polars::DataFrame.new(
2275
+ # {
2276
+ # "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
2277
+ # "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
2278
+ # }
2279
+ # )
2280
+ # df.lazy
2281
+ # .select([Polars.col("groups").sum.over("groups")])
2282
+ # .collect
2283
+ # # =>
2284
+ # # shape: (9, 1)
2285
+ # # ┌────────┐
2286
+ # # │ groups │
2287
+ # # │ --- │
2288
+ # # │ i64 │
2289
+ # # ╞════════╡
2290
+ # # │ 4 │
2291
+ # # ├╌╌╌╌╌╌╌╌┤
2292
+ # # │ 4 │
2293
+ # # ├╌╌╌╌╌╌╌╌┤
2294
+ # # │ 6 │
2295
+ # # ├╌╌╌╌╌╌╌╌┤
2296
+ # # │ 6 │
2297
+ # # ├╌╌╌╌╌╌╌╌┤
2298
+ # # │ ... │
2299
+ # # ├╌╌╌╌╌╌╌╌┤
2300
+ # # │ 6 │
2301
+ # # ├╌╌╌╌╌╌╌╌┤
2302
+ # # │ 6 │
2303
+ # # ├╌╌╌╌╌╌╌╌┤
2304
+ # # │ 6 │
2305
+ # # ├╌╌╌╌╌╌╌╌┤
2306
+ # # │ 4 │
2307
+ # # └────────┘
415
2308
  def over(expr)
416
2309
  rbexprs = Utils.selection_to_rbexpr_list(expr)
417
2310
  wrap_expr(_rbexpr.over(rbexprs))
418
2311
  end
419
2312
 
2313
+ # Get mask of unique values.
2314
+ #
2315
+ # @return [Expr]
2316
+ #
2317
+ # @example
2318
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2319
+ # df.select(Polars.col("a").is_unique)
2320
+ # # =>
2321
+ # # shape: (3, 1)
2322
+ # # ┌───────┐
2323
+ # # │ a │
2324
+ # # │ --- │
2325
+ # # │ bool │
2326
+ # # ╞═══════╡
2327
+ # # │ false │
2328
+ # # ├╌╌╌╌╌╌╌┤
2329
+ # # │ false │
2330
+ # # ├╌╌╌╌╌╌╌┤
2331
+ # # │ true │
2332
+ # # └───────┘
420
2333
  def is_unique
421
2334
  wrap_expr(_rbexpr.is_unique)
422
2335
  end
423
2336
 
2337
+ # Get a mask of the first unique value.
2338
+ #
2339
+ # @return [Expr]
2340
+ #
2341
+ # @example
2342
+ # df = Polars::DataFrame.new(
2343
+ # {
2344
+ # "num" => [1, 2, 3, 1, 5]
2345
+ # }
2346
+ # )
2347
+ # df.with_column(Polars.col("num").is_first.alias("is_first"))
2348
+ # # =>
2349
+ # # shape: (5, 2)
2350
+ # # ┌─────┬──────────┐
2351
+ # # │ num ┆ is_first │
2352
+ # # │ --- ┆ --- │
2353
+ # # │ i64 ┆ bool │
2354
+ # # ╞═════╪══════════╡
2355
+ # # │ 1 ┆ true │
2356
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2357
+ # # │ 2 ┆ true │
2358
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2359
+ # # │ 3 ┆ true │
2360
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2361
+ # # │ 1 ┆ false │
2362
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2363
+ # # │ 5 ┆ true │
2364
+ # # └─────┴──────────┘
424
2365
  def is_first
425
2366
  wrap_expr(_rbexpr.is_first)
426
2367
  end
427
2368
 
2369
+ # Get mask of duplicated values.
2370
+ #
2371
+ # @return [Expr]
2372
+ #
2373
+ # @example
2374
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2375
+ # df.select(Polars.col("a").is_duplicated)
2376
+ # # =>
2377
+ # # shape: (3, 1)
2378
+ # # ┌───────┐
2379
+ # # │ a │
2380
+ # # │ --- │
2381
+ # # │ bool │
2382
+ # # ╞═══════╡
2383
+ # # │ true │
2384
+ # # ├╌╌╌╌╌╌╌┤
2385
+ # # │ true │
2386
+ # # ├╌╌╌╌╌╌╌┤
2387
+ # # │ false │
2388
+ # # └───────┘
428
2389
  def is_duplicated
429
2390
  wrap_expr(_rbexpr.is_duplicated)
430
2391
  end
431
2392
 
2393
+ # Get quantile value.
2394
+ #
2395
+ # @param quantile [Float]
2396
+ # Quantile between 0.0 and 1.0.
2397
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2398
+ # Interpolation method.
2399
+ #
2400
+ # @return [Expr]
2401
+ #
2402
+ # @example
2403
+ # df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
2404
+ # df.select(Polars.col("a").quantile(0.3))
2405
+ # # =>
2406
+ # # shape: (1, 1)
2407
+ # # ┌─────┐
2408
+ # # │ a │
2409
+ # # │ --- │
2410
+ # # │ f64 │
2411
+ # # ╞═════╡
2412
+ # # │ 1.0 │
2413
+ # # └─────┘
2414
+ #
2415
+ # @example
2416
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
2417
+ # # =>
2418
+ # # shape: (1, 1)
2419
+ # # ┌─────┐
2420
+ # # │ a │
2421
+ # # │ --- │
2422
+ # # │ f64 │
2423
+ # # ╞═════╡
2424
+ # # │ 2.0 │
2425
+ # # └─────┘
2426
+ #
2427
+ # @example
2428
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
2429
+ # # =>
2430
+ # # shape: (1, 1)
2431
+ # # ┌─────┐
2432
+ # # │ a │
2433
+ # # │ --- │
2434
+ # # │ f64 │
2435
+ # # ╞═════╡
2436
+ # # │ 1.0 │
2437
+ # # └─────┘
2438
+ #
2439
+ # @example
2440
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
2441
+ # # =>
2442
+ # # shape: (1, 1)
2443
+ # # ┌─────┐
2444
+ # # │ a │
2445
+ # # │ --- │
2446
+ # # │ f64 │
2447
+ # # ╞═════╡
2448
+ # # │ 1.5 │
2449
+ # # └─────┘
2450
+ #
2451
+ # @example
2452
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
2453
+ # # =>
2454
+ # # shape: (1, 1)
2455
+ # # ┌─────┐
2456
+ # # │ a │
2457
+ # # │ --- │
2458
+ # # │ f64 │
2459
+ # # ╞═════╡
2460
+ # # │ 1.5 │
2461
+ # # └─────┘
432
2462
  def quantile(quantile, interpolation: "nearest")
433
2463
  wrap_expr(_rbexpr.quantile(quantile, interpolation))
434
2464
  end
435
2465
 
2466
+ # Filter a single column.
2467
+ #
2468
+ # Mostly useful in an aggregation context. If you want to filter on a DataFrame
2469
+ # level, use `LazyFrame#filter`.
2470
+ #
2471
+ # @param predicate [Expr]
2472
+ # Boolean expression.
2473
+ #
2474
+ # @return [Expr]
2475
+ #
2476
+ # @example
2477
+ # df = Polars::DataFrame.new(
2478
+ # {
2479
+ # "group_col" => ["g1", "g1", "g2"],
2480
+ # "b" => [1, 2, 3]
2481
+ # }
2482
+ # )
2483
+ # (
2484
+ # df.groupby("group_col").agg(
2485
+ # [
2486
+ # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
2487
+ # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
2488
+ # ]
2489
+ # )
2490
+ # ).sort("group_col")
2491
+ # # =>
2492
+ # # shape: (2, 3)
2493
+ # # ┌───────────┬──────┬─────┐
2494
+ # # │ group_col ┆ lt ┆ gte │
2495
+ # # │ --- ┆ --- ┆ --- │
2496
+ # # │ str ┆ i64 ┆ i64 │
2497
+ # # ╞═══════════╪══════╪═════╡
2498
+ # # │ g1 ┆ 1 ┆ 2 │
2499
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2500
+ # # │ g2 ┆ null ┆ 3 │
2501
+ # # └───────────┴──────┴─────┘
436
2502
  def filter(predicate)
437
2503
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
438
2504
  end
439
2505
 
2506
+ # Filter a single column.
2507
+ #
2508
+ # Alias for {#filter}.
2509
+ #
2510
+ # @param predicate [Expr]
2511
+ # Boolean expression.
2512
+ #
2513
+ # @return [Expr]
2514
+ #
2515
+ # @example
2516
+ # df = Polars::DataFrame.new(
2517
+ # {
2518
+ # "group_col" => ["g1", "g1", "g2"],
2519
+ # "b" => [1, 2, 3]
2520
+ # }
2521
+ # )
2522
+ # (
2523
+ # df.groupby("group_col").agg(
2524
+ # [
2525
+ # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
2526
+ # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
2527
+ # ]
2528
+ # )
2529
+ # ).sort("group_col")
2530
+ # # =>
2531
+ # # shape: (2, 3)
2532
+ # # ┌───────────┬──────┬─────┐
2533
+ # # │ group_col ┆ lt ┆ gte │
2534
+ # # │ --- ┆ --- ┆ --- │
2535
+ # # │ str ┆ i64 ┆ i64 │
2536
+ # # ╞═══════════╪══════╪═════╡
2537
+ # # │ g1 ┆ 1 ┆ 2 │
2538
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
2539
+ # # │ g2 ┆ null ┆ 3 │
2540
+ # # └───────────┴──────┴─────┘
440
2541
  def where(predicate)
441
2542
  filter(predicate)
442
2543
  end
443
2544
 
444
- # def map
2545
+ # Apply a custom Ruby function to a Series or sequence of Series.
2546
+ #
2547
+ # The output of this custom function must be a Series.
2548
+ # If you want to apply a custom function elementwise over single values, see
2549
+ # {#apply}. A use case for `map` is when you want to transform an
2550
+ # expression with a third-party library.
2551
+ #
2552
+ # Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
2553
+ #
2554
+ # @param return_dtype [Symbol]
2555
+ # Dtype of the output Series.
2556
+ # @param agg_list [Boolean]
2557
+ # Aggregate list.
2558
+ #
2559
+ # @return [Expr]
2560
+ #
2561
+ # @example
2562
+ # df = Polars::DataFrame.new(
2563
+ # {
2564
+ # "sine" => [0.0, 1.0, 0.0, -1.0],
2565
+ # "cosine" => [1.0, 0.0, -1.0, 0.0]
2566
+ # }
2567
+ # )
2568
+ # df.select(Polars.all.map { |x| x.to_numpy.argmax })
2569
+ # # =>
2570
+ # # shape: (1, 2)
2571
+ # # ┌──────┬────────┐
2572
+ # # │ sine ┆ cosine │
2573
+ # # │ --- ┆ --- │
2574
+ # # │ i64 ┆ i64 │
2575
+ # # ╞══════╪════════╡
2576
+ # # │ 1 ┆ 0 │
2577
+ # # └──────┴────────┘
2578
+ # def map(return_dtype: nil, agg_list: false, &block)
2579
+ # if !return_dtype.nil?
2580
+ # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2581
+ # end
2582
+ # wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
445
2583
  # end
446
2584
 
447
2585
  # def apply
448
2586
  # end
449
2587
 
2588
+ # Explode a list or utf8 Series. This means that every item is expanded to a new
2589
+ # row.
450
2590
  #
2591
+ # Alias for {#explode}.
2592
+ #
2593
+ # @return [Expr]
2594
+ #
2595
+ # @example
2596
+ # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2597
+ # df.select(Polars.col("foo").flatten)
2598
+ # # =>
2599
+ # # shape: (10, 1)
2600
+ # # ┌─────┐
2601
+ # # │ foo │
2602
+ # # │ --- │
2603
+ # # │ str │
2604
+ # # ╞═════╡
2605
+ # # │ h │
2606
+ # # ├╌╌╌╌╌┤
2607
+ # # │ e │
2608
+ # # ├╌╌╌╌╌┤
2609
+ # # │ l │
2610
+ # # ├╌╌╌╌╌┤
2611
+ # # │ l │
2612
+ # # ├╌╌╌╌╌┤
2613
+ # # │ ... │
2614
+ # # ├╌╌╌╌╌┤
2615
+ # # │ o │
2616
+ # # ├╌╌╌╌╌┤
2617
+ # # │ r │
2618
+ # # ├╌╌╌╌╌┤
2619
+ # # │ l │
2620
+ # # ├╌╌╌╌╌┤
2621
+ # # │ d │
2622
+ # # └─────┘
451
2623
  def flatten
452
2624
  wrap_expr(_rbexpr.explode)
453
2625
  end
454
2626
 
2627
+ # Explode a list or utf8 Series.
2628
+ #
2629
+ # This means that every item is expanded to a new row.
2630
+ #
2631
+ # @return [Expr]
2632
+ #
2633
+ # @example
2634
+ # df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
2635
+ # df.select(Polars.col("b").explode)
2636
+ # # =>
2637
+ # # shape: (6, 1)
2638
+ # # ┌─────┐
2639
+ # # │ b │
2640
+ # # │ --- │
2641
+ # # │ i64 │
2642
+ # # ╞═════╡
2643
+ # # │ 1 │
2644
+ # # ├╌╌╌╌╌┤
2645
+ # # │ 2 │
2646
+ # # ├╌╌╌╌╌┤
2647
+ # # │ 3 │
2648
+ # # ├╌╌╌╌╌┤
2649
+ # # │ 4 │
2650
+ # # ├╌╌╌╌╌┤
2651
+ # # │ 5 │
2652
+ # # ├╌╌╌╌╌┤
2653
+ # # │ 6 │
2654
+ # # └─────┘
455
2655
  def explode
456
2656
  wrap_expr(_rbexpr.explode)
457
2657
  end
458
2658
 
2659
+ # Take every nth value in the Series and return as a new Series.
2660
+ #
2661
+ # @return [Expr]
2662
+ #
2663
+ # @example
2664
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
2665
+ # df.select(Polars.col("foo").take_every(3))
2666
+ # # =>
2667
+ # # shape: (3, 1)
2668
+ # # ┌─────┐
2669
+ # # │ foo │
2670
+ # # │ --- │
2671
+ # # │ i64 │
2672
+ # # ╞═════╡
2673
+ # # │ 1 │
2674
+ # # ├╌╌╌╌╌┤
2675
+ # # │ 4 │
2676
+ # # ├╌╌╌╌╌┤
2677
+ # # │ 7 │
2678
+ # # └─────┘
459
2679
  def take_every(n)
460
2680
  wrap_expr(_rbexpr.take_every(n))
461
2681
  end
462
2682
 
2683
+ # Get the first `n` rows.
2684
+ #
2685
+ # @param n [Integer]
2686
+ # Number of rows to return.
2687
+ #
2688
+ # @return [Expr]
2689
+ #
2690
+ # @example
2691
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2692
+ # df.select(Polars.col("foo").head(3))
2693
+ # # =>
2694
+ # # shape: (3, 1)
2695
+ # # ┌─────┐
2696
+ # # │ foo │
2697
+ # # │ --- │
2698
+ # # │ i64 │
2699
+ # # ╞═════╡
2700
+ # # │ 1 │
2701
+ # # ├╌╌╌╌╌┤
2702
+ # # │ 2 │
2703
+ # # ├╌╌╌╌╌┤
2704
+ # # │ 3 │
2705
+ # # └─────┘
463
2706
  def head(n = 10)
464
2707
  wrap_expr(_rbexpr.head(n))
465
2708
  end
466
2709
 
2710
+ # Get the last `n` rows.
2711
+ #
2712
+ # @param n [Integer]
2713
+ # Number of rows to return.
2714
+ #
2715
+ # @return [Expr]
2716
+ #
2717
+ # @example
2718
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
2719
+ # df.select(Polars.col("foo").tail(3))
2720
+ # # =>
2721
+ # # shape: (3, 1)
2722
+ # # ┌─────┐
2723
+ # # │ foo │
2724
+ # # │ --- │
2725
+ # # │ i64 │
2726
+ # # ╞═════╡
2727
+ # # │ 5 │
2728
+ # # ├╌╌╌╌╌┤
2729
+ # # │ 6 │
2730
+ # # ├╌╌╌╌╌┤
2731
+ # # │ 7 │
2732
+ # # └─────┘
467
2733
  def tail(n = 10)
468
2734
  wrap_expr(_rbexpr.tail(n))
469
2735
  end
470
2736
 
2737
+ # Get the first `n` rows.
2738
+ #
2739
+ # Alias for {#head}.
2740
+ #
2741
+ # @param n [Integer]
2742
+ # Number of rows to return.
2743
+ #
2744
+ # @return [Expr]
471
2745
  def limit(n = 10)
472
2746
  head(n)
473
2747
  end
474
2748
 
2749
+ # Raise expression to the power of exponent.
2750
+ #
2751
+ # @return [Expr]
2752
+ #
2753
+ # @example
2754
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
2755
+ # df.select(Polars.col("foo").pow(3))
2756
+ # # =>
2757
+ # # shape: (4, 1)
2758
+ # # ┌──────┐
2759
+ # # │ foo │
2760
+ # # │ --- │
2761
+ # # │ f64 │
2762
+ # # ╞══════╡
2763
+ # # │ 1.0 │
2764
+ # # ├╌╌╌╌╌╌┤
2765
+ # # │ 8.0 │
2766
+ # # ├╌╌╌╌╌╌┤
2767
+ # # │ 27.0 │
2768
+ # # ├╌╌╌╌╌╌┤
2769
+ # # │ 64.0 │
2770
+ # # └──────┘
475
2771
  def pow(exponent)
476
2772
  exponent = Utils.expr_to_lit_or_expr(exponent)
477
2773
  wrap_expr(_rbexpr.pow(exponent._rbexpr))
478
2774
  end
479
2775
 
480
- # def is_in
481
- # end
2776
+ # Check if elements of this expression are present in the other Series.
2777
+ #
2778
+ # @param other [Object]
2779
+ # Series or sequence of primitive type.
2780
+ #
2781
+ # @return [Expr]
2782
+ #
2783
+ # @example
2784
+ # df = Polars::DataFrame.new(
2785
+ # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
2786
+ # )
2787
+ # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
2788
+ # # =>
2789
+ # # shape: (3, 1)
2790
+ # # ┌──────────┐
2791
+ # # │ contains │
2792
+ # # │ --- │
2793
+ # # │ bool │
2794
+ # # ╞══════════╡
2795
+ # # │ true │
2796
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2797
+ # # │ true │
2798
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
2799
+ # # │ false │
2800
+ # # └──────────┘
2801
+ def is_in(other)
2802
+ if other.is_a?(Array)
2803
+ if other.length == 0
2804
+ other = Polars.lit(nil)
2805
+ else
2806
+ other = Polars.lit(Series.new(other))
2807
+ end
2808
+ else
2809
+ other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
2810
+ end
2811
+ wrap_expr(_rbexpr.is_in(other._rbexpr))
2812
+ end
482
2813
 
2814
+ # Repeat the elements in this Series as specified in the given expression.
2815
+ #
2816
+ # The repeated elements are expanded into a `List`.
2817
+ #
2818
+ # @param by [Object]
2819
+ # Numeric column that determines how often the values will be repeated.
2820
+ # The column will be coerced to UInt32. Give this dtype to make the coercion a
2821
+ # no-op.
483
2822
  #
2823
+ # @return [Expr]
2824
+ #
2825
+ # @example
2826
+ # df = Polars::DataFrame.new(
2827
+ # {
2828
+ # "a" => ["x", "y", "z"],
2829
+ # "n" => [1, 2, 3]
2830
+ # }
2831
+ # )
2832
+ # df.select(Polars.col("a").repeat_by("n"))
2833
+ # # =>
2834
+ # # shape: (3, 1)
2835
+ # # ┌─────────────────┐
2836
+ # # │ a │
2837
+ # # │ --- │
2838
+ # # │ list[str] │
2839
+ # # ╞═════════════════╡
2840
+ # # │ ["x"] │
2841
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2842
+ # # │ ["y", "y"] │
2843
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2844
+ # # │ ["z", "z", "z"] │
2845
+ # # └─────────────────┘
484
2846
  def repeat_by(by)
485
- by = Utils.expr_to_lit_or_expr(by, false)
2847
+ by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
486
2848
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
487
2849
  end
488
2850
 
489
- # def is_between
490
- # end
2851
+ # Check if this expression is between start and end.
2852
+ #
2853
+ # @param start [Object]
2854
+ # Lower bound as primitive type or datetime.
2855
+ # @param _end [Object]
2856
+ # Upper bound as primitive type or datetime.
2857
+ # @param include_bounds [Boolean]
2858
+ # false: Exclude both start and end (default).
2859
+ # true: Include both start and end.
2860
+ # [false, false]: Exclude start and exclude end.
2861
+ # [true, true]: Include start and include end.
2862
+ # [false, true]: Exclude start and include end.
2863
+ # [true, false]: Include start and exclude end.
2864
+ #
2865
+ # @return [Expr]
2866
+ #
2867
+ # @example
2868
+ # df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
2869
+ # df.with_column(Polars.col("num").is_between(2, 4))
2870
+ # # =>
2871
+ # # shape: (5, 2)
2872
+ # # ┌─────┬────────────┐
2873
+ # # │ num ┆ is_between │
2874
+ # # │ --- ┆ --- │
2875
+ # # │ i64 ┆ bool │
2876
+ # # ╞═════╪════════════╡
2877
+ # # │ 1 ┆ false │
2878
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2879
+ # # │ 2 ┆ false │
2880
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2881
+ # # │ 3 ┆ true │
2882
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2883
+ # # │ 4 ┆ false │
2884
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2885
+ # # │ 5 ┆ false │
2886
+ # # └─────┴────────────┘
2887
+ def is_between(start, _end, include_bounds: false)
2888
+ if include_bounds == false || include_bounds == [false, false]
2889
+ ((self > start) & (self < _end)).alias("is_between")
2890
+ elsif include_bounds == true || include_bounds == [true, true]
2891
+ ((self >= start) & (self <= _end)).alias("is_between")
2892
+ elsif include_bounds == [false, true]
2893
+ ((self > start) & (self <= _end)).alias("is_between")
2894
+ elsif include_bounds == [true, false]
2895
+ ((self >= start) & (self < _end)).alias("is_between")
2896
+ else
2897
+ raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
2898
+ end
2899
+ end
491
2900
 
492
2901
  # def _hash
493
2902
  # end
494
2903
 
2904
+ # Reinterpret the underlying bits as a signed/unsigned integer.
495
2905
  #
2906
+ # This operation is only allowed for 64-bit integers. For integers with fewer
2907
+ # bits, you can safely use the cast operation instead.
2908
+ #
2909
+ # @param signed [Boolean]
2910
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
2911
+ #
2912
+ # @return [Expr]
2913
+ #
2914
+ # @example
2915
+ # s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
2916
+ # df = Polars::DataFrame.new([s])
2917
+ # df.select(
2918
+ # [
2919
+ # Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
2920
+ # Polars.col("a").alias("original")
2921
+ # ]
2922
+ # )
2923
+ # # =>
2924
+ # # shape: (3, 2)
2925
+ # # ┌───────────────┬──────────┐
2926
+ # # │ reinterpreted ┆ original │
2927
+ # # │ --- ┆ --- │
2928
+ # # │ i64 ┆ u64 │
2929
+ # # ╞═══════════════╪══════════╡
2930
+ # # │ 1 ┆ 1 │
2931
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2932
+ # # │ 1 ┆ 1 │
2933
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2934
+ # # │ 2 ┆ 2 │
2935
+ # # └───────────────┴──────────┘
496
2936
  def reinterpret(signed: false)
497
2937
  wrap_expr(_rbexpr.reinterpret(signed))
498
2938
  end
@@ -500,147 +2940,1541 @@ module Polars
500
2940
  # def _inspect
501
2941
  # end
502
2942
 
2943
+ # Fill nulls with linear interpolation over missing values.
2944
+ #
2945
+ # Can also be used to regrid data to a new grid - see examples below.
503
2946
  #
2947
+ # @return [Expr]
2948
+ #
2949
+ # @example Fill nulls with linear interpolation
2950
+ # df = Polars::DataFrame.new(
2951
+ # {
2952
+ # "a" => [1, nil, 3],
2953
+ # "b" => [1.0, Float::NAN, 3.0]
2954
+ # }
2955
+ # )
2956
+ # df.select(Polars.all.interpolate)
2957
+ # # =>
2958
+ # # shape: (3, 2)
2959
+ # # ┌─────┬─────┐
2960
+ # # │ a ┆ b │
2961
+ # # │ --- ┆ --- │
2962
+ # # │ i64 ┆ f64 │
2963
+ # # ╞═════╪═════╡
2964
+ # # │ 1 ┆ 1.0 │
2965
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2966
+ # # │ 2 ┆ NaN │
2967
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2968
+ # # │ 3 ┆ 3.0 │
2969
+ # # └─────┴─────┘
504
2970
  def interpolate
505
2971
  wrap_expr(_rbexpr.interpolate)
506
2972
  end
507
2973
 
508
- # def rolling_min
509
- # end
510
-
511
- # def rolling_max
512
- # end
513
-
514
- # def rolling_mean
515
- # end
516
-
517
- # def rolling_sum
518
- # end
519
-
520
- # def rolling_std
521
- # end
522
-
523
- # def rolling_var
524
- # end
525
-
526
- # def rolling_median
527
- # end
528
-
529
- # def rolling_quantile
530
- # end
2974
+ # Apply a rolling min (moving min) over the values in this array.
2975
+ #
2976
+ # A window of length `window_size` will traverse the array. The values that fill
2977
+ # this window will (optionally) be multiplied with the weights given by the
2978
+ # `weights` vector. The resulting values will be aggregated to their min.
2979
+ #
2980
+ # @param window_size [Integer]
2981
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
2982
+ # size indicated by a timedelta or the following string language:
2983
+ #
2984
+ # - 1ns (1 nanosecond)
2985
+ # - 1us (1 microsecond)
2986
+ # - 1ms (1 millisecond)
2987
+ # - 1s (1 second)
2988
+ # - 1m (1 minute)
2989
+ # - 1h (1 hour)
2990
+ # - 1d (1 day)
2991
+ # - 1w (1 week)
2992
+ # - 1mo (1 calendar month)
2993
+ # - 1y (1 calendar year)
2994
+ # - 1i (1 index count)
2995
+ #
2996
+ # If a timedelta or the dynamic string language is used, the `by`
2997
+ # and `closed` arguments must also be set.
2998
+ # @param weights [Array]
2999
+ # An optional slice with the same length as the window that will be multiplied
3000
+ # elementwise with the values in the window.
3001
+ # @param min_periods [Integer]
3002
+ # The number of values in the window that should be non-null before computing
3003
+ # a result. If nil, it will be set equal to window size.
3004
+ # @param center [Boolean]
3005
+ # Set the labels at the center of the window
3006
+ # @param by [String]
3007
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3008
+ # set the column that will be used to determine the windows. This column must
3009
+ # be of dtype `{Date, Datetime}`
3010
+ # @param closed ["left", "right", "both", "none"]
3011
+ # Define whether the temporal window interval is closed or not.
3012
+ #
3013
+ # @note
3014
+ # This functionality is experimental and may change without it being considered a
3015
+ # breaking change.
3016
+ #
3017
+ # @note
3018
+ # If you want to compute multiple aggregation statistics over the same dynamic
3019
+ # window, consider using `groupby_rolling` this method can cache the window size
3020
+ # computation.
3021
+ #
3022
+ # @return [Expr]
3023
+ #
3024
+ # @example
3025
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3026
+ # df.select(
3027
+ # [
3028
+ # Polars.col("A").rolling_min(2)
3029
+ # ]
3030
+ # )
3031
+ # # =>
3032
+ # # shape: (6, 1)
3033
+ # # ┌──────┐
3034
+ # # │ A │
3035
+ # # │ --- │
3036
+ # # │ f64 │
3037
+ # # ╞══════╡
3038
+ # # │ null │
3039
+ # # ├╌╌╌╌╌╌┤
3040
+ # # │ 1.0 │
3041
+ # # ├╌╌╌╌╌╌┤
3042
+ # # │ 2.0 │
3043
+ # # ├╌╌╌╌╌╌┤
3044
+ # # │ 3.0 │
3045
+ # # ├╌╌╌╌╌╌┤
3046
+ # # │ 4.0 │
3047
+ # # ├╌╌╌╌╌╌┤
3048
+ # # │ 5.0 │
3049
+ # # └──────┘
3050
+ def rolling_min(
3051
+ window_size,
3052
+ weights: nil,
3053
+ min_periods: nil,
3054
+ center: false,
3055
+ by: nil,
3056
+ closed: "left"
3057
+ )
3058
+ window_size, min_periods = _prepare_rolling_window_args(
3059
+ window_size, min_periods
3060
+ )
3061
+ wrap_expr(
3062
+ _rbexpr.rolling_min(
3063
+ window_size, weights, min_periods, center, by, closed
3064
+ )
3065
+ )
3066
+ end
3067
+
3068
+ # Apply a rolling max (moving max) over the values in this array.
3069
+ #
3070
+ # A window of length `window_size` will traverse the array. The values that fill
3071
+ # this window will (optionally) be multiplied with the weights given by the
3072
+ # `weights` vector. The resulting values will be aggregated to their max.
3073
+ #
3074
+ # @param window_size [Integer]
3075
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3076
+ # size indicated by a timedelta or the following string language:
3077
+ #
3078
+ # - 1ns (1 nanosecond)
3079
+ # - 1us (1 microsecond)
3080
+ # - 1ms (1 millisecond)
3081
+ # - 1s (1 second)
3082
+ # - 1m (1 minute)
3083
+ # - 1h (1 hour)
3084
+ # - 1d (1 day)
3085
+ # - 1w (1 week)
3086
+ # - 1mo (1 calendar month)
3087
+ # - 1y (1 calendar year)
3088
+ # - 1i (1 index count)
3089
+ #
3090
+ # If a timedelta or the dynamic string language is used, the `by`
3091
+ # and `closed` arguments must also be set.
3092
+ # @param weights [Array]
3093
+ # An optional slice with the same length as the window that will be multiplied
3094
+ # elementwise with the values in the window.
3095
+ # @param min_periods [Integer]
3096
+ # The number of values in the window that should be non-null before computing
3097
+ # a result. If nil, it will be set equal to window size.
3098
+ # @param center [Boolean]
3099
+ # Set the labels at the center of the window
3100
+ # @param by [String]
3101
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3102
+ # set the column that will be used to determine the windows. This column must
3103
+ # be of dtype `{Date, Datetime}`
3104
+ # @param closed ["left", "right", "both", "none"]
3105
+ # Define whether the temporal window interval is closed or not.
3106
+ #
3107
+ # @note
3108
+ # This functionality is experimental and may change without it being considered a
3109
+ # breaking change.
3110
+ #
3111
+ # @note
3112
+ # If you want to compute multiple aggregation statistics over the same dynamic
3113
+ # window, consider using `groupby_rolling` this method can cache the window size
3114
+ # computation.
3115
+ #
3116
+ # @return [Expr]
3117
+ #
3118
+ # @example
3119
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3120
+ # df.select(
3121
+ # [
3122
+ # Polars.col("A").rolling_max(2)
3123
+ # ]
3124
+ # )
3125
+ # # =>
3126
+ # # shape: (6, 1)
3127
+ # # ┌──────┐
3128
+ # # │ A │
3129
+ # # │ --- │
3130
+ # # │ f64 │
3131
+ # # ╞══════╡
3132
+ # # │ null │
3133
+ # # ├╌╌╌╌╌╌┤
3134
+ # # │ 2.0 │
3135
+ # # ├╌╌╌╌╌╌┤
3136
+ # # │ 3.0 │
3137
+ # # ├╌╌╌╌╌╌┤
3138
+ # # │ 4.0 │
3139
+ # # ├╌╌╌╌╌╌┤
3140
+ # # │ 5.0 │
3141
+ # # ├╌╌╌╌╌╌┤
3142
+ # # │ 6.0 │
3143
+ # # └──────┘
3144
+ def rolling_max(
3145
+ window_size,
3146
+ weights: nil,
3147
+ min_periods: nil,
3148
+ center: false,
3149
+ by: nil,
3150
+ closed: "left"
3151
+ )
3152
+ window_size, min_periods = _prepare_rolling_window_args(
3153
+ window_size, min_periods
3154
+ )
3155
+ wrap_expr(
3156
+ _rbexpr.rolling_max(
3157
+ window_size, weights, min_periods, center, by, closed
3158
+ )
3159
+ )
3160
+ end
3161
+
3162
+ # Apply a rolling mean (moving mean) over the values in this array.
3163
+ #
3164
+ # A window of length `window_size` will traverse the array. The values that fill
3165
+ # this window will (optionally) be multiplied with the weights given by the
3166
+ # `weights` vector. The resulting values will be aggregated to their mean.
3167
+ #
3168
+ # @param window_size [Integer]
3169
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3170
+ # size indicated by a timedelta or the following string language:
3171
+ #
3172
+ # - 1ns (1 nanosecond)
3173
+ # - 1us (1 microsecond)
3174
+ # - 1ms (1 millisecond)
3175
+ # - 1s (1 second)
3176
+ # - 1m (1 minute)
3177
+ # - 1h (1 hour)
3178
+ # - 1d (1 day)
3179
+ # - 1w (1 week)
3180
+ # - 1mo (1 calendar month)
3181
+ # - 1y (1 calendar year)
3182
+ # - 1i (1 index count)
3183
+ #
3184
+ # If a timedelta or the dynamic string language is used, the `by`
3185
+ # and `closed` arguments must also be set.
3186
+ # @param weights [Array]
3187
+ # An optional slice with the same length as the window that will be multiplied
3188
+ # elementwise with the values in the window.
3189
+ # @param min_periods [Integer]
3190
+ # The number of values in the window that should be non-null before computing
3191
+ # a result. If nil, it will be set equal to window size.
3192
+ # @param center [Boolean]
3193
+ # Set the labels at the center of the window
3194
+ # @param by [String]
3195
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3196
+ # set the column that will be used to determine the windows. This column must
3197
+ # be of dtype `{Date, Datetime}`
3198
+ # @param closed ["left", "right", "both", "none"]
3199
+ # Define whether the temporal window interval is closed or not.
3200
+ #
3201
+ # @note
3202
+ # This functionality is experimental and may change without it being considered a
3203
+ # breaking change.
3204
+ #
3205
+ # @note
3206
+ # If you want to compute multiple aggregation statistics over the same dynamic
3207
+ # window, consider using `groupby_rolling` this method can cache the window size
3208
+ # computation.
3209
+ #
3210
+ # @return [Expr]
3211
+ #
3212
+ # @example
3213
+ # df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
3214
+ # df.select(
3215
+ # [
3216
+ # Polars.col("A").rolling_mean(2)
3217
+ # ]
3218
+ # )
3219
+ # # =>
3220
+ # # shape: (6, 1)
3221
+ # # ┌──────┐
3222
+ # # │ A │
3223
+ # # │ --- │
3224
+ # # │ f64 │
3225
+ # # ╞══════╡
3226
+ # # │ null │
3227
+ # # ├╌╌╌╌╌╌┤
3228
+ # # │ 4.5 │
3229
+ # # ├╌╌╌╌╌╌┤
3230
+ # # │ 7.0 │
3231
+ # # ├╌╌╌╌╌╌┤
3232
+ # # │ 4.0 │
3233
+ # # ├╌╌╌╌╌╌┤
3234
+ # # │ 9.0 │
3235
+ # # ├╌╌╌╌╌╌┤
3236
+ # # │ 13.0 │
3237
+ # # └──────┘
3238
+ def rolling_mean(
3239
+ window_size,
3240
+ weights: nil,
3241
+ min_periods: nil,
3242
+ center: false,
3243
+ by: nil,
3244
+ closed: "left"
3245
+ )
3246
+ window_size, min_periods = _prepare_rolling_window_args(
3247
+ window_size, min_periods
3248
+ )
3249
+ wrap_expr(
3250
+ _rbexpr.rolling_mean(
3251
+ window_size, weights, min_periods, center, by, closed
3252
+ )
3253
+ )
3254
+ end
3255
+
3256
+ # Apply a rolling sum (moving sum) over the values in this array.
3257
+ #
3258
+ # A window of length `window_size` will traverse the array. The values that fill
3259
+ # this window will (optionally) be multiplied with the weights given by the
3260
+ # `weights` vector. The resulting values will be aggregated to their sum.
3261
+ #
3262
+ # @param window_size [Integer]
3263
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3264
+ # size indicated by a timedelta or the following string language:
3265
+ #
3266
+ # - 1ns (1 nanosecond)
3267
+ # - 1us (1 microsecond)
3268
+ # - 1ms (1 millisecond)
3269
+ # - 1s (1 second)
3270
+ # - 1m (1 minute)
3271
+ # - 1h (1 hour)
3272
+ # - 1d (1 day)
3273
+ # - 1w (1 week)
3274
+ # - 1mo (1 calendar month)
3275
+ # - 1y (1 calendar year)
3276
+ # - 1i (1 index count)
3277
+ #
3278
+ # If a timedelta or the dynamic string language is used, the `by`
3279
+ # and `closed` arguments must also be set.
3280
+ # @param weights [Array]
3281
+ # An optional slice with the same length as the window that will be multiplied
3282
+ # elementwise with the values in the window.
3283
+ # @param min_periods [Integer]
3284
+ # The number of values in the window that should be non-null before computing
3285
+ # a result. If nil, it will be set equal to window size.
3286
+ # @param center [Boolean]
3287
+ # Set the labels at the center of the window
3288
+ # @param by [String]
3289
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3290
+ # set the column that will be used to determine the windows. This column must
3291
+ # be of dtype `{Date, Datetime}`
3292
+ # @param closed ["left", "right", "both", "none"]
3293
+ # Define whether the temporal window interval is closed or not.
3294
+ #
3295
+ # @note
3296
+ # This functionality is experimental and may change without it being considered a
3297
+ # breaking change.
3298
+ #
3299
+ # @note
3300
+ # If you want to compute multiple aggregation statistics over the same dynamic
3301
+ # window, consider using `groupby_rolling` this method can cache the window size
3302
+ # computation.
3303
+ #
3304
+ # @return [Expr]
3305
+ #
3306
+ # @example
3307
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
3308
+ # df.select(
3309
+ # [
3310
+ # Polars.col("A").rolling_sum(2)
3311
+ # ]
3312
+ # )
3313
+ # # =>
3314
+ # # shape: (6, 1)
3315
+ # # ┌──────┐
3316
+ # # │ A │
3317
+ # # │ --- │
3318
+ # # │ f64 │
3319
+ # # ╞══════╡
3320
+ # # │ null │
3321
+ # # ├╌╌╌╌╌╌┤
3322
+ # # │ 3.0 │
3323
+ # # ├╌╌╌╌╌╌┤
3324
+ # # │ 5.0 │
3325
+ # # ├╌╌╌╌╌╌┤
3326
+ # # │ 7.0 │
3327
+ # # ├╌╌╌╌╌╌┤
3328
+ # # │ 9.0 │
3329
+ # # ├╌╌╌╌╌╌┤
3330
+ # # │ 11.0 │
3331
+ # # └──────┘
3332
+ def rolling_sum(
3333
+ window_size,
3334
+ weights: nil,
3335
+ min_periods: nil,
3336
+ center: false,
3337
+ by: nil,
3338
+ closed: "left"
3339
+ )
3340
+ window_size, min_periods = _prepare_rolling_window_args(
3341
+ window_size, min_periods
3342
+ )
3343
+ wrap_expr(
3344
+ _rbexpr.rolling_sum(
3345
+ window_size, weights, min_periods, center, by, closed
3346
+ )
3347
+ )
3348
+ end
3349
+
3350
+ # Compute a rolling standard deviation.
3351
+ #
3352
+ # A window of length `window_size` will traverse the array. The values that fill
3353
+ # this window will (optionally) be multiplied with the weights given by the
3354
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3355
+ #
3356
+ # @param window_size [Integer]
3357
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3358
+ # size indicated by a timedelta or the following string language:
3359
+ #
3360
+ # - 1ns (1 nanosecond)
3361
+ # - 1us (1 microsecond)
3362
+ # - 1ms (1 millisecond)
3363
+ # - 1s (1 second)
3364
+ # - 1m (1 minute)
3365
+ # - 1h (1 hour)
3366
+ # - 1d (1 day)
3367
+ # - 1w (1 week)
3368
+ # - 1mo (1 calendar month)
3369
+ # - 1y (1 calendar year)
3370
+ # - 1i (1 index count)
3371
+ #
3372
+ # If a timedelta or the dynamic string language is used, the `by`
3373
+ # and `closed` arguments must also be set.
3374
+ # @param weights [Array]
3375
+ # An optional slice with the same length as the window that will be multiplied
3376
+ # elementwise with the values in the window.
3377
+ # @param min_periods [Integer]
3378
+ # The number of values in the window that should be non-null before computing
3379
+ # a result. If nil, it will be set equal to window size.
3380
+ # @param center [Boolean]
3381
+ # Set the labels at the center of the window
3382
+ # @param by [String]
3383
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3384
+ # set the column that will be used to determine the windows. This column must
3385
+ # be of dtype `{Date, Datetime}`
3386
+ # @param closed ["left", "right", "both", "none"]
3387
+ # Define whether the temporal window interval is closed or not.
3388
+ #
3389
+ # @note
3390
+ # This functionality is experimental and may change without it being considered a
3391
+ # breaking change.
3392
+ #
3393
+ # @note
3394
+ # If you want to compute multiple aggregation statistics over the same dynamic
3395
+ # window, consider using `groupby_rolling` this method can cache the window size
3396
+ # computation.
3397
+ #
3398
+ # @return [Expr]
3399
+ #
3400
+ # @example
3401
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3402
+ # df.select(
3403
+ # [
3404
+ # Polars.col("A").rolling_std(3)
3405
+ # ]
3406
+ # )
3407
+ # # =>
3408
+ # # shape: (6, 1)
3409
+ # # ┌──────────┐
3410
+ # # │ A │
3411
+ # # │ --- │
3412
+ # # │ f64 │
3413
+ # # ╞══════════╡
3414
+ # # │ null │
3415
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3416
+ # # │ null │
3417
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3418
+ # # │ 1.0 │
3419
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3420
+ # # │ 1.0 │
3421
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3422
+ # # │ 1.527525 │
3423
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3424
+ # # │ 2.0 │
3425
+ # # └──────────┘
3426
+ def rolling_std(
3427
+ window_size,
3428
+ weights: nil,
3429
+ min_periods: nil,
3430
+ center: false,
3431
+ by: nil,
3432
+ closed: "left"
3433
+ )
3434
+ window_size, min_periods = _prepare_rolling_window_args(
3435
+ window_size, min_periods
3436
+ )
3437
+ wrap_expr(
3438
+ _rbexpr.rolling_std(
3439
+ window_size, weights, min_periods, center, by, closed
3440
+ )
3441
+ )
3442
+ end
3443
+
3444
+ # Compute a rolling variance.
3445
+ #
3446
+ # A window of length `window_size` will traverse the array. The values that fill
3447
+ # this window will (optionally) be multiplied with the weights given by the
3448
+ # `weights` vector. The resulting values will be aggregated to their variance.
3449
+ #
3450
+ # @param window_size [Integer]
3451
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3452
+ # size indicated by a timedelta or the following string language:
3453
+ #
3454
+ # - 1ns (1 nanosecond)
3455
+ # - 1us (1 microsecond)
3456
+ # - 1ms (1 millisecond)
3457
+ # - 1s (1 second)
3458
+ # - 1m (1 minute)
3459
+ # - 1h (1 hour)
3460
+ # - 1d (1 day)
3461
+ # - 1w (1 week)
3462
+ # - 1mo (1 calendar month)
3463
+ # - 1y (1 calendar year)
3464
+ # - 1i (1 index count)
3465
+ #
3466
+ # If a timedelta or the dynamic string language is used, the `by`
3467
+ # and `closed` arguments must also be set.
3468
+ # @param weights [Array]
3469
+ # An optional slice with the same length as the window that will be multiplied
3470
+ # elementwise with the values in the window.
3471
+ # @param min_periods [Integer]
3472
+ # The number of values in the window that should be non-null before computing
3473
+ # a result. If nil, it will be set equal to window size.
3474
+ # @param center [Boolean]
3475
+ # Set the labels at the center of the window
3476
+ # @param by [String]
3477
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3478
+ # set the column that will be used to determine the windows. This column must
3479
+ # be of dtype `{Date, Datetime}`
3480
+ # @param closed ["left", "right", "both", "none"]
3481
+ # Define whether the temporal window interval is closed or not.
3482
+ #
3483
+ # @note
3484
+ # This functionality is experimental and may change without it being considered a
3485
+ # breaking change.
3486
+ #
3487
+ # @note
3488
+ # If you want to compute multiple aggregation statistics over the same dynamic
3489
+ # window, consider using `groupby_rolling` this method can cache the window size
3490
+ # computation.
3491
+ #
3492
+ # @return [Expr]
3493
+ #
3494
+ # @example
3495
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3496
+ # df.select(
3497
+ # [
3498
+ # Polars.col("A").rolling_var(3)
3499
+ # ]
3500
+ # )
3501
+ # # =>
3502
+ # # shape: (6, 1)
3503
+ # # ┌──────────┐
3504
+ # # │ A │
3505
+ # # │ --- │
3506
+ # # │ f64 │
3507
+ # # ╞══════════╡
3508
+ # # │ null │
3509
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3510
+ # # │ null │
3511
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3512
+ # # │ 1.0 │
3513
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3514
+ # # │ 1.0 │
3515
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3516
+ # # │ 2.333333 │
3517
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
3518
+ # # │ 4.0 │
3519
+ # # └──────────┘
3520
+ def rolling_var(
3521
+ window_size,
3522
+ weights: nil,
3523
+ min_periods: nil,
3524
+ center: false,
3525
+ by: nil,
3526
+ closed: "left"
3527
+ )
3528
+ window_size, min_periods = _prepare_rolling_window_args(
3529
+ window_size, min_periods
3530
+ )
3531
+ wrap_expr(
3532
+ _rbexpr.rolling_var(
3533
+ window_size, weights, min_periods, center, by, closed
3534
+ )
3535
+ )
3536
+ end
3537
+
3538
+ # Compute a rolling median.
3539
+ #
3540
+ # @param window_size [Integer]
3541
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3542
+ # size indicated by a timedelta or the following string language:
3543
+ #
3544
+ # - 1ns (1 nanosecond)
3545
+ # - 1us (1 microsecond)
3546
+ # - 1ms (1 millisecond)
3547
+ # - 1s (1 second)
3548
+ # - 1m (1 minute)
3549
+ # - 1h (1 hour)
3550
+ # - 1d (1 day)
3551
+ # - 1w (1 week)
3552
+ # - 1mo (1 calendar month)
3553
+ # - 1y (1 calendar year)
3554
+ # - 1i (1 index count)
3555
+ #
3556
+ # If a timedelta or the dynamic string language is used, the `by`
3557
+ # and `closed` arguments must also be set.
3558
+ # @param weights [Array]
3559
+ # An optional slice with the same length as the window that will be multiplied
3560
+ # elementwise with the values in the window.
3561
+ # @param min_periods [Integer]
3562
+ # The number of values in the window that should be non-null before computing
3563
+ # a result. If nil, it will be set equal to window size.
3564
+ # @param center [Boolean]
3565
+ # Set the labels at the center of the window
3566
+ # @param by [String]
3567
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3568
+ # set the column that will be used to determine the windows. This column must
3569
+ # be of dtype `{Date, Datetime}`
3570
+ # @param closed ["left", "right", "both", "none"]
3571
+ # Define whether the temporal window interval is closed or not.
3572
+ #
3573
+ # @note
3574
+ # This functionality is experimental and may change without it being considered a
3575
+ # breaking change.
3576
+ #
3577
+ # @note
3578
+ # If you want to compute multiple aggregation statistics over the same dynamic
3579
+ # window, consider using `groupby_rolling`; this method can cache the window size
3580
+ # computation.
3581
+ #
3582
+ # @return [Expr]
3583
+ #
3584
+ # @example
3585
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3586
+ # df.select(
3587
+ # [
3588
+ # Polars.col("A").rolling_median(3)
3589
+ # ]
3590
+ # )
3591
+ # # =>
3592
+ # # shape: (6, 1)
3593
+ # # ┌──────┐
3594
+ # # │ A │
3595
+ # # │ --- │
3596
+ # # │ f64 │
3597
+ # # ╞══════╡
3598
+ # # │ null │
3599
+ # # ├╌╌╌╌╌╌┤
3600
+ # # │ null │
3601
+ # # ├╌╌╌╌╌╌┤
3602
+ # # │ 2.0 │
3603
+ # # ├╌╌╌╌╌╌┤
3604
+ # # │ 3.0 │
3605
+ # # ├╌╌╌╌╌╌┤
3606
+ # # │ 4.0 │
3607
+ # # ├╌╌╌╌╌╌┤
3608
+ # # │ 6.0 │
3609
+ # # └──────┘
3610
+ def rolling_median(
3611
+ window_size,
3612
+ weights: nil,
3613
+ min_periods: nil,
3614
+ center: false,
3615
+ by: nil,
3616
+ closed: "left"
3617
+ )
3618
+ window_size, min_periods = _prepare_rolling_window_args(
3619
+ window_size, min_periods
3620
+ )
3621
+ wrap_expr(
3622
+ _rbexpr.rolling_median(
3623
+ window_size, weights, min_periods, center, by, closed
3624
+ )
3625
+ )
3626
+ end
3627
+
3628
+ # Compute a rolling quantile.
3629
+ #
3630
+ # @param quantile [Float]
3631
+ # Quantile between 0.0 and 1.0.
3632
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3633
+ # Interpolation method.
3634
+ # @param window_size [Integer]
3635
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
3636
+ # size indicated by a timedelta or the following string language:
3637
+ #
3638
+ # - 1ns (1 nanosecond)
3639
+ # - 1us (1 microsecond)
3640
+ # - 1ms (1 millisecond)
3641
+ # - 1s (1 second)
3642
+ # - 1m (1 minute)
3643
+ # - 1h (1 hour)
3644
+ # - 1d (1 day)
3645
+ # - 1w (1 week)
3646
+ # - 1mo (1 calendar month)
3647
+ # - 1y (1 calendar year)
3648
+ # - 1i (1 index count)
3649
+ #
3650
+ # If a timedelta or the dynamic string language is used, the `by`
3651
+ # and `closed` arguments must also be set.
3652
+ # @param weights [Array]
3653
+ # An optional slice with the same length as the window that will be multiplied
3654
+ # elementwise with the values in the window.
3655
+ # @param min_periods [Integer]
3656
+ # The number of values in the window that should be non-null before computing
3657
+ # a result. If nil, it will be set equal to window size.
3658
+ # @param center [Boolean]
3659
+ # Set the labels at the center of the window
3660
+ # @param by [String]
3661
+ # If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
3662
+ # set the column that will be used to determine the windows. This column must
3663
+ # be of dtype `{Date, Datetime}`
3664
+ # @param closed ["left", "right", "both", "none"]
3665
+ # Define whether the temporal window interval is closed or not.
3666
+ #
3667
+ # @note
3668
+ # This functionality is experimental and may change without it being considered a
3669
+ # breaking change.
3670
+ #
3671
+ # @note
3672
+ # If you want to compute multiple aggregation statistics over the same dynamic
3673
+ # window, consider using `groupby_rolling`; this method can cache the window size
3674
+ # computation.
3675
+ #
3676
+ # @return [Expr]
3677
+ #
3678
+ # @example
3679
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
3680
+ # df.select(
3681
+ # [
3682
+ # Polars.col("A").rolling_quantile(0.33, window_size: 3)
3683
+ # ]
3684
+ # )
3685
+ # # =>
3686
+ # # shape: (6, 1)
3687
+ # # ┌──────┐
3688
+ # # │ A │
3689
+ # # │ --- │
3690
+ # # │ f64 │
3691
+ # # ╞══════╡
3692
+ # # │ null │
3693
+ # # ├╌╌╌╌╌╌┤
3694
+ # # │ null │
3695
+ # # ├╌╌╌╌╌╌┤
3696
+ # # │ 1.0 │
3697
+ # # ├╌╌╌╌╌╌┤
3698
+ # # │ 2.0 │
3699
+ # # ├╌╌╌╌╌╌┤
3700
+ # # │ 3.0 │
3701
+ # # ├╌╌╌╌╌╌┤
3702
+ # # │ 4.0 │
3703
+ # # └──────┘
3704
+ def rolling_quantile(
3705
+ quantile,
3706
+ interpolation: "nearest",
3707
+ window_size: 2,
3708
+ weights: nil,
3709
+ min_periods: nil,
3710
+ center: false,
3711
+ by: nil,
3712
+ closed: "left"
3713
+ )
3714
+ window_size, min_periods = _prepare_rolling_window_args(
3715
+ window_size, min_periods
3716
+ )
3717
+ wrap_expr(
3718
+ _rbexpr.rolling_quantile(
3719
+ quantile, interpolation, window_size, weights, min_periods, center, by, closed
3720
+ )
3721
+ )
3722
+ end
531
3723
 
532
3724
  # def rolling_apply
533
3725
  # end
534
3726
 
3727
+ # Compute a rolling skew.
535
3728
  #
3729
+ # @param window_size [Integer]
3730
+ # Integer size of the rolling window.
3731
+ # @param bias [Boolean]
3732
+ # If false, the calculations are corrected for statistical bias.
3733
+ #
3734
+ # @return [Expr]
536
3735
  def rolling_skew(window_size, bias: true)
537
3736
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
538
3737
  end
539
3738
 
3739
+ # Compute absolute values.
3740
+ #
3741
+ # @return [Expr]
3742
+ #
3743
+ # @example
3744
+ # df = Polars::DataFrame.new(
3745
+ # {
3746
+ # "A" => [-1.0, 0.0, 1.0, 2.0]
3747
+ # }
3748
+ # )
3749
+ # df.select(Polars.col("A").abs)
3750
+ # # =>
3751
+ # # shape: (4, 1)
3752
+ # # ┌─────┐
3753
+ # # │ A │
3754
+ # # │ --- │
3755
+ # # │ f64 │
3756
+ # # ╞═════╡
3757
+ # # │ 1.0 │
3758
+ # # ├╌╌╌╌╌┤
3759
+ # # │ 0.0 │
3760
+ # # ├╌╌╌╌╌┤
3761
+ # # │ 1.0 │
3762
+ # # ├╌╌╌╌╌┤
3763
+ # # │ 2.0 │
3764
+ # # └─────┘
540
3765
  def abs
541
3766
  wrap_expr(_rbexpr.abs)
542
3767
  end
543
3768
 
3769
+ # Get the index values that would sort this column.
3770
+ #
3771
+ # Alias for {#arg_sort}.
3772
+ #
3773
+ # @param reverse [Boolean]
3774
+ # Sort in reverse (descending) order.
3775
+ # @param nulls_last [Boolean]
3776
+ # Place null values last instead of first.
3777
+ #
3778
+ # @return [Expr]
3779
+ #
3780
+ # @example
3781
+ # df = Polars::DataFrame.new(
3782
+ # {
3783
+ # "a" => [20, 10, 30]
3784
+ # }
3785
+ # )
3786
+ # df.select(Polars.col("a").argsort)
3787
+ # # =>
3788
+ # # shape: (3, 1)
3789
+ # # ┌─────┐
3790
+ # # │ a │
3791
+ # # │ --- │
3792
+ # # │ u32 │
3793
+ # # ╞═════╡
3794
+ # # │ 1 │
3795
+ # # ├╌╌╌╌╌┤
3796
+ # # │ 0 │
3797
+ # # ├╌╌╌╌╌┤
3798
+ # # │ 2 │
3799
+ # # └─────┘
544
3800
  def argsort(reverse: false, nulls_last: false)
545
3801
  arg_sort(reverse: reverse, nulls_last: nulls_last)
546
3802
  end
547
3803
 
3804
+ # Assign ranks to data, dealing with ties appropriately.
3805
+ #
3806
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3807
+ # The method used to assign ranks to tied elements.
3808
+ # The following methods are available:
3809
+ #
3810
+ # - 'average' : The average of the ranks that would have been assigned to
3811
+ # all the tied values is assigned to each value.
3812
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3813
+ # the tied values is assigned to each value. (This is also referred to
3814
+ # as "competition" ranking.)
3815
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3816
+ # the tied values is assigned to each value.
3817
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3818
+ # assigned the rank immediately after those assigned to the tied
3819
+ # elements.
3820
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3821
+ # the order that the values occur in the Series.
3822
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3823
+ # on the order that the values occur in the Series.
3824
+ # @param reverse [Boolean]
3825
+ # Reverse the operation.
3826
+ #
3827
+ # @return [Expr]
3828
+ #
3829
+ # @example The 'average' method:
3830
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
3831
+ # df.select(Polars.col("a").rank)
3832
+ # # =>
3833
+ # # shape: (5, 1)
3834
+ # # ┌─────┐
3835
+ # # │ a │
3836
+ # # │ --- │
3837
+ # # │ f32 │
3838
+ # # ╞═════╡
3839
+ # # │ 3.0 │
3840
+ # # ├╌╌╌╌╌┤
3841
+ # # │ 4.5 │
3842
+ # # ├╌╌╌╌╌┤
3843
+ # # │ 1.5 │
3844
+ # # ├╌╌╌╌╌┤
3845
+ # # │ 1.5 │
3846
+ # # ├╌╌╌╌╌┤
3847
+ # # │ 4.5 │
3848
+ # # └─────┘
3849
+ #
3850
+ # @example The 'ordinal' method:
3851
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
3852
+ # df.select(Polars.col("a").rank(method: "ordinal"))
3853
+ # # =>
3854
+ # # shape: (5, 1)
3855
+ # # ┌─────┐
3856
+ # # │ a │
3857
+ # # │ --- │
3858
+ # # │ u32 │
3859
+ # # ╞═════╡
3860
+ # # │ 3 │
3861
+ # # ├╌╌╌╌╌┤
3862
+ # # │ 4 │
3863
+ # # ├╌╌╌╌╌┤
3864
+ # # │ 1 │
3865
+ # # ├╌╌╌╌╌┤
3866
+ # # │ 2 │
3867
+ # # ├╌╌╌╌╌┤
3868
+ # # │ 5 │
3869
+ # # └─────┘
548
3870
  def rank(method: "average", reverse: false)
549
3871
  wrap_expr(_rbexpr.rank(method, reverse))
550
3872
  end
551
3873
 
3874
+ # Calculate the n-th discrete difference.
3875
+ #
3876
+ # @param n [Integer]
3877
+ # Number of slots to shift.
3878
+ # @param null_behavior ["ignore", "drop"]
3879
+ # How to handle null values.
3880
+ #
3881
+ # @return [Expr]
3882
+ #
3883
+ # @example
3884
+ # df = Polars::DataFrame.new(
3885
+ # {
3886
+ # "a" => [20, 10, 30]
3887
+ # }
3888
+ # )
3889
+ # df.select(Polars.col("a").diff)
3890
+ # # =>
3891
+ # # shape: (3, 1)
3892
+ # # ┌──────┐
3893
+ # # │ a │
3894
+ # # │ --- │
3895
+ # # │ i64 │
3896
+ # # ╞══════╡
3897
+ # # │ null │
3898
+ # # ├╌╌╌╌╌╌┤
3899
+ # # │ -10 │
3900
+ # # ├╌╌╌╌╌╌┤
3901
+ # # │ 20 │
3902
+ # # └──────┘
552
3903
  def diff(n: 1, null_behavior: "ignore")
553
3904
  wrap_expr(_rbexpr.diff(n, null_behavior))
554
3905
  end
555
3906
 
3907
+ # Computes percentage change between values.
3908
+ #
3909
+ # Percentage change (as fraction) between current element and most-recent
3910
+ # non-null element at least `n` period(s) before the current element.
3911
+ #
3912
+ # Computes the change from the previous row by default.
3913
+ #
3914
+ # @param n [Integer]
3915
+ # Periods to shift for forming percent change.
3916
+ #
3917
+ # @return [Expr]
3918
+ #
3919
+ # @example
3920
+ # df = Polars::DataFrame.new(
3921
+ # {
3922
+ # "a" => [10, 11, 12, nil, 12]
3923
+ # }
3924
+ # )
3925
+ # df.with_column(Polars.col("a").pct_change.alias("pct_change"))
3926
+ # # =>
3927
+ # # shape: (5, 2)
3928
+ # # ┌──────┬────────────┐
3929
+ # # │ a ┆ pct_change │
3930
+ # # │ --- ┆ --- │
3931
+ # # │ i64 ┆ f64 │
3932
+ # # ╞══════╪════════════╡
3933
+ # # │ 10 ┆ null │
3934
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3935
+ # # │ 11 ┆ 0.1 │
3936
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3937
+ # # │ 12 ┆ 0.090909 │
3938
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3939
+ # # │ null ┆ 0.0 │
3940
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
3941
+ # # │ 12 ┆ 0.0 │
3942
+ # # └──────┴────────────┘
556
3943
  def pct_change(n: 1)
557
3944
  wrap_expr(_rbexpr.pct_change(n))
558
3945
  end
559
3946
 
3947
+ # Compute the sample skewness of a data set.
3948
+ #
3949
+ # For normally distributed data, the skewness should be about zero. For
3950
+ # unimodal continuous distributions, a skewness value greater than zero means
3951
+ # that there is more weight in the right tail of the distribution. The
3952
+ # function `skewtest` can be used to determine if the skewness value
3953
+ # is close enough to zero, statistically speaking.
3954
+ #
3955
+ # @param bias [Boolean]
3956
+ # If false, the calculations are corrected for statistical bias.
3957
+ #
3958
+ # @return [Expr]
3959
+ #
3960
+ # @example
3961
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
3962
+ # df.select(Polars.col("a").skew)
3963
+ # # =>
3964
+ # # shape: (1, 1)
3965
+ # # ┌──────────┐
3966
+ # # │ a │
3967
+ # # │ --- │
3968
+ # # │ f64 │
3969
+ # # ╞══════════╡
3970
+ # # │ 0.343622 │
3971
+ # # └──────────┘
560
3972
  def skew(bias: true)
561
3973
  wrap_expr(_rbexpr.skew(bias))
562
3974
  end
563
3975
 
3976
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3977
+ #
3978
+ # Kurtosis is the fourth central moment divided by the square of the
3979
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3980
+ # the result to give 0.0 for a normal distribution.
3981
+ # If bias is false, then the kurtosis is calculated using k statistics to
3982
+ # eliminate bias coming from biased moment estimators.
3983
+ #
3984
+ # @param fisher [Boolean]
3985
+ # If true, Fisher's definition is used (normal ==> 0.0). If false,
3986
+ # Pearson's definition is used (normal ==> 3.0).
3987
+ # @param bias [Boolean]
3988
+ # If false, the calculations are corrected for statistical bias.
3989
+ #
3990
+ # @return [Expr]
3991
+ #
3992
+ # @example
3993
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
3994
+ # df.select(Polars.col("a").kurtosis)
3995
+ # # =>
3996
+ # # shape: (1, 1)
3997
+ # # ┌───────────┐
3998
+ # # │ a │
3999
+ # # │ --- │
4000
+ # # │ f64 │
4001
+ # # ╞═══════════╡
4002
+ # # │ -1.153061 │
4003
+ # # └───────────┘
564
4004
  def kurtosis(fisher: true, bias: true)
565
4005
  wrap_expr(_rbexpr.kurtosis(fisher, bias))
566
4006
  end
567
4007
 
4008
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
4009
+ #
4010
+ # Only works for numerical types.
4011
+ #
4012
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4013
+ # expression. See `when` for more information.
4014
+ #
4015
+ # @param min_val [Numeric]
4016
+ # Minimum value.
4017
+ # @param max_val [Numeric]
4018
+ # Maximum value.
4019
+ #
4020
+ # @return [Expr]
4021
+ #
4022
+ # @example
4023
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4024
+ # df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
4025
+ # # =>
4026
+ # # shape: (4, 2)
4027
+ # # ┌──────┬─────────────┐
4028
+ # # │ foo ┆ foo_clipped │
4029
+ # # │ --- ┆ --- │
4030
+ # # │ i64 ┆ i64 │
4031
+ # # ╞══════╪═════════════╡
4032
+ # # │ -50 ┆ 1 │
4033
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4034
+ # # │ 5 ┆ 5 │
4035
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4036
+ # # │ null ┆ null │
4037
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4038
+ # # │ 50 ┆ 10 │
4039
+ # # └──────┴─────────────┘
568
4040
  def clip(min_val, max_val)
569
4041
  wrap_expr(_rbexpr.clip(min_val, max_val))
570
4042
  end
571
4043
 
4044
+ # Clip (limit) the values in an array to a `min` boundary.
4045
+ #
4046
+ # Only works for numerical types.
4047
+ #
4048
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4049
+ # expression. See `when` for more information.
4050
+ #
4051
+ # @param min_val [Numeric]
4052
+ # Minimum value.
4053
+ #
4054
+ # @return [Expr]
4055
+ #
4056
+ # @example
4057
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4058
+ # df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
4059
+ # # =>
4060
+ # # shape: (4, 2)
4061
+ # # ┌──────┬─────────────┐
4062
+ # # │ foo ┆ foo_clipped │
4063
+ # # │ --- ┆ --- │
4064
+ # # │ i64 ┆ i64 │
4065
+ # # ╞══════╪═════════════╡
4066
+ # # │ -50 ┆ 0 │
4067
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4068
+ # # │ 5 ┆ 5 │
4069
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4070
+ # # │ null ┆ null │
4071
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4072
+ # # │ 50 ┆ 50 │
4073
+ # # └──────┴─────────────┘
572
4074
  def clip_min(min_val)
573
4075
  wrap_expr(_rbexpr.clip_min(min_val))
574
4076
  end
575
4077
 
4078
+ # Clip (limit) the values in an array to a `max` boundary.
4079
+ #
4080
+ # Only works for numerical types.
4081
+ #
4082
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
4083
+ # expression. See `when` for more information.
4084
+ #
4085
+ # @param max_val [Numeric]
4086
+ # Maximum value.
4087
+ #
4088
+ # @return [Expr]
4089
+ #
4090
+ # @example
4091
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
4092
+ # df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
4093
+ # # =>
4094
+ # # shape: (4, 2)
4095
+ # # ┌──────┬─────────────┐
4096
+ # # │ foo ┆ foo_clipped │
4097
+ # # │ --- ┆ --- │
4098
+ # # │ i64 ┆ i64 │
4099
+ # # ╞══════╪═════════════╡
4100
+ # # │ -50 ┆ -50 │
4101
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4102
+ # # │ 5 ┆ 0 │
4103
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4104
+ # # │ null ┆ null │
4105
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
4106
+ # # │ 50 ┆ 0 │
4107
+ # # └──────┴─────────────┘
576
4108
  def clip_max(max_val)
577
4109
  wrap_expr(_rbexpr.clip_max(max_val))
578
4110
  end
579
4111
 
4112
+ # Calculate the lower bound.
4113
+ #
4114
+ # Returns a unit Series with the lowest value possible for the dtype of this
4115
+ # expression.
4116
+ #
4117
+ # @return [Expr]
4118
+ #
4119
+ # @example
4120
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4121
+ # df.select(Polars.col("a").lower_bound)
4122
+ # # =>
4123
+ # # shape: (1, 1)
4124
+ # # ┌──────────────────────┐
4125
+ # # │ a │
4126
+ # # │ --- │
4127
+ # # │ i64 │
4128
+ # # ╞══════════════════════╡
4129
+ # # │ -9223372036854775808 │
4130
+ # # └──────────────────────┘
580
4131
  def lower_bound
581
4132
  wrap_expr(_rbexpr.lower_bound)
582
4133
  end
583
4134
 
4135
+ # Calculate the upper bound.
4136
+ #
4137
+ # Returns a unit Series with the highest value possible for the dtype of this
4138
+ # expression.
4139
+ #
4140
+ # @return [Expr]
4141
+ #
4142
+ # @example
4143
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
4144
+ # df.select(Polars.col("a").upper_bound)
4145
+ # # =>
4146
+ # # shape: (1, 1)
4147
+ # # ┌─────────────────────┐
4148
+ # # │ a │
4149
+ # # │ --- │
4150
+ # # │ i64 │
4151
+ # # ╞═════════════════════╡
4152
+ # # │ 9223372036854775807 │
4153
+ # # └─────────────────────┘
584
4154
  def upper_bound
585
4155
  wrap_expr(_rbexpr.upper_bound)
586
4156
  end
587
4157
 
4158
+ # Compute the element-wise indication of the sign.
4159
+ #
4160
+ # @return [Expr]
4161
+ #
4162
+ # @example
4163
+ # df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
4164
+ # df.select(Polars.col("a").sign)
4165
+ # # =>
4166
+ # # shape: (5, 1)
4167
+ # # ┌──────┐
4168
+ # # │ a │
4169
+ # # │ --- │
4170
+ # # │ i64 │
4171
+ # # ╞══════╡
4172
+ # # │ -1 │
4173
+ # # ├╌╌╌╌╌╌┤
4174
+ # # │ 0 │
4175
+ # # ├╌╌╌╌╌╌┤
4176
+ # # │ 0 │
4177
+ # # ├╌╌╌╌╌╌┤
4178
+ # # │ 1 │
4179
+ # # ├╌╌╌╌╌╌┤
4180
+ # # │ null │
4181
+ # # └──────┘
588
4182
  def sign
589
4183
  wrap_expr(_rbexpr.sign)
590
4184
  end
591
4185
 
4186
+ # Compute the element-wise value for the sine.
4187
+ #
4188
+ # @return [Expr]
4189
+ #
4190
+ # @example
4191
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4192
+ # df.select(Polars.col("a").sin)
4193
+ # # =>
4194
+ # # shape: (1, 1)
4195
+ # # ┌─────┐
4196
+ # # │ a │
4197
+ # # │ --- │
4198
+ # # │ f64 │
4199
+ # # ╞═════╡
4200
+ # # │ 0.0 │
4201
+ # # └─────┘
592
4202
  def sin
593
4203
  wrap_expr(_rbexpr.sin)
594
4204
  end
595
4205
 
4206
+ # Compute the element-wise value for the cosine.
4207
+ #
4208
+ # @return [Expr]
4209
+ #
4210
+ # @example
4211
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4212
+ # df.select(Polars.col("a").cos)
4213
+ # # =>
4214
+ # # shape: (1, 1)
4215
+ # # ┌─────┐
4216
+ # # │ a │
4217
+ # # │ --- │
4218
+ # # │ f64 │
4219
+ # # ╞═════╡
4220
+ # # │ 1.0 │
4221
+ # # └─────┘
596
4222
  def cos
597
4223
  wrap_expr(_rbexpr.cos)
598
4224
  end
599
4225
 
4226
+ # Compute the element-wise value for the tangent.
4227
+ #
4228
+ # @return [Expr]
4229
+ #
4230
+ # @example
4231
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4232
+ # df.select(Polars.col("a").tan)
4233
+ # # =>
4234
+ # # shape: (1, 1)
4235
+ # # ┌──────────┐
4236
+ # # │ a │
4237
+ # # │ --- │
4238
+ # # │ f64 │
4239
+ # # ╞══════════╡
4240
+ # # │ 1.557408 │
4241
+ # # └──────────┘
600
4242
  def tan
601
4243
  wrap_expr(_rbexpr.tan)
602
4244
  end
603
4245
 
4246
+ # Compute the element-wise value for the inverse sine.
4247
+ #
4248
+ # @return [Expr]
4249
+ #
4250
+ # @example
4251
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4252
+ # df.select(Polars.col("a").arcsin)
4253
+ # # =>
4254
+ # # shape: (1, 1)
4255
+ # # ┌──────────┐
4256
+ # # │ a │
4257
+ # # │ --- │
4258
+ # # │ f64 │
4259
+ # # ╞══════════╡
4260
+ # # │ 1.570796 │
4261
+ # # └──────────┘
604
4262
  def arcsin
605
4263
  wrap_expr(_rbexpr.arcsin)
606
4264
  end
607
4265
 
4266
+ # Compute the element-wise value for the inverse cosine.
4267
+ #
4268
+ # @return [Expr]
4269
+ #
4270
+ # @example
4271
+ # df = Polars::DataFrame.new({"a" => [0.0]})
4272
+ # df.select(Polars.col("a").arccos)
4273
+ # # =>
4274
+ # # shape: (1, 1)
4275
+ # # ┌──────────┐
4276
+ # # │ a │
4277
+ # # │ --- │
4278
+ # # │ f64 │
4279
+ # # ╞══════════╡
4280
+ # # │ 1.570796 │
4281
+ # # └──────────┘
608
4282
  def arccos
609
4283
  wrap_expr(_rbexpr.arccos)
610
4284
  end
611
4285
 
4286
+ # Compute the element-wise value for the inverse tangent.
4287
+ #
4288
+ # @return [Expr]
4289
+ #
4290
+ # @example
4291
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4292
+ # df.select(Polars.col("a").arctan)
4293
+ # # =>
4294
+ # # shape: (1, 1)
4295
+ # # ┌──────────┐
4296
+ # # │ a │
4297
+ # # │ --- │
4298
+ # # │ f64 │
4299
+ # # ╞══════════╡
4300
+ # # │ 0.785398 │
4301
+ # # └──────────┘
612
4302
  def arctan
613
4303
  wrap_expr(_rbexpr.arctan)
614
4304
  end
615
4305
 
4306
+ # Compute the element-wise value for the hyperbolic sine.
4307
+ #
4308
+ # @return [Expr]
4309
+ #
4310
+ # @example
4311
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4312
+ # df.select(Polars.col("a").sinh)
4313
+ # # =>
4314
+ # # shape: (1, 1)
4315
+ # # ┌──────────┐
4316
+ # # │ a │
4317
+ # # │ --- │
4318
+ # # │ f64 │
4319
+ # # ╞══════════╡
4320
+ # # │ 1.175201 │
4321
+ # # └──────────┘
616
4322
  def sinh
617
4323
  wrap_expr(_rbexpr.sinh)
618
4324
  end
619
4325
 
4326
+ # Compute the element-wise value for the hyperbolic cosine.
4327
+ #
4328
+ # @return [Expr]
4329
+ #
4330
+ # @example
4331
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4332
+ # df.select(Polars.col("a").cosh)
4333
+ # # =>
4334
+ # # shape: (1, 1)
4335
+ # # ┌──────────┐
4336
+ # # │ a │
4337
+ # # │ --- │
4338
+ # # │ f64 │
4339
+ # # ╞══════════╡
4340
+ # # │ 1.543081 │
4341
+ # # └──────────┘
620
4342
  def cosh
621
4343
  wrap_expr(_rbexpr.cosh)
622
4344
  end
623
4345
 
4346
+ # Compute the element-wise value for the hyperbolic tangent.
4347
+ #
4348
+ # @return [Expr]
4349
+ #
4350
+ # @example
4351
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4352
+ # df.select(Polars.col("a").tanh)
4353
+ # # =>
4354
+ # # shape: (1, 1)
4355
+ # # ┌──────────┐
4356
+ # # │ a │
4357
+ # # │ --- │
4358
+ # # │ f64 │
4359
+ # # ╞══════════╡
4360
+ # # │ 0.761594 │
4361
+ # # └──────────┘
624
4362
  def tanh
625
4363
  wrap_expr(_rbexpr.tanh)
626
4364
  end
627
4365
 
4366
+ # Compute the element-wise value for the inverse hyperbolic sine.
4367
+ #
4368
+ # @return [Expr]
4369
+ #
4370
+ # @example
4371
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4372
+ # df.select(Polars.col("a").arcsinh)
4373
+ # # =>
4374
+ # # shape: (1, 1)
4375
+ # # ┌──────────┐
4376
+ # # │ a │
4377
+ # # │ --- │
4378
+ # # │ f64 │
4379
+ # # ╞══════════╡
4380
+ # # │ 0.881374 │
4381
+ # # └──────────┘
628
4382
  def arcsinh
629
4383
  wrap_expr(_rbexpr.arcsinh)
630
4384
  end
631
4385
 
4386
+ # Compute the element-wise value for the inverse hyperbolic cosine.
4387
+ #
4388
+ # @return [Expr]
4389
+ #
4390
+ # @example
4391
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4392
+ # df.select(Polars.col("a").arccosh)
4393
+ # # =>
4394
+ # # shape: (1, 1)
4395
+ # # ┌─────┐
4396
+ # # │ a │
4397
+ # # │ --- │
4398
+ # # │ f64 │
4399
+ # # ╞═════╡
4400
+ # # │ 0.0 │
4401
+ # # └─────┘
632
4402
  def arccosh
633
4403
  wrap_expr(_rbexpr.arccosh)
634
4404
  end
635
4405
 
4406
+ # Compute the element-wise value for the inverse hyperbolic tangent.
4407
+ #
4408
+ # @return [Expr]
4409
+ #
4410
+ # @example
4411
+ # df = Polars::DataFrame.new({"a" => [1.0]})
4412
+ # df.select(Polars.col("a").arctanh)
4413
+ # # =>
4414
+ # # shape: (1, 1)
4415
+ # # ┌─────┐
4416
+ # # │ a │
4417
+ # # │ --- │
4418
+ # # │ f64 │
4419
+ # # ╞═════╡
4420
+ # # │ inf │
4421
+ # # └─────┘
636
4422
  def arctanh
637
4423
  wrap_expr(_rbexpr.arctanh)
638
4424
  end
639
4425
 
4426
+ # Reshape this Expr to a flat Series or a Series of Lists.
4427
+ #
4428
+ # @param dims [Array]
4429
+ # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
4430
+ # dimension is inferred.
4431
+ #
4432
+ # @return [Expr]
4433
+ #
4434
+ # @example
4435
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
4436
+ # df.select(Polars.col("foo").reshape([3, 3]))
4437
+ # # =>
4438
+ # # shape: (3, 1)
4439
+ # # ┌───────────┐
4440
+ # # │ foo │
4441
+ # # │ --- │
4442
+ # # │ list[i64] │
4443
+ # # ╞═══════════╡
4444
+ # # │ [1, 2, 3] │
4445
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4446
+ # # │ [4, 5, 6] │
4447
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4448
+ # # │ [7, 8, 9] │
4449
+ # # └───────────┘
640
4450
  def reshape(dims)
641
4451
  wrap_expr(_rbexpr.reshape(dims))
642
4452
  end
643
4453
 
4454
+ # Shuffle the contents of this expr.
4455
+ #
4456
+ # @param seed [Integer]
4457
+ # Seed for the random number generator. If set to nil (default), a random
4458
+ # seed is generated using `rand`.
4459
+ #
4460
+ # @return [Expr]
4461
+ #
4462
+ # @example
4463
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4464
+ # df.select(Polars.col("a").shuffle(seed: 1))
4465
+ # # =>
4466
+ # # shape: (3, 1)
4467
+ # # ┌─────┐
4468
+ # # │ a │
4469
+ # # │ --- │
4470
+ # # │ i64 │
4471
+ # # ╞═════╡
4472
+ # # │ 2 │
4473
+ # # ├╌╌╌╌╌┤
4474
+ # # │ 1 │
4475
+ # # ├╌╌╌╌╌┤
4476
+ # # │ 3 │
4477
+ # # └─────┘
644
4478
  def shuffle(seed: nil)
645
4479
  if seed.nil?
646
4480
  seed = rand(10000)
@@ -648,73 +4482,514 @@ module Polars
648
4482
  wrap_expr(_rbexpr.shuffle(seed))
649
4483
  end
650
4484
 
651
- # def sample
652
- # end
653
-
654
- # def ewm_mean
655
- # end
656
-
657
- # def ewm_std
658
- # end
4485
+ # Sample from this expression.
4486
+ #
4487
+ # @param frac [Float]
4488
+ # Fraction of items to return. Cannot be used with `n`.
4489
+ # @param with_replacement [Boolean]
4490
+ # Allow values to be sampled more than once.
4491
+ # @param shuffle [Boolean]
4492
+ # Shuffle the order of sampled data points.
4493
+ # @param seed [Integer]
4494
+ # Seed for the random number generator. If set to nil (default), a random
4495
+ # seed is used.
4496
+ # @param n [Integer]
4497
+ # Number of items to return. Cannot be used with `frac`.
4498
+ #
4499
+ # @return [Expr]
4500
+ #
4501
+ # @example
4502
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4503
+ # df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
4504
+ # # =>
4505
+ # # shape: (3, 1)
4506
+ # # ┌─────┐
4507
+ # # │ a │
4508
+ # # │ --- │
4509
+ # # │ i64 │
4510
+ # # ╞═════╡
4511
+ # # │ 3 │
4512
+ # # ├╌╌╌╌╌┤
4513
+ # # │ 1 │
4514
+ # # ├╌╌╌╌╌┤
4515
+ # # │ 1 │
4516
+ # # └─────┘
4517
+ def sample(
4518
+ frac: nil,
4519
+ with_replacement: true,
4520
+ shuffle: false,
4521
+ seed: nil,
4522
+ n: nil
4523
+ )
4524
+ if !n.nil? && !frac.nil?
4525
+ raise ArgumentError, "cannot specify both `n` and `frac`"
4526
+ end
659
4527
 
660
- # def ewm_var
661
- # end
4528
+ if !n.nil? && frac.nil?
4529
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4530
+ end
662
4531
 
663
- # def extend_constant
664
- # end
4532
+ if frac.nil?
4533
+ frac = 1.0
4534
+ end
4535
+ wrap_expr(
4536
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4537
+ )
4538
+ end
665
4539
 
4540
+ # Exponentially-weighted moving average.
4541
+ #
4542
+ # @return [Expr]
4543
+ #
4544
+ # @example
4545
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4546
+ # df.select(Polars.col("a").ewm_mean(com: 1))
4547
+ # # =>
4548
+ # # shape: (3, 1)
4549
+ # # ┌──────────┐
4550
+ # # │ a │
4551
+ # # │ --- │
4552
+ # # │ f64 │
4553
+ # # ╞══════════╡
4554
+ # # │ 1.0 │
4555
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4556
+ # # │ 1.666667 │
4557
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4558
+ # # │ 2.428571 │
4559
+ # # └──────────┘
4560
+ def ewm_mean(
4561
+ com: nil,
4562
+ span: nil,
4563
+ half_life: nil,
4564
+ alpha: nil,
4565
+ adjust: true,
4566
+ min_periods: 1
4567
+ )
4568
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4569
+ wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
4570
+ end
4571
+
4572
+ # Exponentially-weighted moving standard deviation.
4573
+ #
4574
+ # @return [Expr]
4575
+ #
4576
+ # @example
4577
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4578
+ # df.select(Polars.col("a").ewm_std(com: 1))
4579
+ # # =>
4580
+ # # shape: (3, 1)
4581
+ # # ┌──────────┐
4582
+ # # │ a │
4583
+ # # │ --- │
4584
+ # # │ f64 │
4585
+ # # ╞══════════╡
4586
+ # # │ 0.0 │
4587
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4588
+ # # │ 0.707107 │
4589
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4590
+ # # │ 0.963624 │
4591
+ # # └──────────┘
4592
+ def ewm_std(
4593
+ com: nil,
4594
+ span: nil,
4595
+ half_life: nil,
4596
+ alpha: nil,
4597
+ adjust: true,
4598
+ bias: false,
4599
+ min_periods: 1
4600
+ )
4601
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4602
+ wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
4603
+ end
4604
+
4605
+ # Exponentially-weighted moving variance.
4606
+ #
4607
+ # @return [Expr]
4608
+ #
4609
+ # @example
4610
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4611
+ # df.select(Polars.col("a").ewm_var(com: 1))
4612
+ # # =>
4613
+ # # shape: (3, 1)
4614
+ # # ┌──────────┐
4615
+ # # │ a │
4616
+ # # │ --- │
4617
+ # # │ f64 │
4618
+ # # ╞══════════╡
4619
+ # # │ 0.0 │
4620
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4621
+ # # │ 0.5 │
4622
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4623
+ # # │ 0.928571 │
4624
+ # # └──────────┘
4625
+ def ewm_var(
4626
+ com: nil,
4627
+ span: nil,
4628
+ half_life: nil,
4629
+ alpha: nil,
4630
+ adjust: true,
4631
+ bias: false,
4632
+ min_periods: 1
4633
+ )
4634
+ alpha = _prepare_alpha(com, span, half_life, alpha)
4635
+ wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
4636
+ end
4637
+
4638
+ # Extend the Series with given number of values.
4639
+ #
4640
+ # @param value [Object]
4641
+ # The value to extend the Series with. This value may be nil to fill with
4642
+ # nulls.
4643
+ # @param n [Integer]
4644
+ # The number of values to extend.
666
4645
  #
4646
+ # @return [Expr]
4647
+ #
4648
+ # @example
4649
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4650
+ # df.select(Polars.col("values").extend_constant(99, 2))
4651
+ # # =>
4652
+ # # shape: (5, 1)
4653
+ # # ┌────────┐
4654
+ # # │ values │
4655
+ # # │ --- │
4656
+ # # │ i64 │
4657
+ # # ╞════════╡
4658
+ # # │ 1 │
4659
+ # # ├╌╌╌╌╌╌╌╌┤
4660
+ # # │ 2 │
4661
+ # # ├╌╌╌╌╌╌╌╌┤
4662
+ # # │ 3 │
4663
+ # # ├╌╌╌╌╌╌╌╌┤
4664
+ # # │ 99 │
4665
+ # # ├╌╌╌╌╌╌╌╌┤
4666
+ # # │ 99 │
4667
+ # # └────────┘
4668
+ def extend_constant(value, n)
4669
+ wrap_expr(_rbexpr.extend_constant(value, n))
4670
+ end
4671
+
4672
+ # Count all unique values and create a struct mapping value to count.
4673
+ #
4674
+ # @param multithreaded [Boolean]
4675
+ # Better to turn this off in the aggregation context, as it can lead to
4676
+ # contention.
4677
+ # @param sort [Boolean]
4678
+ # Ensure the output is sorted from most values to least.
4679
+ #
4680
+ # @return [Expr]
4681
+ #
4682
+ # @example
4683
+ # df = Polars::DataFrame.new(
4684
+ # {
4685
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4686
+ # }
4687
+ # )
4688
+ # df.select(
4689
+ # [
4690
+ # Polars.col("id").value_counts(sort: true),
4691
+ # ]
4692
+ # )
4693
+ # # =>
4694
+ # # shape: (3, 1)
4695
+ # # ┌───────────┐
4696
+ # # │ id │
4697
+ # # │ --- │
4698
+ # # │ struct[2] │
4699
+ # # ╞═══════════╡
4700
+ # # │ {"c",3} │
4701
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4702
+ # # │ {"b",2} │
4703
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
4704
+ # # │ {"a",1} │
4705
+ # # └───────────┘
667
4706
  def value_counts(multithreaded: false, sort: false)
668
4707
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
669
4708
  end
670
4709
 
4710
+ # Return a count of the unique values in the order of appearance.
4711
+ #
4712
+ # This method differs from `value_counts` in that it does not return the
4713
+ # values, only the counts and might be faster.
4714
+ #
4715
+ # @return [Expr]
4716
+ #
4717
+ # @example
4718
+ # df = Polars::DataFrame.new(
4719
+ # {
4720
+ # "id" => ["a", "b", "b", "c", "c", "c"]
4721
+ # }
4722
+ # )
4723
+ # df.select(
4724
+ # [
4725
+ # Polars.col("id").unique_counts
4726
+ # ]
4727
+ # )
4728
+ # # =>
4729
+ # # shape: (3, 1)
4730
+ # # ┌─────┐
4731
+ # # │ id │
4732
+ # # │ --- │
4733
+ # # │ u32 │
4734
+ # # ╞═════╡
4735
+ # # │ 1 │
4736
+ # # ├╌╌╌╌╌┤
4737
+ # # │ 2 │
4738
+ # # ├╌╌╌╌╌┤
4739
+ # # │ 3 │
4740
+ # # └─────┘
671
4741
  def unique_counts
672
4742
  wrap_expr(_rbexpr.unique_counts)
673
4743
  end
674
4744
 
4745
+ # Compute the logarithm to a given base.
4746
+ #
4747
+ # @param base [Float]
4748
+ # Given base, defaults to `e`.
4749
+ #
4750
+ # @return [Expr]
4751
+ #
4752
+ # @example
4753
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4754
+ # df.select(Polars.col("a").log(2))
4755
+ # # =>
4756
+ # # shape: (3, 1)
4757
+ # # ┌──────────┐
4758
+ # # │ a │
4759
+ # # │ --- │
4760
+ # # │ f64 │
4761
+ # # ╞══════════╡
4762
+ # # │ 0.0 │
4763
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4764
+ # # │ 1.0 │
4765
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
4766
+ # # │ 1.584963 │
4767
+ # # └──────────┘
675
4768
  def log(base = Math::E)
676
- wrap_expr(self._rbexpr.log(base))
4769
+ wrap_expr(_rbexpr.log(base))
677
4770
  end
678
4771
 
679
- def entropy(base: 2, normalize: false)
4772
+ # Computes the entropy.
4773
+ #
4774
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
4775
+ #
4776
+ # @param base [Float]
4777
+ # Given base, defaults to `2`.
4778
+ # @param normalize [Boolean]
4779
+ # Normalize pk if it doesn't sum to 1.
4780
+ #
4781
+ # @return [Expr]
4782
+ #
4783
+ # @example
4784
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
4785
+ # df.select(Polars.col("a").entropy(base: 2))
4786
+ # # =>
4787
+ # # shape: (1, 1)
4788
+ # # ┌──────────┐
4789
+ # # │ a │
4790
+ # # │ --- │
4791
+ # # │ f64 │
4792
+ # # ╞══════════╡
4793
+ # # │ 1.459148 │
4794
+ # # └──────────┘
4795
+ #
4796
+ # @example
4797
+ # df.select(Polars.col("a").entropy(base: 2, normalize: false))
4798
+ # # =>
4799
+ # # shape: (1, 1)
4800
+ # # ┌───────────┐
4801
+ # # │ a │
4802
+ # # │ --- │
4803
+ # # │ f64 │
4804
+ # # ╞═══════════╡
4805
+ # # │ -6.754888 │
4806
+ # # └───────────┘
4807
+ def entropy(base: 2, normalize: true)
680
4808
  wrap_expr(_rbexpr.entropy(base, normalize))
681
4809
  end
682
4810
 
683
- # def cumulative_eval
684
- # end
685
-
686
- # def set_sorted
4811
+ # Run an expression over a sliding window that increases `1` slot every iteration.
4812
+ #
4813
+ # @param expr [Expr]
4814
+ # Expression to evaluate
4815
+ # @param min_periods [Integer]
4816
+ # Number of valid values there should be in the window before the expression
4817
+ # is evaluated. valid values = `length - null_count`
4818
+ # @param parallel [Boolean]
4819
+ # Run in parallel. Don't do this in a groupby or another operation that
4820
+ # already has much parallelization.
4821
+ #
4822
+ # @return [Expr]
4823
+ #
4824
+ # @note
4825
+ # This functionality is experimental and may change without it being considered a
4826
+ # breaking change.
4827
+ #
4828
+ # @note
4829
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
4830
+ # for operations that visit all elements.
4831
+ #
4832
+ # @example
4833
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
4834
+ # df.select(
4835
+ # [
4836
+ # Polars.col("values").cumulative_eval(
4837
+ # Polars.element.first - Polars.element.last ** 2
4838
+ # )
4839
+ # ]
4840
+ # )
4841
+ # # =>
4842
+ # # shape: (5, 1)
4843
+ # # ┌────────┐
4844
+ # # │ values │
4845
+ # # │ --- │
4846
+ # # │ f64 │
4847
+ # # ╞════════╡
4848
+ # # │ 0.0 │
4849
+ # # ├╌╌╌╌╌╌╌╌┤
4850
+ # # │ -3.0 │
4851
+ # # ├╌╌╌╌╌╌╌╌┤
4852
+ # # │ -8.0 │
4853
+ # # ├╌╌╌╌╌╌╌╌┤
4854
+ # # │ -15.0 │
4855
+ # # ├╌╌╌╌╌╌╌╌┤
4856
+ # # │ -24.0 │
4857
+ # # └────────┘
4858
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
4859
+ wrap_expr(
4860
+ _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
4861
+ )
4862
+ end
4863
+
4864
+ # Flags the expression as 'sorted'.
4865
+ #
4866
+ # Enables downstream code to use fast paths for sorted arrays.
4867
+ #
4868
+ # @param reverse [Boolean]
4869
+ # If the `Series` order is reversed, e.g. descending.
4870
+ #
4871
+ # @return [Expr]
4872
+ #
4873
+ # @note
4874
+ # This can lead to incorrect results if this `Series` is not sorted!!
4875
+ # Use with care!
4876
+ #
4877
+ # @example
4878
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
4879
+ # df.select(Polars.col("values").set_sorted.max)
4880
+ # # =>
4881
+ # # shape: (1, 1)
4882
+ # # ┌────────┐
4883
+ # # │ values │
4884
+ # # │ --- │
4885
+ # # │ i64 │
4886
+ # # ╞════════╡
4887
+ # # │ 3 │
4888
+ # # └────────┘
4889
+ # def set_sorted(reverse: false)
4890
+ # map { |s| s.set_sorted(reverse) }
687
4891
  # end
688
4892
 
4893
+ # Aggregate to list.
4894
+ #
4895
+ # @return [Expr]
689
4896
  #
4897
+ # @example
4898
+ # df = Polars::DataFrame.new(
4899
+ # {
4900
+ # "a" => [1, 2, 3],
4901
+ # "b" => [4, 5, 6]
4902
+ # }
4903
+ # )
4904
+ # df.select(Polars.all.list)
4905
+ # # =>
4906
+ # # shape: (1, 2)
4907
+ # # ┌───────────┬───────────┐
4908
+ # # │ a ┆ b │
4909
+ # # │ --- ┆ --- │
4910
+ # # │ list[i64] ┆ list[i64] │
4911
+ # # ╞═══════════╪═══════════╡
4912
+ # # │ [1, 2, 3] ┆ [4, 5, 6] │
4913
+ # # └───────────┴───────────┘
690
4914
  def list
691
4915
  wrap_expr(_rbexpr.list)
692
4916
  end
693
4917
 
4918
+ # Shrink numeric columns to the minimal required datatype.
4919
+ #
4920
+ # Shrink to the dtype needed to fit the extrema of this `Series`.
4921
+ # This can be used to reduce memory pressure.
4922
+ #
4923
+ # @return [Expr]
4924
+ #
4925
+ # @example
4926
+ # Polars::DataFrame.new(
4927
+ # {
4928
+ # "a" => [1, 2, 3],
4929
+ # "b" => [1, 2, 2 << 32],
4930
+ # "c" => [-1, 2, 1 << 30],
4931
+ # "d" => [-112, 2, 112],
4932
+ # "e" => [-112, 2, 129],
4933
+ # "f" => ["a", "b", "c"],
4934
+ # "g" => [0.1, 1.32, 0.12],
4935
+ # "h" => [true, nil, false]
4936
+ # }
4937
+ # ).select(Polars.all.shrink_dtype)
4938
+ # # =>
4939
+ # # shape: (3, 8)
4940
+ # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
4941
+ # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
4942
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
4943
+ # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
4944
+ # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
4945
+ # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
4946
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4947
+ # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
4948
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4949
+ # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
4950
+ # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
694
4951
  def shrink_dtype
695
4952
  wrap_expr(_rbexpr.shrink_dtype)
696
4953
  end
697
4954
 
4955
+ # Create an object namespace of all list related methods.
4956
+ #
4957
+ # @return [ListExpr]
698
4958
  def arr
699
4959
  ListExpr.new(self)
700
4960
  end
701
4961
 
4962
+ # Create an object namespace of all categorical related methods.
4963
+ #
4964
+ # @return [CatExpr]
702
4965
  def cat
703
4966
  CatExpr.new(self)
704
4967
  end
705
4968
 
4969
+ # Create an object namespace of all datetime related methods.
4970
+ #
4971
+ # @return [DateTimeExpr]
706
4972
  def dt
707
4973
  DateTimeExpr.new(self)
708
4974
  end
709
4975
 
4976
+ # Create an object namespace of all meta related expression methods.
4977
+ #
4978
+ # @return [MetaExpr]
710
4979
  def meta
711
4980
  MetaExpr.new(self)
712
4981
  end
713
4982
 
4983
+ # Create an object namespace of all string related methods.
4984
+ #
4985
+ # @return [StringExpr]
714
4986
  def str
715
4987
  StringExpr.new(self)
716
4988
  end
717
4989
 
4990
+ # Create an object namespace of all struct related methods.
4991
+ #
4992
+ # @return [StructExpr]
718
4993
  def struct
719
4994
  StructExpr.new(self)
720
4995
  end
@@ -732,5 +5007,51 @@ module Polars
732
5007
  def _to_expr(other)
733
5008
  other.is_a?(Expr) ? other : Utils.lit(other)
734
5009
  end
5010
+
5011
+ def _prepare_alpha(com, span, half_life, alpha)
5012
+ if [com, span, half_life, alpha].count { |v| !v.nil? } > 1
5013
+ raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
5014
+ end
5015
+
5016
+ if !com.nil?
5017
+ if com < 0.0
5018
+ raise ArgumentError, "Require 'com' >= 0 (found #{com})"
5019
+ end
5020
+ alpha = 1.0 / (1.0 + com)
5021
+
5022
+ elsif !span.nil?
5023
+ if span < 1.0
5024
+ raise ArgumentError, "Require 'span' >= 1 (found #{span})"
5025
+ end
5026
+ alpha = 2.0 / (span + 1.0)
5027
+
5028
+ elsif !half_life.nil?
5029
+ if half_life <= 0.0
5030
+ raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})"
5031
+ end
5032
+ alpha = 1.0 - Math.exp(-Math.log(2.0) / half_life)
5033
+
5034
+ elsif alpha.nil?
5035
+ raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
5036
+
5037
+ elsif alpha <= 0 || alpha > 1
5038
+ raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
5039
+ end
5040
+
5041
+ alpha
5042
+ end
5043
+
5044
+ def _prepare_rolling_window_args(window_size, min_periods)
5045
+ if window_size.is_a?(Integer)
5046
+ if min_periods.nil?
5047
+ min_periods = window_size
5048
+ end
5049
+ window_size = "#{window_size}i"
5050
+ end
5051
+ if min_periods.nil?
5052
+ min_periods = 1
5053
+ end
5054
+ [window_size, min_periods]
5055
+ end
735
5056
  end
736
5057
  end