polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/lib/polars/expr.rb
CHANGED
@@ -11,106 +11,366 @@ module Polars
|
|
11
11
|
expr
|
12
12
|
end
|
13
13
|
|
14
|
+
# Returns a string representing the Expr.
|
15
|
+
#
|
16
|
+
# @return [String]
|
14
17
|
def to_s
|
15
18
|
_rbexpr.to_str
|
16
19
|
end
|
17
20
|
alias_method :inspect, :to_s
|
18
21
|
|
22
|
+
# Bitwise XOR.
|
23
|
+
#
|
24
|
+
# @return [Expr]
|
19
25
|
def ^(other)
|
20
26
|
wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
|
21
27
|
end
|
22
28
|
|
29
|
+
# Bitwise AND.
|
30
|
+
#
|
31
|
+
# @return [Expr]
|
23
32
|
def &(other)
|
24
33
|
wrap_expr(_rbexpr._and(_to_rbexpr(other)))
|
25
34
|
end
|
26
35
|
|
36
|
+
# Bitwise OR.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
27
39
|
def |(other)
|
28
40
|
wrap_expr(_rbexpr._or(_to_rbexpr(other)))
|
29
41
|
end
|
30
42
|
|
43
|
+
# Performs addition.
|
44
|
+
#
|
45
|
+
# @return [Expr]
|
31
46
|
def +(other)
|
32
47
|
wrap_expr(_rbexpr + _to_rbexpr(other))
|
33
48
|
end
|
34
49
|
|
50
|
+
# Performs subtraction.
|
51
|
+
#
|
52
|
+
# @return [Expr]
|
35
53
|
def -(other)
|
36
54
|
wrap_expr(_rbexpr - _to_rbexpr(other))
|
37
55
|
end
|
38
56
|
|
57
|
+
# Performs multiplication.
|
58
|
+
#
|
59
|
+
# @return [Expr]
|
39
60
|
def *(other)
|
40
61
|
wrap_expr(_rbexpr * _to_rbexpr(other))
|
41
62
|
end
|
42
63
|
|
64
|
+
# Performs division.
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
43
67
|
def /(other)
|
44
68
|
wrap_expr(_rbexpr / _to_rbexpr(other))
|
45
69
|
end
|
46
70
|
|
71
|
+
# Performs floor division.
|
72
|
+
#
|
73
|
+
# @return [Expr]
|
74
|
+
def floordiv(other)
|
75
|
+
wrap_expr(_rbexpr.floordiv(_to_rbexpr(other)))
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns the modulo.
|
79
|
+
#
|
80
|
+
# @return [Expr]
|
47
81
|
def %(other)
|
48
82
|
wrap_expr(_rbexpr % _to_rbexpr(other))
|
49
83
|
end
|
50
84
|
|
85
|
+
# Raises to the power of exponent.
|
86
|
+
#
|
87
|
+
# @return [Expr]
|
51
88
|
def **(power)
|
52
89
|
pow(power)
|
53
90
|
end
|
54
91
|
|
92
|
+
# Greater than or equal.
|
93
|
+
#
|
94
|
+
# @return [Expr]
|
55
95
|
def >=(other)
|
56
96
|
wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
|
57
97
|
end
|
58
98
|
|
99
|
+
# Less than or equal.
|
100
|
+
#
|
101
|
+
# @return [Expr]
|
59
102
|
def <=(other)
|
60
103
|
wrap_expr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
|
61
104
|
end
|
62
105
|
|
106
|
+
# Equal.
|
107
|
+
#
|
108
|
+
# @return [Expr]
|
63
109
|
def ==(other)
|
64
110
|
wrap_expr(_rbexpr.eq(_to_expr(other)._rbexpr))
|
65
111
|
end
|
66
112
|
|
113
|
+
# Not equal.
|
114
|
+
#
|
115
|
+
# @return [Expr]
|
67
116
|
def !=(other)
|
68
117
|
wrap_expr(_rbexpr.neq(_to_expr(other)._rbexpr))
|
69
118
|
end
|
70
119
|
|
120
|
+
# Less than.
|
121
|
+
#
|
122
|
+
# @return [Expr]
|
71
123
|
def <(other)
|
72
124
|
wrap_expr(_rbexpr.lt(_to_expr(other)._rbexpr))
|
73
125
|
end
|
74
126
|
|
127
|
+
# Greater than.
|
128
|
+
#
|
129
|
+
# @return [Expr]
|
75
130
|
def >(other)
|
76
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
77
132
|
end
|
78
133
|
|
134
|
+
# Performs negation.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
79
137
|
def -@
|
80
138
|
Utils.lit(0) - self
|
81
139
|
end
|
82
140
|
|
83
|
-
#
|
84
|
-
#
|
85
|
-
|
141
|
+
# Cast to physical representation of the logical dtype.
|
142
|
+
#
|
143
|
+
# - `:date` -> `:i32`
|
144
|
+
# - `:datetime` -> `:i64`
|
145
|
+
# - `:time` -> `:i64`
|
146
|
+
# - `:duration` -> `:i64`
|
147
|
+
# - `:cat` -> `:u32`
|
148
|
+
# - Other data types will be left unchanged.
|
149
|
+
#
|
150
|
+
# @return [Expr]
|
86
151
|
#
|
152
|
+
# @example
|
153
|
+
# Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
|
154
|
+
# [
|
155
|
+
# Polars.col("vals").cast(:cat),
|
156
|
+
# Polars.col("vals")
|
157
|
+
# .cast(:cat)
|
158
|
+
# .to_physical
|
159
|
+
# .alias("vals_physical")
|
160
|
+
# ]
|
161
|
+
# )
|
162
|
+
# # =>
|
163
|
+
# # shape: (4, 2)
|
164
|
+
# # ┌──────┬───────────────┐
|
165
|
+
# # │ vals ┆ vals_physical │
|
166
|
+
# # │ --- ┆ --- │
|
167
|
+
# # │ cat ┆ u32 │
|
168
|
+
# # ╞══════╪═══════════════╡
|
169
|
+
# # │ a ┆ 0 │
|
170
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
171
|
+
# # │ x ┆ 1 │
|
172
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
173
|
+
# # │ null ┆ null │
|
174
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
175
|
+
# # │ a ┆ 0 │
|
176
|
+
# # └──────┴───────────────┘
|
177
|
+
def to_physical
|
178
|
+
wrap_expr(_rbexpr.to_physical)
|
179
|
+
end
|
180
|
+
|
181
|
+
# Check if any boolean value in a Boolean column is `true`.
|
182
|
+
#
|
183
|
+
# @return [Expr]
|
184
|
+
#
|
185
|
+
# @example
|
186
|
+
# df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
|
187
|
+
# df.select(Polars.all.any)
|
188
|
+
# # =>
|
189
|
+
# # shape: (1, 2)
|
190
|
+
# # ┌──────┬───────┐
|
191
|
+
# # │ TF ┆ FF │
|
192
|
+
# # │ --- ┆ --- │
|
193
|
+
# # │ bool ┆ bool │
|
194
|
+
# # ╞══════╪═══════╡
|
195
|
+
# # │ true ┆ false │
|
196
|
+
# # └──────┴───────┘
|
87
197
|
def any
|
88
198
|
wrap_expr(_rbexpr.any)
|
89
199
|
end
|
90
200
|
|
201
|
+
# Check if all boolean values in a Boolean column are `true`.
|
202
|
+
#
|
203
|
+
# This method is an expression - not to be confused with
|
204
|
+
# `Polars.all` which is a function to select all columns.
|
205
|
+
#
|
206
|
+
# @return [Expr]
|
207
|
+
#
|
208
|
+
# @example
|
209
|
+
# df = Polars::DataFrame.new(
|
210
|
+
# {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
|
211
|
+
# )
|
212
|
+
# df.select(Polars.col("*").all)
|
213
|
+
# # =>
|
214
|
+
# # shape: (1, 3)
|
215
|
+
# # ┌──────┬───────┬───────┐
|
216
|
+
# # │ TT ┆ TF ┆ FF │
|
217
|
+
# # │ --- ┆ --- ┆ --- │
|
218
|
+
# # │ bool ┆ bool ┆ bool │
|
219
|
+
# # ╞══════╪═══════╪═══════╡
|
220
|
+
# # │ true ┆ false ┆ false │
|
221
|
+
# # └──────┴───────┴───────┘
|
91
222
|
def all
|
92
223
|
wrap_expr(_rbexpr.all)
|
93
224
|
end
|
94
225
|
|
226
|
+
# Compute the square root of the elements.
|
227
|
+
#
|
228
|
+
# @return [Expr]
|
229
|
+
#
|
230
|
+
# @example
|
231
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
232
|
+
# df.select(Polars.col("values").sqrt)
|
233
|
+
# # =>
|
234
|
+
# # shape: (3, 1)
|
235
|
+
# # ┌──────────┐
|
236
|
+
# # │ values │
|
237
|
+
# # │ --- │
|
238
|
+
# # │ f64 │
|
239
|
+
# # ╞══════════╡
|
240
|
+
# # │ 1.0 │
|
241
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
242
|
+
# # │ 1.414214 │
|
243
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
244
|
+
# # │ 2.0 │
|
245
|
+
# # └──────────┘
|
95
246
|
def sqrt
|
96
|
-
self
|
247
|
+
self**0.5
|
97
248
|
end
|
98
249
|
|
250
|
+
# Compute the base 10 logarithm of the input array, element-wise.
|
251
|
+
#
|
252
|
+
# @return [Expr]
|
253
|
+
#
|
254
|
+
# @example
|
255
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
256
|
+
# df.select(Polars.col("values").log10)
|
257
|
+
# # =>
|
258
|
+
# # shape: (3, 1)
|
259
|
+
# # ┌─────────┐
|
260
|
+
# # │ values │
|
261
|
+
# # │ --- │
|
262
|
+
# # │ f64 │
|
263
|
+
# # ╞═════════╡
|
264
|
+
# # │ 0.0 │
|
265
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
266
|
+
# # │ 0.30103 │
|
267
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
268
|
+
# # │ 0.60206 │
|
269
|
+
# # └─────────┘
|
99
270
|
def log10
|
100
271
|
log(10)
|
101
272
|
end
|
102
273
|
|
274
|
+
# Compute the exponential, element-wise.
|
275
|
+
#
|
276
|
+
# @return [Expr]
|
277
|
+
#
|
278
|
+
# @example
|
279
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
280
|
+
# df.select(Polars.col("values").exp)
|
281
|
+
# # =>
|
282
|
+
# # shape: (3, 1)
|
283
|
+
# # ┌──────────┐
|
284
|
+
# # │ values │
|
285
|
+
# # │ --- │
|
286
|
+
# # │ f64 │
|
287
|
+
# # ╞══════════╡
|
288
|
+
# # │ 2.718282 │
|
289
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
290
|
+
# # │ 7.389056 │
|
291
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
292
|
+
# # │ 54.59815 │
|
293
|
+
# # └──────────┘
|
103
294
|
def exp
|
104
295
|
wrap_expr(_rbexpr.exp)
|
105
296
|
end
|
106
297
|
|
298
|
+
# Rename the output of an expression.
|
299
|
+
#
|
300
|
+
# @param name [String]
|
301
|
+
# New name.
|
302
|
+
#
|
303
|
+
# @return [Expr]
|
304
|
+
#
|
305
|
+
# @example
|
306
|
+
# df = Polars::DataFrame.new(
|
307
|
+
# {
|
308
|
+
# "a" => [1, 2, 3],
|
309
|
+
# "b" => ["a", "b", nil]
|
310
|
+
# }
|
311
|
+
# )
|
312
|
+
# df.select(
|
313
|
+
# [
|
314
|
+
# Polars.col("a").alias("bar"),
|
315
|
+
# Polars.col("b").alias("foo")
|
316
|
+
# ]
|
317
|
+
# )
|
318
|
+
# # =>
|
319
|
+
# # shape: (3, 2)
|
320
|
+
# # ┌─────┬──────┐
|
321
|
+
# # │ bar ┆ foo │
|
322
|
+
# # │ --- ┆ --- │
|
323
|
+
# # │ i64 ┆ str │
|
324
|
+
# # ╞═════╪══════╡
|
325
|
+
# # │ 1 ┆ a │
|
326
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
327
|
+
# # │ 2 ┆ b │
|
328
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
329
|
+
# # │ 3 ┆ null │
|
330
|
+
# # └─────┴──────┘
|
107
331
|
def alias(name)
|
108
332
|
wrap_expr(_rbexpr._alias(name))
|
109
333
|
end
|
110
334
|
|
111
335
|
# TODO support symbols for exclude
|
112
336
|
|
337
|
+
# Exclude certain columns from a wildcard/regex selection.
|
338
|
+
#
|
339
|
+
# You may also use regexes in the exclude list. They must start with `^` and end
|
340
|
+
# with `$`.
|
341
|
+
#
|
342
|
+
# @param columns [Object]
|
343
|
+
# Column(s) to exclude from selection.
|
344
|
+
# This can be:
|
113
345
|
#
|
346
|
+
# - a column name, or multiple column names
|
347
|
+
# - a regular expression starting with `^` and ending with `$`
|
348
|
+
# - a dtype or multiple dtypes
|
349
|
+
#
|
350
|
+
# @return [Expr]
|
351
|
+
#
|
352
|
+
# @example
|
353
|
+
# df = Polars::DataFrame.new(
|
354
|
+
# {
|
355
|
+
# "aa" => [1, 2, 3],
|
356
|
+
# "ba" => ["a", "b", nil],
|
357
|
+
# "cc" => [nil, 2.5, 1.5]
|
358
|
+
# }
|
359
|
+
# )
|
360
|
+
# df.select(Polars.all.exclude("ba"))
|
361
|
+
# # =>
|
362
|
+
# # shape: (3, 2)
|
363
|
+
# # ┌─────┬──────┐
|
364
|
+
# # │ aa ┆ cc │
|
365
|
+
# # │ --- ┆ --- │
|
366
|
+
# # │ i64 ┆ f64 │
|
367
|
+
# # ╞═════╪══════╡
|
368
|
+
# # │ 1 ┆ null │
|
369
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
370
|
+
# # │ 2 ┆ 2.5 │
|
371
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
372
|
+
# # │ 3 ┆ 1.5 │
|
373
|
+
# # └─────┴──────┘
|
114
374
|
def exclude(columns)
|
115
375
|
if columns.is_a?(String)
|
116
376
|
columns = [columns]
|
@@ -131,14 +391,43 @@ module Polars
|
|
131
391
|
end
|
132
392
|
end
|
133
393
|
|
394
|
+
# Keep the original root name of the expression.
|
395
|
+
#
|
396
|
+
# @return [Expr]
|
397
|
+
#
|
398
|
+
# @example
|
399
|
+
# df = Polars::DataFrame.new(
|
400
|
+
# {
|
401
|
+
# "a" => [1, 2],
|
402
|
+
# "b" => [3, 4]
|
403
|
+
# }
|
404
|
+
# )
|
405
|
+
# df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
|
406
|
+
# # =>
|
407
|
+
# # shape: (2, 2)
|
408
|
+
# # ┌─────┬─────┐
|
409
|
+
# # │ a ┆ b │
|
410
|
+
# # │ --- ┆ --- │
|
411
|
+
# # │ i64 ┆ i64 │
|
412
|
+
# # ╞═════╪═════╡
|
413
|
+
# # │ 9 ┆ 3 │
|
414
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
415
|
+
# # │ 18 ┆ 4 │
|
416
|
+
# # └─────┴─────┘
|
134
417
|
def keep_name
|
135
418
|
wrap_expr(_rbexpr.keep_name)
|
136
419
|
end
|
137
420
|
|
421
|
+
# Add a prefix to the root column name of the expression.
|
422
|
+
#
|
423
|
+
# @return [Expr]
|
138
424
|
def prefix(prefix)
|
139
425
|
wrap_expr(_rbexpr.prefix(prefix))
|
140
426
|
end
|
141
427
|
|
428
|
+
# Add a suffix to the root column name of the expression.
|
429
|
+
#
|
430
|
+
# @return [Expr]
|
142
431
|
def suffix(suffix)
|
143
432
|
wrap_expr(_rbexpr.suffix(suffix))
|
144
433
|
end
|
@@ -146,47 +435,351 @@ module Polars
|
|
146
435
|
# def map_alias
|
147
436
|
# end
|
148
437
|
|
438
|
+
# Negate a boolean expression.
|
439
|
+
#
|
440
|
+
# @return [Expr]
|
441
|
+
#
|
442
|
+
# @example
|
443
|
+
# df = Polars::DataFrame.new(
|
444
|
+
# {
|
445
|
+
# "a" => [true, false, false],
|
446
|
+
# "b" => ["a", "b", nil]
|
447
|
+
# }
|
448
|
+
# )
|
449
|
+
# # =>
|
450
|
+
# # shape: (3, 2)
|
451
|
+
# # ┌───────┬──────┐
|
452
|
+
# # │ a ┆ b │
|
453
|
+
# # │ --- ┆ --- │
|
454
|
+
# # │ bool ┆ str │
|
455
|
+
# # ╞═══════╪══════╡
|
456
|
+
# # │ true ┆ a │
|
457
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
458
|
+
# # │ false ┆ b │
|
459
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
460
|
+
# # │ false ┆ null │
|
461
|
+
# # └───────┴──────┘
|
149
462
|
#
|
463
|
+
# @example
|
464
|
+
# df.select(Polars.col("a").is_not)
|
465
|
+
# # =>
|
466
|
+
# # shape: (3, 1)
|
467
|
+
# # ┌───────┐
|
468
|
+
# # │ a │
|
469
|
+
# # │ --- │
|
470
|
+
# # │ bool │
|
471
|
+
# # ╞═══════╡
|
472
|
+
# # │ false │
|
473
|
+
# # ├╌╌╌╌╌╌╌┤
|
474
|
+
# # │ true │
|
475
|
+
# # ├╌╌╌╌╌╌╌┤
|
476
|
+
# # │ true │
|
477
|
+
# # └───────┘
|
150
478
|
def is_not
|
151
479
|
wrap_expr(_rbexpr.is_not)
|
152
480
|
end
|
153
481
|
|
482
|
+
# Returns a boolean Series indicating which values are null.
|
483
|
+
#
|
484
|
+
# @return [Expr]
|
485
|
+
#
|
486
|
+
# @example
|
487
|
+
# df = Polars::DataFrame.new(
|
488
|
+
# {
|
489
|
+
# "a" => [1, 2, nil, 1, 5],
|
490
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
491
|
+
# }
|
492
|
+
# )
|
493
|
+
# df.with_column(Polars.all.is_null.suffix("_isnull"))
|
494
|
+
# # =>
|
495
|
+
# # shape: (5, 4)
|
496
|
+
# # ┌──────┬─────┬──────────┬──────────┐
|
497
|
+
# # │ a ┆ b ┆ a_isnull ┆ b_isnull │
|
498
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
499
|
+
# # │ i64 ┆ f64 ┆ bool ┆ bool │
|
500
|
+
# # ╞══════╪═════╪══════════╪══════════╡
|
501
|
+
# # │ 1 ┆ 1.0 ┆ false ┆ false │
|
502
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
503
|
+
# # │ 2 ┆ 2.0 ┆ false ┆ false │
|
504
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
505
|
+
# # │ null ┆ NaN ┆ true ┆ false │
|
506
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
507
|
+
# # │ 1 ┆ 1.0 ┆ false ┆ false │
|
508
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
509
|
+
# # │ 5 ┆ 5.0 ┆ false ┆ false │
|
510
|
+
# # └──────┴─────┴──────────┴──────────┘
|
154
511
|
def is_null
|
155
512
|
wrap_expr(_rbexpr.is_null)
|
156
513
|
end
|
157
514
|
|
515
|
+
# Returns a boolean Series indicating which values are not null.
|
516
|
+
#
|
517
|
+
# @return [Expr]
|
518
|
+
#
|
519
|
+
# @example
|
520
|
+
# df = Polars::DataFrame.new(
|
521
|
+
# {
|
522
|
+
# "a" => [1, 2, nil, 1, 5],
|
523
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
524
|
+
# }
|
525
|
+
# )
|
526
|
+
# df.with_column(Polars.all.is_not_null.suffix("_not_null"))
|
527
|
+
# # =>
|
528
|
+
# # shape: (5, 4)
|
529
|
+
# # ┌──────┬─────┬────────────┬────────────┐
|
530
|
+
# # │ a ┆ b ┆ a_not_null ┆ b_not_null │
|
531
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
532
|
+
# # │ i64 ┆ f64 ┆ bool ┆ bool │
|
533
|
+
# # ╞══════╪═════╪════════════╪════════════╡
|
534
|
+
# # │ 1 ┆ 1.0 ┆ true ┆ true │
|
535
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
536
|
+
# # │ 2 ┆ 2.0 ┆ true ┆ true │
|
537
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
538
|
+
# # │ null ┆ NaN ┆ false ┆ true │
|
539
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
540
|
+
# # │ 1 ┆ 1.0 ┆ true ┆ true │
|
541
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
542
|
+
# # │ 5 ┆ 5.0 ┆ true ┆ true │
|
543
|
+
# # └──────┴─────┴────────────┴────────────┘
|
158
544
|
def is_not_null
|
159
545
|
wrap_expr(_rbexpr.is_not_null)
|
160
546
|
end
|
161
547
|
|
548
|
+
# Returns a boolean Series indicating which values are finite.
|
549
|
+
#
|
550
|
+
# @return [Expr]
|
551
|
+
#
|
552
|
+
# @example
|
553
|
+
# df = Polars::DataFrame.new(
|
554
|
+
# {
|
555
|
+
# "A" => [1.0, 2],
|
556
|
+
# "B" => [3.0, Float::INFINITY]
|
557
|
+
# }
|
558
|
+
# )
|
559
|
+
# df.select(Polars.all.is_finite)
|
560
|
+
# # =>
|
561
|
+
# # shape: (2, 2)
|
562
|
+
# # ┌──────┬───────┐
|
563
|
+
# # │ A ┆ B │
|
564
|
+
# # │ --- ┆ --- │
|
565
|
+
# # │ bool ┆ bool │
|
566
|
+
# # ╞══════╪═══════╡
|
567
|
+
# # │ true ┆ true │
|
568
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
569
|
+
# # │ true ┆ false │
|
570
|
+
# # └──────┴───────┘
|
162
571
|
def is_finite
|
163
572
|
wrap_expr(_rbexpr.is_finite)
|
164
573
|
end
|
165
574
|
|
575
|
+
# Returns a boolean Series indicating which values are infinite.
|
576
|
+
#
|
577
|
+
# @return [Expr]
|
578
|
+
#
|
579
|
+
# @example
|
580
|
+
# df = Polars::DataFrame.new(
|
581
|
+
# {
|
582
|
+
# "A" => [1.0, 2],
|
583
|
+
# "B" => [3.0, Float::INFINITY]
|
584
|
+
# }
|
585
|
+
# )
|
586
|
+
# df.select(Polars.all.is_infinite)
|
587
|
+
# # =>
|
588
|
+
# # shape: (2, 2)
|
589
|
+
# # ┌───────┬───────┐
|
590
|
+
# # │ A ┆ B │
|
591
|
+
# # │ --- ┆ --- │
|
592
|
+
# # │ bool ┆ bool │
|
593
|
+
# # ╞═══════╪═══════╡
|
594
|
+
# # │ false ┆ false │
|
595
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
596
|
+
# # │ false ┆ true │
|
597
|
+
# # └───────┴───────┘
|
166
598
|
def is_infinite
|
167
599
|
wrap_expr(_rbexpr.is_infinite)
|
168
600
|
end
|
169
601
|
|
602
|
+
# Returns a boolean Series indicating which values are NaN.
|
603
|
+
#
|
604
|
+
# @note
|
605
|
+
# Floating point `NaN` (Not A Number) should not be confused
|
606
|
+
# with missing data represented as `nil`.
|
607
|
+
#
|
608
|
+
# @return [Expr]
|
609
|
+
#
|
610
|
+
# @example
|
611
|
+
# df = Polars::DataFrame.new(
|
612
|
+
# {
|
613
|
+
# "a" => [1, 2, nil, 1, 5],
|
614
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
615
|
+
# }
|
616
|
+
# )
|
617
|
+
# df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
|
618
|
+
# # =>
|
619
|
+
# # shape: (5, 3)
|
620
|
+
# # ┌──────┬─────┬─────────┐
|
621
|
+
# # │ a ┆ b ┆ b_isnan │
|
622
|
+
# # │ --- ┆ --- ┆ --- │
|
623
|
+
# # │ i64 ┆ f64 ┆ bool │
|
624
|
+
# # ╞══════╪═════╪═════════╡
|
625
|
+
# # │ 1 ┆ 1.0 ┆ false │
|
626
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
627
|
+
# # │ 2 ┆ 2.0 ┆ false │
|
628
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
629
|
+
# # │ null ┆ NaN ┆ true │
|
630
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
631
|
+
# # │ 1 ┆ 1.0 ┆ false │
|
632
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
633
|
+
# # │ 5 ┆ 5.0 ┆ false │
|
634
|
+
# # └──────┴─────┴─────────┘
|
170
635
|
def is_nan
|
171
636
|
wrap_expr(_rbexpr.is_nan)
|
172
637
|
end
|
173
638
|
|
639
|
+
# Returns a boolean Series indicating which values are not NaN.
|
640
|
+
#
|
641
|
+
# @note
|
642
|
+
# Floating point `NaN` (Not A Number) should not be confused
|
643
|
+
# with missing data represented as `nil`.
|
644
|
+
#
|
645
|
+
# @return [Expr]
|
646
|
+
#
|
647
|
+
# @example
|
648
|
+
# df = Polars::DataFrame.new(
|
649
|
+
# {
|
650
|
+
# "a" => [1, 2, nil, 1, 5],
|
651
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
652
|
+
# }
|
653
|
+
# )
|
654
|
+
# df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
|
655
|
+
# # =>
|
656
|
+
# # shape: (5, 3)
|
657
|
+
# # ┌──────┬─────┬──────────────┐
|
658
|
+
# # │ a ┆ b ┆ b_is_not_nan │
|
659
|
+
# # │ --- ┆ --- ┆ --- │
|
660
|
+
# # │ i64 ┆ f64 ┆ bool │
|
661
|
+
# # ╞══════╪═════╪══════════════╡
|
662
|
+
# # │ 1 ┆ 1.0 ┆ true │
|
663
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
664
|
+
# # │ 2 ┆ 2.0 ┆ true │
|
665
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
666
|
+
# # │ null ┆ NaN ┆ false │
|
667
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
668
|
+
# # │ 1 ┆ 1.0 ┆ true │
|
669
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
670
|
+
# # │ 5 ┆ 5.0 ┆ true │
|
671
|
+
# # └──────┴─────┴──────────────┘
|
174
672
|
def is_not_nan
|
175
673
|
wrap_expr(_rbexpr.is_not_nan)
|
176
674
|
end
|
177
675
|
|
676
|
+
# Get the group indexes of the group by operation.
|
677
|
+
#
|
678
|
+
# Should be used in aggregation context only.
|
679
|
+
#
|
680
|
+
# @return [Expr]
|
681
|
+
#
|
682
|
+
# @example
|
683
|
+
# df = Polars::DataFrame.new(
|
684
|
+
# {
|
685
|
+
# "group" => [
|
686
|
+
# "one",
|
687
|
+
# "one",
|
688
|
+
# "one",
|
689
|
+
# "two",
|
690
|
+
# "two",
|
691
|
+
# "two"
|
692
|
+
# ],
|
693
|
+
# "value" => [94, 95, 96, 97, 97, 99]
|
694
|
+
# }
|
695
|
+
# )
|
696
|
+
# df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
|
697
|
+
# # =>
|
698
|
+
# # shape: (2, 2)
|
699
|
+
# # ┌───────┬───────────┐
|
700
|
+
# # │ group ┆ value │
|
701
|
+
# # │ --- ┆ --- │
|
702
|
+
# # │ str ┆ list[u32] │
|
703
|
+
# # ╞═══════╪═══════════╡
|
704
|
+
# # │ one ┆ [0, 1, 2] │
|
705
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
706
|
+
# # │ two ┆ [3, 4, 5] │
|
707
|
+
# # └───────┴───────────┘
|
178
708
|
def agg_groups
|
179
709
|
wrap_expr(_rbexpr.agg_groups)
|
180
710
|
end
|
181
711
|
|
712
|
+
# Count the number of values in this expression.
|
713
|
+
#
|
714
|
+
# @return [Expr]
|
715
|
+
#
|
716
|
+
# @example
|
717
|
+
# df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
|
718
|
+
# df.select(Polars.all.count)
|
719
|
+
# # =>
|
720
|
+
# # shape: (1, 2)
|
721
|
+
# # ┌─────┬─────┐
|
722
|
+
# # │ a ┆ b │
|
723
|
+
# # │ --- ┆ --- │
|
724
|
+
# # │ u32 ┆ u32 │
|
725
|
+
# # ╞═════╪═════╡
|
726
|
+
# # │ 3 ┆ 3 │
|
727
|
+
# # └─────┴─────┘
|
182
728
|
def count
|
183
729
|
wrap_expr(_rbexpr.count)
|
184
730
|
end
|
185
731
|
|
732
|
+
# Count the number of values in this expression.
|
733
|
+
#
|
734
|
+
# Alias for {#count}.
|
735
|
+
#
|
736
|
+
# @return [Expr]
|
737
|
+
#
|
738
|
+
# @example
|
739
|
+
# df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
|
740
|
+
# df.select(Polars.all.len)
|
741
|
+
# # =>
|
742
|
+
# # shape: (1, 2)
|
743
|
+
# # ┌─────┬─────┐
|
744
|
+
# # │ a ┆ b │
|
745
|
+
# # │ --- ┆ --- │
|
746
|
+
# # │ u32 ┆ u32 │
|
747
|
+
# # ╞═════╪═════╡
|
748
|
+
# # │ 3 ┆ 3 │
|
749
|
+
# # └─────┴─────┘
|
186
750
|
def len
|
187
751
|
count
|
188
752
|
end
|
189
753
|
|
754
|
+
# Get a slice of this expression.
|
755
|
+
#
|
756
|
+
# @param offset [Integer]
|
757
|
+
# Start index. Negative indexing is supported.
|
758
|
+
# @param length [Integer]
|
759
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
760
|
+
# will be selected.
|
761
|
+
#
|
762
|
+
# @return [Expr]
|
763
|
+
#
|
764
|
+
# @example
|
765
|
+
# df = Polars::DataFrame.new(
|
766
|
+
# {
|
767
|
+
# "a" => [8, 9, 10, 11],
|
768
|
+
# "b" => [nil, 4, 4, 4]
|
769
|
+
# }
|
770
|
+
# )
|
771
|
+
# df.select(Polars.all.slice(1, 2))
|
772
|
+
# # =>
|
773
|
+
# # shape: (2, 2)
|
774
|
+
# # ┌─────┬─────┐
|
775
|
+
# # │ a ┆ b │
|
776
|
+
# # │ --- ┆ --- │
|
777
|
+
# # │ i64 ┆ i64 │
|
778
|
+
# # ╞═════╪═════╡
|
779
|
+
# # │ 9 ┆ 4 │
|
780
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
781
|
+
# # │ 10 ┆ 4 │
|
782
|
+
# # └─────┴─────┘
|
190
783
|
def slice(offset, length = nil)
|
191
784
|
if !offset.is_a?(Expr)
|
192
785
|
offset = Polars.lit(offset)
|
@@ -197,94 +790,785 @@ module Polars
|
|
197
790
|
wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
|
198
791
|
end
|
199
792
|
|
793
|
+
# Append expressions.
|
794
|
+
#
|
795
|
+
# This is done by adding the chunks of `other` to this `Series`.
|
796
|
+
#
|
797
|
+
# @param other [Expr]
|
798
|
+
# Expression to append.
|
799
|
+
# @param upcast [Boolean]
|
800
|
+
# Cast both `Series` to the same supertype.
|
801
|
+
#
|
802
|
+
# @return [Expr]
|
803
|
+
#
|
804
|
+
# @example
|
805
|
+
# df = Polars::DataFrame.new(
|
806
|
+
# {
|
807
|
+
# "a" => [8, 9, 10],
|
808
|
+
# "b" => [nil, 4, 4]
|
809
|
+
# }
|
810
|
+
# )
|
811
|
+
# df.select(Polars.all.head(1).append(Polars.all.tail(1)))
|
812
|
+
# # =>
|
813
|
+
# # shape: (2, 2)
|
814
|
+
# # ┌─────┬──────┐
|
815
|
+
# # │ a ┆ b │
|
816
|
+
# # │ --- ┆ --- │
|
817
|
+
# # │ i64 ┆ i64 │
|
818
|
+
# # ╞═════╪══════╡
|
819
|
+
# # │ 8 ┆ null │
|
820
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
821
|
+
# # │ 10 ┆ 4 │
|
822
|
+
# # └─────┴──────┘
|
200
823
|
def append(other, upcast: true)
|
201
824
|
other = Utils.expr_to_lit_or_expr(other)
|
202
825
|
wrap_expr(_rbexpr.append(other._rbexpr, upcast))
|
203
826
|
end
|
204
827
|
|
828
|
+
# Create a single chunk of memory for this Series.
|
829
|
+
#
|
830
|
+
# @return [Expr]
|
831
|
+
#
|
832
|
+
# @example Create a Series with 3 nulls, append column a then rechunk
|
833
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
834
|
+
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
835
|
+
# # =>
|
836
|
+
# # shape: (6, 1)
|
837
|
+
# # ┌─────────┐
|
838
|
+
# # │ literal │
|
839
|
+
# # │ --- │
|
840
|
+
# # │ i64 │
|
841
|
+
# # ╞═════════╡
|
842
|
+
# # │ null │
|
843
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
844
|
+
# # │ null │
|
845
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
846
|
+
# # │ null │
|
847
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
848
|
+
# # │ 1 │
|
849
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
850
|
+
# # │ 1 │
|
851
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
852
|
+
# # │ 2 │
|
853
|
+
# # └─────────┘
|
205
854
|
def rechunk
|
206
855
|
wrap_expr(_rbexpr.rechunk)
|
207
856
|
end
|
208
857
|
|
858
|
+
# Drop null values.
|
859
|
+
#
|
860
|
+
# @return [Expr]
|
861
|
+
#
|
862
|
+
# @example
|
863
|
+
# df = Polars::DataFrame.new(
|
864
|
+
# {
|
865
|
+
# "a" => [8, 9, 10, 11],
|
866
|
+
# "b" => [nil, 4.0, 4.0, Float::NAN]
|
867
|
+
# }
|
868
|
+
# )
|
869
|
+
# df.select(Polars.col("b").drop_nulls)
|
870
|
+
# # =>
|
871
|
+
# # shape: (3, 1)
|
872
|
+
# # ┌─────┐
|
873
|
+
# # │ b │
|
874
|
+
# # │ --- │
|
875
|
+
# # │ f64 │
|
876
|
+
# # ╞═════╡
|
877
|
+
# # │ 4.0 │
|
878
|
+
# # ├╌╌╌╌╌┤
|
879
|
+
# # │ 4.0 │
|
880
|
+
# # ├╌╌╌╌╌┤
|
881
|
+
# # │ NaN │
|
882
|
+
# # └─────┘
|
209
883
|
def drop_nulls
|
210
884
|
wrap_expr(_rbexpr.drop_nulls)
|
211
885
|
end
|
212
886
|
|
887
|
+
# Drop floating point NaN values.
|
888
|
+
#
|
889
|
+
# @return [Expr]
|
890
|
+
#
|
891
|
+
# @example
|
892
|
+
# df = Polars::DataFrame.new(
|
893
|
+
# {
|
894
|
+
# "a" => [8, 9, 10, 11],
|
895
|
+
# "b" => [nil, 4.0, 4.0, Float::NAN]
|
896
|
+
# }
|
897
|
+
# )
|
898
|
+
# df.select(Polars.col("b").drop_nans)
|
899
|
+
# # =>
|
900
|
+
# # shape: (3, 1)
|
901
|
+
# # ┌──────┐
|
902
|
+
# # │ b │
|
903
|
+
# # │ --- │
|
904
|
+
# # │ f64 │
|
905
|
+
# # ╞══════╡
|
906
|
+
# # │ null │
|
907
|
+
# # ├╌╌╌╌╌╌┤
|
908
|
+
# # │ 4.0 │
|
909
|
+
# # ├╌╌╌╌╌╌┤
|
910
|
+
# # │ 4.0 │
|
911
|
+
# # └──────┘
|
213
912
|
def drop_nans
|
214
913
|
wrap_expr(_rbexpr.drop_nans)
|
215
914
|
end
|
216
915
|
|
916
|
+
# Get an array with the cumulative sum computed at every element.
|
917
|
+
#
|
918
|
+
# @param reverse [Boolean]
|
919
|
+
# Reverse the operation.
|
920
|
+
#
|
921
|
+
# @return [Expr]
|
922
|
+
#
|
923
|
+
# @note
|
924
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
925
|
+
# `:i64` before summing to prevent overflow issues.
|
926
|
+
#
|
927
|
+
# @example
|
928
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
929
|
+
# df.select(
|
930
|
+
# [
|
931
|
+
# Polars.col("a").cumsum,
|
932
|
+
# Polars.col("a").cumsum(reverse: true).alias("a_reverse")
|
933
|
+
# ]
|
934
|
+
# )
|
935
|
+
# # =>
|
936
|
+
# # shape: (4, 2)
|
937
|
+
# # ┌─────┬───────────┐
|
938
|
+
# # │ a ┆ a_reverse │
|
939
|
+
# # │ --- ┆ --- │
|
940
|
+
# # │ i64 ┆ i64 │
|
941
|
+
# # ╞═════╪═══════════╡
|
942
|
+
# # │ 1 ┆ 10 │
|
943
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
944
|
+
# # │ 3 ┆ 9 │
|
945
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
946
|
+
# # │ 6 ┆ 7 │
|
947
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
948
|
+
# # │ 10 ┆ 4 │
|
949
|
+
# # └─────┴───────────┘
|
217
950
|
def cumsum(reverse: false)
|
218
951
|
wrap_expr(_rbexpr.cumsum(reverse))
|
219
952
|
end
|
220
953
|
|
954
|
+
# Get an array with the cumulative product computed at every element.
|
955
|
+
#
|
956
|
+
# @param reverse [Boolean]
|
957
|
+
# Reverse the operation.
|
958
|
+
#
|
959
|
+
# @return [Expr]
|
960
|
+
#
|
961
|
+
# @note
|
962
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
963
|
+
# `:i64` before multiplying to prevent overflow issues.
|
964
|
+
#
|
965
|
+
# @example
|
966
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
967
|
+
# df.select(
|
968
|
+
# [
|
969
|
+
# Polars.col("a").cumprod,
|
970
|
+
# Polars.col("a").cumprod(reverse: true).alias("a_reverse")
|
971
|
+
# ]
|
972
|
+
# )
|
973
|
+
# # =>
|
974
|
+
# # shape: (4, 2)
|
975
|
+
# # ┌─────┬───────────┐
|
976
|
+
# # │ a ┆ a_reverse │
|
977
|
+
# # │ --- ┆ --- │
|
978
|
+
# # │ i64 ┆ i64 │
|
979
|
+
# # ╞═════╪═══════════╡
|
980
|
+
# # │ 1 ┆ 24 │
|
981
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
982
|
+
# # │ 2 ┆ 24 │
|
983
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
984
|
+
# # │ 6 ┆ 12 │
|
985
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
986
|
+
# # │ 24 ┆ 4 │
|
987
|
+
# # └─────┴───────────┘
|
221
988
|
def cumprod(reverse: false)
|
222
989
|
wrap_expr(_rbexpr.cumprod(reverse))
|
223
990
|
end
|
224
991
|
|
992
|
+
# Get an array with the cumulative min computed at every element.
|
993
|
+
#
|
994
|
+
# @param reverse [Boolean]
|
995
|
+
# Reverse the operation.
|
996
|
+
#
|
997
|
+
# @return [Expr]
|
998
|
+
#
|
999
|
+
# @example
|
1000
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1001
|
+
# df.select(
|
1002
|
+
# [
|
1003
|
+
# Polars.col("a").cummin,
|
1004
|
+
# Polars.col("a").cummin(reverse: true).alias("a_reverse")
|
1005
|
+
# ]
|
1006
|
+
# )
|
1007
|
+
# # =>
|
1008
|
+
# # shape: (4, 2)
|
1009
|
+
# # ┌─────┬───────────┐
|
1010
|
+
# # │ a ┆ a_reverse │
|
1011
|
+
# # │ --- ┆ --- │
|
1012
|
+
# # │ i64 ┆ i64 │
|
1013
|
+
# # ╞═════╪═══════════╡
|
1014
|
+
# # │ 1 ┆ 1 │
|
1015
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1016
|
+
# # │ 1 ┆ 2 │
|
1017
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1018
|
+
# # │ 1 ┆ 3 │
|
1019
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1020
|
+
# # │ 1 ┆ 4 │
|
1021
|
+
# # └─────┴───────────┘
|
225
1022
|
def cummin(reverse: false)
|
226
1023
|
wrap_expr(_rbexpr.cummin(reverse))
|
227
1024
|
end
|
228
1025
|
|
1026
|
+
# Get an array with the cumulative max computed at every element.
|
1027
|
+
#
|
1028
|
+
# @param reverse [Boolean]
|
1029
|
+
# Reverse the operation.
|
1030
|
+
#
|
1031
|
+
# @return [Expr]
|
1032
|
+
#
|
1033
|
+
# @example
|
1034
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1035
|
+
# df.select(
|
1036
|
+
# [
|
1037
|
+
# Polars.col("a").cummax,
|
1038
|
+
# Polars.col("a").cummax(reverse: true).alias("a_reverse")
|
1039
|
+
# ]
|
1040
|
+
# )
|
1041
|
+
# # =>
|
1042
|
+
# # shape: (4, 2)
|
1043
|
+
# # ┌─────┬───────────┐
|
1044
|
+
# # │ a ┆ a_reverse │
|
1045
|
+
# # │ --- ┆ --- │
|
1046
|
+
# # │ i64 ┆ i64 │
|
1047
|
+
# # ╞═════╪═══════════╡
|
1048
|
+
# # │ 1 ┆ 4 │
|
1049
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1050
|
+
# # │ 2 ┆ 4 │
|
1051
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1052
|
+
# # │ 3 ┆ 4 │
|
1053
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1054
|
+
# # │ 4 ┆ 4 │
|
1055
|
+
# # └─────┴───────────┘
|
229
1056
|
def cummax(reverse: false)
|
230
1057
|
wrap_expr(_rbexpr.cummax(reverse))
|
231
1058
|
end
|
232
1059
|
|
1060
|
+
# Get an array with the cumulative count computed at every element.
|
1061
|
+
#
|
1062
|
+
# Counts from 0 to `len - 1`.
|
1063
|
+
#
|
1064
|
+
# @param reverse [Boolean]
|
1065
|
+
# Reverse the operation.
|
1066
|
+
#
|
1067
|
+
# @return [Expr]
|
1068
|
+
#
|
1069
|
+
# @example
|
1070
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
|
1071
|
+
# df.select(
|
1072
|
+
# [
|
1073
|
+
# Polars.col("a").cumcount,
|
1074
|
+
# Polars.col("a").cumcount(reverse: true).alias("a_reverse")
|
1075
|
+
# ]
|
1076
|
+
# )
|
1077
|
+
# # =>
|
1078
|
+
# # shape: (4, 2)
|
1079
|
+
# # ┌─────┬───────────┐
|
1080
|
+
# # │ a ┆ a_reverse │
|
1081
|
+
# # │ --- ┆ --- │
|
1082
|
+
# # │ u32 ┆ u32 │
|
1083
|
+
# # ╞═════╪═══════════╡
|
1084
|
+
# # │ 0 ┆ 3 │
|
1085
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1086
|
+
# # │ 1 ┆ 2 │
|
1087
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1088
|
+
# # │ 2 ┆ 1 │
|
1089
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1090
|
+
# # │ 3 ┆ 0 │
|
1091
|
+
# # └─────┴───────────┘
|
233
1092
|
def cumcount(reverse: false)
|
234
1093
|
wrap_expr(_rbexpr.cumcount(reverse))
|
235
1094
|
end
|
236
1095
|
|
1096
|
+
# Rounds down to the nearest integer value.
|
1097
|
+
#
|
1098
|
+
# Only works on floating point Series.
|
1099
|
+
#
|
1100
|
+
# @return [Expr]
|
1101
|
+
#
|
1102
|
+
# @example
|
1103
|
+
# df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
|
1104
|
+
# df.select(Polars.col("a").floor)
|
1105
|
+
# # =>
|
1106
|
+
# # shape: (4, 1)
|
1107
|
+
# # ┌─────┐
|
1108
|
+
# # │ a │
|
1109
|
+
# # │ --- │
|
1110
|
+
# # │ f64 │
|
1111
|
+
# # ╞═════╡
|
1112
|
+
# # │ 0.0 │
|
1113
|
+
# # ├╌╌╌╌╌┤
|
1114
|
+
# # │ 0.0 │
|
1115
|
+
# # ├╌╌╌╌╌┤
|
1116
|
+
# # │ 1.0 │
|
1117
|
+
# # ├╌╌╌╌╌┤
|
1118
|
+
# # │ 1.0 │
|
1119
|
+
# # └─────┘
|
237
1120
|
def floor
|
238
1121
|
wrap_expr(_rbexpr.floor)
|
239
1122
|
end
|
240
1123
|
|
1124
|
+
# Rounds up to the nearest integer value.
|
1125
|
+
#
|
1126
|
+
# Only works on floating point Series.
|
1127
|
+
#
|
1128
|
+
# @return [Expr]
|
1129
|
+
#
|
1130
|
+
# @example
|
1131
|
+
# df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
|
1132
|
+
# df.select(Polars.col("a").ceil)
|
1133
|
+
# # =>
|
1134
|
+
# # shape: (4, 1)
|
1135
|
+
# # ┌─────┐
|
1136
|
+
# # │ a │
|
1137
|
+
# # │ --- │
|
1138
|
+
# # │ f64 │
|
1139
|
+
# # ╞═════╡
|
1140
|
+
# # │ 1.0 │
|
1141
|
+
# # ├╌╌╌╌╌┤
|
1142
|
+
# # │ 1.0 │
|
1143
|
+
# # ├╌╌╌╌╌┤
|
1144
|
+
# # │ 1.0 │
|
1145
|
+
# # ├╌╌╌╌╌┤
|
1146
|
+
# # │ 2.0 │
|
1147
|
+
# # └─────┘
|
241
1148
|
def ceil
|
242
1149
|
wrap_expr(_rbexpr.ceil)
|
243
1150
|
end
|
244
1151
|
|
1152
|
+
# Round underlying floating point data by `decimals` digits.
|
1153
|
+
#
|
1154
|
+
# @param decimals [Integer]
|
1155
|
+
# Number of decimals to round by.
|
1156
|
+
#
|
1157
|
+
# @return [Expr]
|
1158
|
+
#
|
1159
|
+
# @example
|
1160
|
+
# df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
|
1161
|
+
# df.select(Polars.col("a").round(1))
|
1162
|
+
# # =>
|
1163
|
+
# # shape: (4, 1)
|
1164
|
+
# # ┌─────┐
|
1165
|
+
# # │ a │
|
1166
|
+
# # │ --- │
|
1167
|
+
# # │ f64 │
|
1168
|
+
# # ╞═════╡
|
1169
|
+
# # │ 0.3 │
|
1170
|
+
# # ├╌╌╌╌╌┤
|
1171
|
+
# # │ 0.5 │
|
1172
|
+
# # ├╌╌╌╌╌┤
|
1173
|
+
# # │ 1.0 │
|
1174
|
+
# # ├╌╌╌╌╌┤
|
1175
|
+
# # │ 1.2 │
|
1176
|
+
# # └─────┘
|
245
1177
|
def round(decimals = 0)
|
246
1178
|
wrap_expr(_rbexpr.round(decimals))
|
247
1179
|
end
|
248
1180
|
|
1181
|
+
# Compute the dot/inner product between two Expressions.
|
1182
|
+
#
|
1183
|
+
# @param other [Expr]
|
1184
|
+
# Expression to compute dot product with.
|
1185
|
+
#
|
1186
|
+
# @return [Expr]
|
1187
|
+
#
|
1188
|
+
# @example
|
1189
|
+
# df = Polars::DataFrame.new(
|
1190
|
+
# {
|
1191
|
+
# "a" => [1, 3, 5],
|
1192
|
+
# "b" => [2, 4, 6]
|
1193
|
+
# }
|
1194
|
+
# )
|
1195
|
+
# df.select(Polars.col("a").dot(Polars.col("b")))
|
1196
|
+
# # =>
|
1197
|
+
# # shape: (1, 1)
|
1198
|
+
# # ┌─────┐
|
1199
|
+
# # │ a │
|
1200
|
+
# # │ --- │
|
1201
|
+
# # │ i64 │
|
1202
|
+
# # ╞═════╡
|
1203
|
+
# # │ 44 │
|
1204
|
+
# # └─────┘
|
249
1205
|
def dot(other)
|
250
1206
|
other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
|
251
1207
|
wrap_expr(_rbexpr.dot(other._rbexpr))
|
252
1208
|
end
|
253
1209
|
|
1210
|
+
# Compute the most occurring value(s).
|
1211
|
+
#
|
1212
|
+
# Can return multiple values.
|
1213
|
+
#
|
1214
|
+
# @return [Expr]
|
1215
|
+
#
|
1216
|
+
# @example
|
1217
|
+
# df = Polars::DataFrame.new(
|
1218
|
+
# {
|
1219
|
+
# "a" => [1, 1, 2, 3],
|
1220
|
+
# "b" => [1, 1, 2, 2]
|
1221
|
+
# }
|
1222
|
+
# )
|
1223
|
+
# df.select(Polars.all.mode)
|
1224
|
+
# # =>
|
1225
|
+
# # shape: (2, 2)
|
1226
|
+
# # ┌─────┬─────┐
|
1227
|
+
# # │ a ┆ b │
|
1228
|
+
# # │ --- ┆ --- │
|
1229
|
+
# # │ i64 ┆ i64 │
|
1230
|
+
# # ╞═════╪═════╡
|
1231
|
+
# # │ 1 ┆ 1 │
|
1232
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1233
|
+
# # │ 1 ┆ 2 │
|
1234
|
+
# # └─────┴─────┘
|
254
1235
|
def mode
|
255
1236
|
wrap_expr(_rbexpr.mode)
|
256
1237
|
end
|
257
1238
|
|
1239
|
+
# Cast between data types.
|
1240
|
+
#
|
1241
|
+
# @param dtype [Symbol]
|
1242
|
+
# DataType to cast to.
|
1243
|
+
# @param strict [Boolean]
|
1244
|
+
# Throw an error if a cast could not be done.
|
1245
|
+
# For instance, due to an overflow.
|
1246
|
+
#
|
1247
|
+
# @return [Expr]
|
1248
|
+
#
|
1249
|
+
# @example
|
1250
|
+
# df = Polars::DataFrame.new(
|
1251
|
+
# {
|
1252
|
+
# "a" => [1, 2, 3],
|
1253
|
+
# "b" => ["4", "5", "6"]
|
1254
|
+
# }
|
1255
|
+
# )
|
1256
|
+
# df.with_columns(
|
1257
|
+
# [
|
1258
|
+
# Polars.col("a").cast(:f64),
|
1259
|
+
# Polars.col("b").cast(:i32)
|
1260
|
+
# ]
|
1261
|
+
# )
|
1262
|
+
# # =>
|
1263
|
+
# # shape: (3, 2)
|
1264
|
+
# # ┌─────┬─────┐
|
1265
|
+
# # │ a ┆ b │
|
1266
|
+
# # │ --- ┆ --- │
|
1267
|
+
# # │ f64 ┆ i32 │
|
1268
|
+
# # ╞═════╪═════╡
|
1269
|
+
# # │ 1.0 ┆ 4 │
|
1270
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1271
|
+
# # │ 2.0 ┆ 5 │
|
1272
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1273
|
+
# # │ 3.0 ┆ 6 │
|
1274
|
+
# # └─────┴─────┘
|
258
1275
|
def cast(dtype, strict: true)
|
259
1276
|
dtype = Utils.rb_type_to_dtype(dtype)
|
260
1277
|
wrap_expr(_rbexpr.cast(dtype, strict))
|
261
1278
|
end
|
262
1279
|
|
1280
|
+
# Sort this column. In projection/selection context, the whole column is sorted.
|
1281
|
+
#
|
1282
|
+
# If used in a groupby context, the groups are sorted.
|
1283
|
+
#
|
1284
|
+
# @param reverse [Boolean]
|
1285
|
+
# false -> order from small to large.
|
1286
|
+
# true -> order from large to small.
|
1287
|
+
# @param nulls_last [Boolean]
|
1288
|
+
# If true nulls are considered to be larger than any valid value.
|
1289
|
+
#
|
1290
|
+
# @return [Expr]
|
1291
|
+
#
|
1292
|
+
# @example
|
1293
|
+
# df = Polars::DataFrame.new(
|
1294
|
+
# {
|
1295
|
+
# "group" => [
|
1296
|
+
# "one",
|
1297
|
+
# "one",
|
1298
|
+
# "one",
|
1299
|
+
# "two",
|
1300
|
+
# "two",
|
1301
|
+
# "two"
|
1302
|
+
# ],
|
1303
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1304
|
+
# }
|
1305
|
+
# )
|
1306
|
+
# df.select(Polars.col("value").sort)
|
1307
|
+
# # =>
|
1308
|
+
# # shape: (6, 1)
|
1309
|
+
# # ┌───────┐
|
1310
|
+
# # │ value │
|
1311
|
+
# # │ --- │
|
1312
|
+
# # │ i64 │
|
1313
|
+
# # ╞═══════╡
|
1314
|
+
# # │ 1 │
|
1315
|
+
# # ├╌╌╌╌╌╌╌┤
|
1316
|
+
# # │ 2 │
|
1317
|
+
# # ├╌╌╌╌╌╌╌┤
|
1318
|
+
# # │ 3 │
|
1319
|
+
# # ├╌╌╌╌╌╌╌┤
|
1320
|
+
# # │ 4 │
|
1321
|
+
# # ├╌╌╌╌╌╌╌┤
|
1322
|
+
# # │ 98 │
|
1323
|
+
# # ├╌╌╌╌╌╌╌┤
|
1324
|
+
# # │ 99 │
|
1325
|
+
# # └───────┘
|
1326
|
+
#
|
1327
|
+
# @example
|
1328
|
+
# df.select(Polars.col("value").sort)
|
1329
|
+
# # =>
|
1330
|
+
# # shape: (6, 1)
|
1331
|
+
# # ┌───────┐
|
1332
|
+
# # │ value │
|
1333
|
+
# # │ --- │
|
1334
|
+
# # │ i64 │
|
1335
|
+
# # ╞═══════╡
|
1336
|
+
# # │ 1 │
|
1337
|
+
# # ├╌╌╌╌╌╌╌┤
|
1338
|
+
# # │ 2 │
|
1339
|
+
# # ├╌╌╌╌╌╌╌┤
|
1340
|
+
# # │ 3 │
|
1341
|
+
# # ├╌╌╌╌╌╌╌┤
|
1342
|
+
# # │ 4 │
|
1343
|
+
# # ├╌╌╌╌╌╌╌┤
|
1344
|
+
# # │ 98 │
|
1345
|
+
# # ├╌╌╌╌╌╌╌┤
|
1346
|
+
# # │ 99 │
|
1347
|
+
# # └───────┘
|
1348
|
+
#
|
1349
|
+
# @example
|
1350
|
+
# df.groupby("group").agg(Polars.col("value").sort)
|
1351
|
+
# # =>
|
1352
|
+
# # shape: (2, 2)
|
1353
|
+
# # ┌───────┬────────────┐
|
1354
|
+
# # │ group ┆ value │
|
1355
|
+
# # │ --- ┆ --- │
|
1356
|
+
# # │ str ┆ list[i64] │
|
1357
|
+
# # ╞═══════╪════════════╡
|
1358
|
+
# # │ two ┆ [3, 4, 99] │
|
1359
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
1360
|
+
# # │ one ┆ [1, 2, 98] │
|
1361
|
+
# # └───────┴────────────┘
|
263
1362
|
def sort(reverse: false, nulls_last: false)
|
264
1363
|
wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
|
265
1364
|
end
|
266
1365
|
|
1366
|
+
# Return the `k` largest elements.
|
1367
|
+
#
|
1368
|
+
# If `reverse: true`, the smallest elements will be given.
|
1369
|
+
#
|
1370
|
+
# @param k [Integer]
|
1371
|
+
# Number of elements to return.
|
1372
|
+
# @param reverse [Boolean]
|
1373
|
+
# Return the smallest elements.
|
1374
|
+
#
|
1375
|
+
# @return [Expr]
|
1376
|
+
#
|
1377
|
+
# @example
|
1378
|
+
# df = Polars::DataFrame.new(
|
1379
|
+
# {
|
1380
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1381
|
+
# }
|
1382
|
+
# )
|
1383
|
+
# df.select(
|
1384
|
+
# [
|
1385
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1386
|
+
# Polars.col("value").top_k(reverse: true).alias("bottom_k")
|
1387
|
+
# ]
|
1388
|
+
# )
|
1389
|
+
# # =>
|
1390
|
+
# # shape: (5, 2)
|
1391
|
+
# # ┌───────┬──────────┐
|
1392
|
+
# # │ top_k ┆ bottom_k │
|
1393
|
+
# # │ --- ┆ --- │
|
1394
|
+
# # │ i64 ┆ i64 │
|
1395
|
+
# # ╞═══════╪══════════╡
|
1396
|
+
# # │ 99 ┆ 1 │
|
1397
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1398
|
+
# # │ 98 ┆ 2 │
|
1399
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1400
|
+
# # │ 4 ┆ 3 │
|
1401
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1402
|
+
# # │ 3 ┆ 4 │
|
1403
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
1404
|
+
# # │ 2 ┆ 98 │
|
1405
|
+
# # └───────┴──────────┘
|
267
1406
|
def top_k(k: 5, reverse: false)
|
268
1407
|
wrap_expr(_rbexpr.top_k(k, reverse))
|
269
1408
|
end
|
270
1409
|
|
1410
|
+
# Get the index values that would sort this column.
|
1411
|
+
#
|
1412
|
+
# @param reverse [Boolean]
|
1413
|
+
# Sort in reverse (descending) order.
|
1414
|
+
# @param nulls_last [Boolean]
|
1415
|
+
# Place null values last instead of first.
|
1416
|
+
#
|
1417
|
+
# @return [Expr]
|
1418
|
+
#
|
1419
|
+
# @example
|
1420
|
+
# df = Polars::DataFrame.new(
|
1421
|
+
# {
|
1422
|
+
# "a" => [20, 10, 30]
|
1423
|
+
# }
|
1424
|
+
# )
|
1425
|
+
# df.select(Polars.col("a").arg_sort)
|
1426
|
+
# # =>
|
1427
|
+
# # shape: (3, 1)
|
1428
|
+
# # ┌─────┐
|
1429
|
+
# # │ a │
|
1430
|
+
# # │ --- │
|
1431
|
+
# # │ u32 │
|
1432
|
+
# # ╞═════╡
|
1433
|
+
# # │ 1 │
|
1434
|
+
# # ├╌╌╌╌╌┤
|
1435
|
+
# # │ 0 │
|
1436
|
+
# # ├╌╌╌╌╌┤
|
1437
|
+
# # │ 2 │
|
1438
|
+
# # └─────┘
|
271
1439
|
def arg_sort(reverse: false, nulls_last: false)
|
272
1440
|
wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
|
273
1441
|
end
|
274
1442
|
|
1443
|
+
# Get the index of the maximal value.
|
1444
|
+
#
|
1445
|
+
# @return [Expr]
|
1446
|
+
#
|
1447
|
+
# @example
|
1448
|
+
# df = Polars::DataFrame.new(
|
1449
|
+
# {
|
1450
|
+
# "a" => [20, 10, 30]
|
1451
|
+
# }
|
1452
|
+
# )
|
1453
|
+
# df.select(Polars.col("a").arg_max)
|
1454
|
+
# # =>
|
1455
|
+
# # shape: (1, 1)
|
1456
|
+
# # ┌─────┐
|
1457
|
+
# # │ a │
|
1458
|
+
# # │ --- │
|
1459
|
+
# # │ u32 │
|
1460
|
+
# # ╞═════╡
|
1461
|
+
# # │ 2 │
|
1462
|
+
# # └─────┘
|
275
1463
|
def arg_max
|
276
1464
|
wrap_expr(_rbexpr.arg_max)
|
277
1465
|
end
|
278
1466
|
|
1467
|
+
# Get the index of the minimal value.
|
1468
|
+
#
|
1469
|
+
# @return [Expr]
|
1470
|
+
#
|
1471
|
+
# @example
|
1472
|
+
# df = Polars::DataFrame.new(
|
1473
|
+
# {
|
1474
|
+
# "a" => [20, 10, 30]
|
1475
|
+
# }
|
1476
|
+
# )
|
1477
|
+
# df.select(Polars.col("a").arg_min)
|
1478
|
+
# # =>
|
1479
|
+
# # shape: (1, 1)
|
1480
|
+
# # ┌─────┐
|
1481
|
+
# # │ a │
|
1482
|
+
# # │ --- │
|
1483
|
+
# # │ u32 │
|
1484
|
+
# # ╞═════╡
|
1485
|
+
# # │ 1 │
|
1486
|
+
# # └─────┘
|
279
1487
|
def arg_min
|
280
1488
|
wrap_expr(_rbexpr.arg_min)
|
281
1489
|
end
|
282
1490
|
|
1491
|
+
# Find indices where elements should be inserted to maintain order.
|
1492
|
+
#
|
1493
|
+
# @param element [Object]
|
1494
|
+
# Expression or scalar value.
|
1495
|
+
#
|
1496
|
+
# @return [Expr]
|
1497
|
+
#
|
1498
|
+
# @example
|
1499
|
+
# df = Polars::DataFrame.new(
|
1500
|
+
# {
|
1501
|
+
# "values" => [1, 2, 3, 5]
|
1502
|
+
# }
|
1503
|
+
# )
|
1504
|
+
# df.select(
|
1505
|
+
# [
|
1506
|
+
# Polars.col("values").search_sorted(0).alias("zero"),
|
1507
|
+
# Polars.col("values").search_sorted(3).alias("three"),
|
1508
|
+
# Polars.col("values").search_sorted(6).alias("six")
|
1509
|
+
# ]
|
1510
|
+
# )
|
1511
|
+
# # =>
|
1512
|
+
# # shape: (1, 3)
|
1513
|
+
# # ┌──────┬───────┬─────┐
|
1514
|
+
# # │ zero ┆ three ┆ six │
|
1515
|
+
# # │ --- ┆ --- ┆ --- │
|
1516
|
+
# # │ u32 ┆ u32 ┆ u32 │
|
1517
|
+
# # ╞══════╪═══════╪═════╡
|
1518
|
+
# # │ 0 ┆ 2 ┆ 4 │
|
1519
|
+
# # └──────┴───────┴─────┘
|
283
1520
|
def search_sorted(element)
|
284
1521
|
element = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
|
285
1522
|
wrap_expr(_rbexpr.search_sorted(element._rbexpr))
|
286
1523
|
end
|
287
1524
|
|
1525
|
+
# Sort this column by the ordering of another column, or multiple other columns.
|
1526
|
+
#
|
1527
|
+
# In projection/selection context, the whole column is sorted.
|
1528
|
+
# If used in a groupby context, the groups are sorted.
|
1529
|
+
#
|
1530
|
+
# @param by [Object]
|
1531
|
+
# The column(s) used for sorting.
|
1532
|
+
# @param reverse [Boolean]
|
1533
|
+
# false -> order from small to large.
|
1534
|
+
# true -> order from large to small.
|
1535
|
+
#
|
1536
|
+
# @return [Expr]
|
1537
|
+
#
|
1538
|
+
# @example
|
1539
|
+
# df = Polars::DataFrame.new(
|
1540
|
+
# {
|
1541
|
+
# "group" => [
|
1542
|
+
# "one",
|
1543
|
+
# "one",
|
1544
|
+
# "one",
|
1545
|
+
# "two",
|
1546
|
+
# "two",
|
1547
|
+
# "two"
|
1548
|
+
# ],
|
1549
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1550
|
+
# }
|
1551
|
+
# )
|
1552
|
+
# df.select(Polars.col("group").sort_by("value"))
|
1553
|
+
# # =>
|
1554
|
+
# # shape: (6, 1)
|
1555
|
+
# # ┌───────┐
|
1556
|
+
# # │ group │
|
1557
|
+
# # │ --- │
|
1558
|
+
# # │ str │
|
1559
|
+
# # ╞═══════╡
|
1560
|
+
# # │ one │
|
1561
|
+
# # ├╌╌╌╌╌╌╌┤
|
1562
|
+
# # │ one │
|
1563
|
+
# # ├╌╌╌╌╌╌╌┤
|
1564
|
+
# # │ two │
|
1565
|
+
# # ├╌╌╌╌╌╌╌┤
|
1566
|
+
# # │ two │
|
1567
|
+
# # ├╌╌╌╌╌╌╌┤
|
1568
|
+
# # │ one │
|
1569
|
+
# # ├╌╌╌╌╌╌╌┤
|
1570
|
+
# # │ two │
|
1571
|
+
# # └───────┘
|
288
1572
|
def sort_by(by, reverse: false)
|
289
1573
|
if !by.is_a?(Array)
|
290
1574
|
by = [by]
|
@@ -297,19 +1581,176 @@ module Polars
|
|
297
1581
|
wrap_expr(_rbexpr.sort_by(by, reverse))
|
298
1582
|
end
|
299
1583
|
|
300
|
-
#
|
301
|
-
#
|
1584
|
+
# Take values by index.
|
1585
|
+
#
|
1586
|
+
# @param indices [Expr, Array<Integer>, Integer]
|
1587
|
+
# An array of indices, or an expression that leads to a `:u32` dtyped Series.
|
1588
|
+
#
|
1589
|
+
# @return [Expr]
|
1590
|
+
#
|
1591
|
+
# @example
|
1592
|
+
# df = Polars::DataFrame.new(
|
1593
|
+
# {
|
1594
|
+
# "group" => [
|
1595
|
+
# "one",
|
1596
|
+
# "one",
|
1597
|
+
# "one",
|
1598
|
+
# "two",
|
1599
|
+
# "two",
|
1600
|
+
# "two"
|
1601
|
+
# ],
|
1602
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1603
|
+
# }
|
1604
|
+
# )
|
1605
|
+
# df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
|
1606
|
+
# # =>
|
1607
|
+
# # shape: (2, 2)
|
1608
|
+
# # ┌───────┬───────┐
|
1609
|
+
# # │ group ┆ value │
|
1610
|
+
# # │ --- ┆ --- │
|
1611
|
+
# # │ str ┆ i64 │
|
1612
|
+
# # ╞═══════╪═══════╡
|
1613
|
+
# # │ one ┆ 98 │
|
1614
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1615
|
+
# # │ two ┆ 99 │
|
1616
|
+
# # └───────┴───────┘
|
1617
|
+
def take(indices)
|
1618
|
+
if indices.is_a?(Array)
|
1619
|
+
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1620
|
+
else
|
1621
|
+
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
1622
|
+
end
|
1623
|
+
wrap_expr(_rbexpr.take(indices_lit._rbexpr))
|
1624
|
+
end
|
302
1625
|
|
1626
|
+
# Shift the values by a given period.
|
1627
|
+
#
|
1628
|
+
# @param periods [Integer]
|
1629
|
+
# Number of places to shift (may be negative).
|
303
1630
|
#
|
1631
|
+
# @return [Expr]
|
1632
|
+
#
|
1633
|
+
# @example
|
1634
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
1635
|
+
# df.select(Polars.col("foo").shift(1))
|
1636
|
+
# # =>
|
1637
|
+
# # shape: (4, 1)
|
1638
|
+
# # ┌──────┐
|
1639
|
+
# # │ foo │
|
1640
|
+
# # │ --- │
|
1641
|
+
# # │ i64 │
|
1642
|
+
# # ╞══════╡
|
1643
|
+
# # │ null │
|
1644
|
+
# # ├╌╌╌╌╌╌┤
|
1645
|
+
# # │ 1 │
|
1646
|
+
# # ├╌╌╌╌╌╌┤
|
1647
|
+
# # │ 2 │
|
1648
|
+
# # ├╌╌╌╌╌╌┤
|
1649
|
+
# # │ 3 │
|
1650
|
+
# # └──────┘
|
304
1651
|
def shift(periods = 1)
|
305
1652
|
wrap_expr(_rbexpr.shift(periods))
|
306
1653
|
end
|
307
1654
|
|
1655
|
+
# Shift the values by a given period and fill the resulting null values.
|
1656
|
+
#
|
1657
|
+
# @param periods [Integer]
|
1658
|
+
# Number of places to shift (may be negative).
|
1659
|
+
# @param fill_value [Object]
|
1660
|
+
# Fill nil values with the result of this expression.
|
1661
|
+
#
|
1662
|
+
# @return [Expr]
|
1663
|
+
#
|
1664
|
+
# @example
|
1665
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
1666
|
+
# df.select(Polars.col("foo").shift_and_fill(1, "a"))
|
1667
|
+
# # =>
|
1668
|
+
# # shape: (4, 1)
|
1669
|
+
# # ┌─────┐
|
1670
|
+
# # │ foo │
|
1671
|
+
# # │ --- │
|
1672
|
+
# # │ str │
|
1673
|
+
# # ╞═════╡
|
1674
|
+
# # │ a │
|
1675
|
+
# # ├╌╌╌╌╌┤
|
1676
|
+
# # │ 1 │
|
1677
|
+
# # ├╌╌╌╌╌┤
|
1678
|
+
# # │ 2 │
|
1679
|
+
# # ├╌╌╌╌╌┤
|
1680
|
+
# # │ 3 │
|
1681
|
+
# # └─────┘
|
308
1682
|
def shift_and_fill(periods, fill_value)
|
309
1683
|
fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
|
310
1684
|
wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr))
|
311
1685
|
end
|
312
1686
|
|
1687
|
+
# Fill null values using the specified value or strategy.
|
1688
|
+
#
|
1689
|
+
# To interpolate over null values, see `interpolate`.
|
1690
|
+
#
|
1691
|
+
# @param value [Object]
|
1692
|
+
# Value used to fill null values.
|
1693
|
+
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
|
1694
|
+
# Strategy used to fill null values.
|
1695
|
+
# @param limit [Integer]
|
1696
|
+
# Number of consecutive null values to fill when using the 'forward' or
|
1697
|
+
# 'backward' strategy.
|
1698
|
+
#
|
1699
|
+
# @return [Expr]
|
1700
|
+
#
|
1701
|
+
# @example
|
1702
|
+
# df = Polars::DataFrame.new(
|
1703
|
+
# {
|
1704
|
+
# "a" => [1, 2, nil],
|
1705
|
+
# "b" => [4, nil, 6]
|
1706
|
+
# }
|
1707
|
+
# )
|
1708
|
+
# df.fill_null(strategy: "zero")
|
1709
|
+
# # =>
|
1710
|
+
# # shape: (3, 2)
|
1711
|
+
# # ┌─────┬─────┐
|
1712
|
+
# # │ a ┆ b │
|
1713
|
+
# # │ --- ┆ --- │
|
1714
|
+
# # │ i64 ┆ i64 │
|
1715
|
+
# # ╞═════╪═════╡
|
1716
|
+
# # │ 1 ┆ 4 │
|
1717
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1718
|
+
# # │ 2 ┆ 0 │
|
1719
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1720
|
+
# # │ 0 ┆ 6 │
|
1721
|
+
# # └─────┴─────┘
|
1722
|
+
#
|
1723
|
+
# @example
|
1724
|
+
# df.fill_null(99)
|
1725
|
+
# # =>
|
1726
|
+
# # shape: (3, 2)
|
1727
|
+
# # ┌─────┬─────┐
|
1728
|
+
# # │ a ┆ b │
|
1729
|
+
# # │ --- ┆ --- │
|
1730
|
+
# # │ i64 ┆ i64 │
|
1731
|
+
# # ╞═════╪═════╡
|
1732
|
+
# # │ 1 ┆ 4 │
|
1733
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1734
|
+
# # │ 2 ┆ 99 │
|
1735
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1736
|
+
# # │ 99 ┆ 6 │
|
1737
|
+
# # └─────┴─────┘
|
1738
|
+
#
|
1739
|
+
# @example
|
1740
|
+
# df.fill_null(strategy: "forward")
|
1741
|
+
# # =>
|
1742
|
+
# # shape: (3, 2)
|
1743
|
+
# # ┌─────┬─────┐
|
1744
|
+
# # │ a ┆ b │
|
1745
|
+
# # │ --- ┆ --- │
|
1746
|
+
# # │ i64 ┆ i64 │
|
1747
|
+
# # ╞═════╪═════╡
|
1748
|
+
# # │ 1 ┆ 4 │
|
1749
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1750
|
+
# # │ 2 ┆ 4 │
|
1751
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1752
|
+
# # │ 2 ┆ 6 │
|
1753
|
+
# # └─────┴─────┘
|
313
1754
|
def fill_null(value = nil, strategy: nil, limit: nil)
|
314
1755
|
if !value.nil? && !strategy.nil?
|
315
1756
|
raise ArgumentError, "cannot specify both 'value' and 'strategy'."
|
@@ -327,75 +1768,426 @@ module Polars
|
|
327
1768
|
end
|
328
1769
|
end
|
329
1770
|
|
1771
|
+
# Fill floating point NaN value with a fill value.
|
1772
|
+
#
|
1773
|
+
# @return [Expr]
|
1774
|
+
#
|
1775
|
+
# @example
|
1776
|
+
# df = Polars::DataFrame.new(
|
1777
|
+
# {
|
1778
|
+
# "a" => [1.0, nil, Float::NAN],
|
1779
|
+
# "b" => [4.0, Float::NAN, 6]
|
1780
|
+
# }
|
1781
|
+
# )
|
1782
|
+
# df.fill_nan("zero")
|
1783
|
+
# # =>
|
1784
|
+
# # shape: (3, 2)
|
1785
|
+
# # ┌──────┬──────┐
|
1786
|
+
# # │ a ┆ b │
|
1787
|
+
# # │ --- ┆ --- │
|
1788
|
+
# # │ str ┆ str │
|
1789
|
+
# # ╞══════╪══════╡
|
1790
|
+
# # │ 1.0 ┆ 4.0 │
|
1791
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1792
|
+
# # │ null ┆ zero │
|
1793
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1794
|
+
# # │ zero ┆ 6.0 │
|
1795
|
+
# # └──────┴──────┘
|
330
1796
|
def fill_nan(fill_value)
|
331
1797
|
fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
|
332
1798
|
wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
|
333
1799
|
end
|
334
1800
|
|
1801
|
+
# Fill missing values with the latest seen values.
|
1802
|
+
#
|
1803
|
+
# @param limit [Integer]
|
1804
|
+
# The number of consecutive null values to forward fill.
|
1805
|
+
#
|
1806
|
+
# @return [Expr]
|
1807
|
+
#
|
1808
|
+
# @example
|
1809
|
+
# df = Polars::DataFrame.new(
|
1810
|
+
# {
|
1811
|
+
# "a" => [1, 2, nil],
|
1812
|
+
# "b" => [4, nil, 6]
|
1813
|
+
# }
|
1814
|
+
# )
|
1815
|
+
# df.select(Polars.all.forward_fill)
|
1816
|
+
# # =>
|
1817
|
+
# # shape: (3, 2)
|
1818
|
+
# # ┌─────┬─────┐
|
1819
|
+
# # │ a ┆ b │
|
1820
|
+
# # │ --- ┆ --- │
|
1821
|
+
# # │ i64 ┆ i64 │
|
1822
|
+
# # ╞═════╪═════╡
|
1823
|
+
# # │ 1 ┆ 4 │
|
1824
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1825
|
+
# # │ 2 ┆ 4 │
|
1826
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1827
|
+
# # │ 2 ┆ 6 │
|
1828
|
+
# # └─────┴─────┘
|
335
1829
|
def forward_fill(limit: nil)
|
336
1830
|
wrap_expr(_rbexpr.forward_fill(limit))
|
337
1831
|
end
|
338
1832
|
|
1833
|
+
# Fill missing values with the next to be seen values.
|
1834
|
+
#
|
1835
|
+
# @param limit [Integer]
|
1836
|
+
# The number of consecutive null values to backward fill.
|
1837
|
+
#
|
1838
|
+
# @return [Expr]
|
1839
|
+
#
|
1840
|
+
# @example
|
1841
|
+
# df = Polars::DataFrame.new(
|
1842
|
+
# {
|
1843
|
+
# "a" => [1, 2, nil],
|
1844
|
+
# "b" => [4, nil, 6]
|
1845
|
+
# }
|
1846
|
+
# )
|
1847
|
+
# df.select(Polars.all.backward_fill)
|
1848
|
+
# # =>
|
1849
|
+
# # shape: (3, 2)
|
1850
|
+
# # ┌──────┬─────┐
|
1851
|
+
# # │ a ┆ b │
|
1852
|
+
# # │ --- ┆ --- │
|
1853
|
+
# # │ i64 ┆ i64 │
|
1854
|
+
# # ╞══════╪═════╡
|
1855
|
+
# # │ 1 ┆ 4 │
|
1856
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1857
|
+
# # │ 2 ┆ 6 │
|
1858
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1859
|
+
# # │ null ┆ 6 │
|
1860
|
+
# # └──────┴─────┘
|
339
1861
|
def backward_fill(limit: nil)
|
340
1862
|
wrap_expr(_rbexpr.backward_fill(limit))
|
341
1863
|
end
|
342
1864
|
|
1865
|
+
# Reverse the selection.
|
1866
|
+
#
|
1867
|
+
# @return [Expr]
|
343
1868
|
def reverse
|
344
1869
|
wrap_expr(_rbexpr.reverse)
|
345
1870
|
end
|
346
1871
|
|
1872
|
+
# Get standard deviation.
|
1873
|
+
#
|
1874
|
+
# @param ddof [Integer]
|
1875
|
+
# Degrees of freedom.
|
1876
|
+
#
|
1877
|
+
# @return [Expr]
|
1878
|
+
#
|
1879
|
+
# @example
|
1880
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
1881
|
+
# df.select(Polars.col("a").std)
|
1882
|
+
# # =>
|
1883
|
+
# # shape: (1, 1)
|
1884
|
+
# # ┌─────┐
|
1885
|
+
# # │ a │
|
1886
|
+
# # │ --- │
|
1887
|
+
# # │ f64 │
|
1888
|
+
# # ╞═════╡
|
1889
|
+
# # │ 1.0 │
|
1890
|
+
# # └─────┘
|
347
1891
|
def std(ddof: 1)
|
348
1892
|
wrap_expr(_rbexpr.std(ddof))
|
349
1893
|
end
|
350
1894
|
|
1895
|
+
# Get variance.
|
1896
|
+
#
|
1897
|
+
# @param ddof [Integer]
|
1898
|
+
# Degrees of freedom.
|
1899
|
+
#
|
1900
|
+
# @return [Expr]
|
1901
|
+
#
|
1902
|
+
# @example
|
1903
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
1904
|
+
# df.select(Polars.col("a").var)
|
1905
|
+
# # =>
|
1906
|
+
# # shape: (1, 1)
|
1907
|
+
# # ┌─────┐
|
1908
|
+
# # │ a │
|
1909
|
+
# # │ --- │
|
1910
|
+
# # │ f64 │
|
1911
|
+
# # ╞═════╡
|
1912
|
+
# # │ 1.0 │
|
1913
|
+
# # └─────┘
|
351
1914
|
def var(ddof: 1)
|
352
1915
|
wrap_expr(_rbexpr.var(ddof))
|
353
1916
|
end
|
354
1917
|
|
1918
|
+
# Get maximum value.
|
1919
|
+
#
|
1920
|
+
# @return [Expr]
|
1921
|
+
#
|
1922
|
+
# @example
|
1923
|
+
# df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
|
1924
|
+
# df.select(Polars.col("a").max)
|
1925
|
+
# # =>
|
1926
|
+
# # shape: (1, 1)
|
1927
|
+
# # ┌─────┐
|
1928
|
+
# # │ a │
|
1929
|
+
# # │ --- │
|
1930
|
+
# # │ f64 │
|
1931
|
+
# # ╞═════╡
|
1932
|
+
# # │ 1.0 │
|
1933
|
+
# # └─────┘
|
355
1934
|
def max
|
356
1935
|
wrap_expr(_rbexpr.max)
|
357
1936
|
end
|
358
1937
|
|
1938
|
+
# Get minimum value.
|
1939
|
+
#
|
1940
|
+
# @return [Expr]
|
1941
|
+
#
|
1942
|
+
# @example
|
1943
|
+
# df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
|
1944
|
+
# df.select(Polars.col("a").min)
|
1945
|
+
# # =>
|
1946
|
+
# # shape: (1, 1)
|
1947
|
+
# # ┌──────┐
|
1948
|
+
# # │ a │
|
1949
|
+
# # │ --- │
|
1950
|
+
# # │ f64 │
|
1951
|
+
# # ╞══════╡
|
1952
|
+
# # │ -1.0 │
|
1953
|
+
# # └──────┘
|
359
1954
|
def min
|
360
1955
|
wrap_expr(_rbexpr.min)
|
361
1956
|
end
|
362
1957
|
|
1958
|
+
# Get maximum value, but propagate/poison encountered NaN values.
|
1959
|
+
#
|
1960
|
+
# @return [Expr]
|
1961
|
+
#
|
1962
|
+
# @example
|
1963
|
+
# df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
|
1964
|
+
# df.select(Polars.col("a").nan_max)
|
1965
|
+
# # =>
|
1966
|
+
# # shape: (1, 1)
|
1967
|
+
# # ┌─────┐
|
1968
|
+
# # │ a │
|
1969
|
+
# # │ --- │
|
1970
|
+
# # │ f64 │
|
1971
|
+
# # ╞═════╡
|
1972
|
+
# # │ NaN │
|
1973
|
+
# # └─────┘
|
363
1974
|
def nan_max
|
364
1975
|
wrap_expr(_rbexpr.nan_max)
|
365
1976
|
end
|
366
1977
|
|
1978
|
+
# Get minimum value, but propagate/poison encountered NaN values.
|
1979
|
+
#
|
1980
|
+
# @return [Expr]
|
1981
|
+
#
|
1982
|
+
# @example
|
1983
|
+
# df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
|
1984
|
+
# df.select(Polars.col("a").nan_min)
|
1985
|
+
# # =>
|
1986
|
+
# # shape: (1, 1)
|
1987
|
+
# # ┌─────┐
|
1988
|
+
# # │ a │
|
1989
|
+
# # │ --- │
|
1990
|
+
# # │ f64 │
|
1991
|
+
# # ╞═════╡
|
1992
|
+
# # │ NaN │
|
1993
|
+
# # └─────┘
|
367
1994
|
def nan_min
|
368
1995
|
wrap_expr(_rbexpr.nan_min)
|
369
1996
|
end
|
370
1997
|
|
1998
|
+
# Get sum value.
|
1999
|
+
#
|
2000
|
+
# @return [Expr]
|
2001
|
+
#
|
2002
|
+
# @note
|
2003
|
+
# Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
2004
|
+
# `:i64` before summing to prevent overflow issues.
|
2005
|
+
#
|
2006
|
+
# @example
|
2007
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2008
|
+
# df.select(Polars.col("a").sum)
|
2009
|
+
# # =>
|
2010
|
+
# # shape: (1, 1)
|
2011
|
+
# # ┌─────┐
|
2012
|
+
# # │ a │
|
2013
|
+
# # │ --- │
|
2014
|
+
# # │ i64 │
|
2015
|
+
# # ╞═════╡
|
2016
|
+
# # │ 0 │
|
2017
|
+
# # └─────┘
|
371
2018
|
def sum
|
372
2019
|
wrap_expr(_rbexpr.sum)
|
373
2020
|
end
|
374
2021
|
|
2022
|
+
# Get mean value.
|
2023
|
+
#
|
2024
|
+
# @return [Expr]
|
2025
|
+
#
|
2026
|
+
# @example
|
2027
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2028
|
+
# df.select(Polars.col("a").mean)
|
2029
|
+
# # =>
|
2030
|
+
# # shape: (1, 1)
|
2031
|
+
# # ┌─────┐
|
2032
|
+
# # │ a │
|
2033
|
+
# # │ --- │
|
2034
|
+
# # │ f64 │
|
2035
|
+
# # ╞═════╡
|
2036
|
+
# # │ 0.0 │
|
2037
|
+
# # └─────┘
|
375
2038
|
def mean
|
376
2039
|
wrap_expr(_rbexpr.mean)
|
377
2040
|
end
|
378
2041
|
|
2042
|
+
# Get median value using linear interpolation.
|
2043
|
+
#
|
2044
|
+
# @return [Expr]
|
2045
|
+
#
|
2046
|
+
# @example
|
2047
|
+
# df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
|
2048
|
+
# df.select(Polars.col("a").median)
|
2049
|
+
# # =>
|
2050
|
+
# # shape: (1, 1)
|
2051
|
+
# # ┌─────┐
|
2052
|
+
# # │ a │
|
2053
|
+
# # │ --- │
|
2054
|
+
# # │ f64 │
|
2055
|
+
# # ╞═════╡
|
2056
|
+
# # │ 0.0 │
|
2057
|
+
# # └─────┘
|
379
2058
|
def median
|
380
2059
|
wrap_expr(_rbexpr.median)
|
381
2060
|
end
|
382
2061
|
|
2062
|
+
# Compute the product of an expression.
|
2063
|
+
#
|
2064
|
+
# @return [Expr]
|
2065
|
+
#
|
2066
|
+
# @example
|
2067
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
2068
|
+
# df.select(Polars.col("a").product)
|
2069
|
+
# # =>
|
2070
|
+
# # shape: (1, 1)
|
2071
|
+
# # ┌─────┐
|
2072
|
+
# # │ a │
|
2073
|
+
# # │ --- │
|
2074
|
+
# # │ i64 │
|
2075
|
+
# # ╞═════╡
|
2076
|
+
# # │ 6 │
|
2077
|
+
# # └─────┘
|
383
2078
|
def product
|
384
2079
|
wrap_expr(_rbexpr.product)
|
385
2080
|
end
|
386
2081
|
|
2082
|
+
# Count unique values.
|
2083
|
+
#
|
2084
|
+
# @return [Expr]
|
2085
|
+
#
|
2086
|
+
# @example
|
2087
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2088
|
+
# df.select(Polars.col("a").n_unique)
|
2089
|
+
# # =>
|
2090
|
+
# # shape: (1, 1)
|
2091
|
+
# # ┌─────┐
|
2092
|
+
# # │ a │
|
2093
|
+
# # │ --- │
|
2094
|
+
# # │ u32 │
|
2095
|
+
# # ╞═════╡
|
2096
|
+
# # │ 2 │
|
2097
|
+
# # └─────┘
|
387
2098
|
def n_unique
|
388
2099
|
wrap_expr(_rbexpr.n_unique)
|
389
2100
|
end
|
390
2101
|
|
2102
|
+
# Count null values.
|
2103
|
+
#
|
2104
|
+
# @return [Expr]
|
2105
|
+
#
|
2106
|
+
# @example
|
2107
|
+
# df = Polars::DataFrame.new(
|
2108
|
+
# {
|
2109
|
+
# "a" => [nil, 1, nil],
|
2110
|
+
# "b" => [1, 2, 3]
|
2111
|
+
# }
|
2112
|
+
# )
|
2113
|
+
# df.select(Polars.all.null_count)
|
2114
|
+
# # =>
|
2115
|
+
# # shape: (1, 2)
|
2116
|
+
# # ┌─────┬─────┐
|
2117
|
+
# # │ a ┆ b │
|
2118
|
+
# # │ --- ┆ --- │
|
2119
|
+
# # │ u32 ┆ u32 │
|
2120
|
+
# # ╞═════╪═════╡
|
2121
|
+
# # │ 2 ┆ 0 │
|
2122
|
+
# # └─────┴─────┘
|
391
2123
|
def null_count
|
392
2124
|
wrap_expr(_rbexpr.null_count)
|
393
2125
|
end
|
394
2126
|
|
2127
|
+
# Get index of first unique value.
|
2128
|
+
#
|
2129
|
+
# @return [Expr]
|
2130
|
+
#
|
2131
|
+
# @example
|
2132
|
+
# df = Polars::DataFrame.new(
|
2133
|
+
# {
|
2134
|
+
# "a" => [8, 9, 10],
|
2135
|
+
# "b" => [nil, 4, 4]
|
2136
|
+
# }
|
2137
|
+
# )
|
2138
|
+
# df.select(Polars.col("a").arg_unique)
|
2139
|
+
# # =>
|
2140
|
+
# # shape: (3, 1)
|
2141
|
+
# # ┌─────┐
|
2142
|
+
# # │ a │
|
2143
|
+
# # │ --- │
|
2144
|
+
# # │ u32 │
|
2145
|
+
# # ╞═════╡
|
2146
|
+
# # │ 0 │
|
2147
|
+
# # ├╌╌╌╌╌┤
|
2148
|
+
# # │ 1 │
|
2149
|
+
# # ├╌╌╌╌╌┤
|
2150
|
+
# # │ 2 │
|
2151
|
+
# # └─────┘
|
2152
|
+
#
|
2153
|
+
# @example
|
2154
|
+
# df.select(Polars.col("b").arg_unique)
|
2155
|
+
# # =>
|
2156
|
+
# # shape: (2, 1)
|
2157
|
+
# # ┌─────┐
|
2158
|
+
# # │ b │
|
2159
|
+
# # │ --- │
|
2160
|
+
# # │ u32 │
|
2161
|
+
# # ╞═════╡
|
2162
|
+
# # │ 0 │
|
2163
|
+
# # ├╌╌╌╌╌┤
|
2164
|
+
# # │ 1 │
|
2165
|
+
# # └─────┘
|
395
2166
|
def arg_unique
|
396
2167
|
wrap_expr(_rbexpr.arg_unique)
|
397
2168
|
end
|
398
2169
|
|
2170
|
+
# Get unique values of this expression.
|
2171
|
+
#
|
2172
|
+
# @param maintain_order [Boolean]
|
2173
|
+
# Maintain order of data. This requires more work.
|
2174
|
+
#
|
2175
|
+
# @return [Expr]
|
2176
|
+
#
|
2177
|
+
# @example
|
2178
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2179
|
+
# df.select(Polars.col("a").unique(maintain_order: true))
|
2180
|
+
# # =>
|
2181
|
+
# # shape: (2, 1)
|
2182
|
+
# # ┌─────┐
|
2183
|
+
# # │ a │
|
2184
|
+
# # │ --- │
|
2185
|
+
# # │ i64 │
|
2186
|
+
# # ╞═════╡
|
2187
|
+
# # │ 1 │
|
2188
|
+
# # ├╌╌╌╌╌┤
|
2189
|
+
# # │ 2 │
|
2190
|
+
# # └─────┘
|
399
2191
|
def unique(maintain_order: false)
|
400
2192
|
if maintain_order
|
401
2193
|
wrap_expr(_rbexpr.unique_stable)
|
@@ -404,95 +2196,743 @@ module Polars
|
|
404
2196
|
end
|
405
2197
|
end
|
406
2198
|
|
2199
|
+
# Get the first value.
|
2200
|
+
#
|
2201
|
+
# @return [Expr]
|
2202
|
+
#
|
2203
|
+
# @example
|
2204
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2205
|
+
# df.select(Polars.col("a").first)
|
2206
|
+
# # =>
|
2207
|
+
# # shape: (1, 1)
|
2208
|
+
# # ┌─────┐
|
2209
|
+
# # │ a │
|
2210
|
+
# # │ --- │
|
2211
|
+
# # │ i64 │
|
2212
|
+
# # ╞═════╡
|
2213
|
+
# # │ 1 │
|
2214
|
+
# # └─────┘
|
407
2215
|
def first
|
408
2216
|
wrap_expr(_rbexpr.first)
|
409
2217
|
end
|
410
2218
|
|
2219
|
+
# Get the last value.
|
2220
|
+
#
|
2221
|
+
# @return [Expr]
|
2222
|
+
#
|
2223
|
+
# @example
|
2224
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2225
|
+
# df.select(Polars.col("a").last)
|
2226
|
+
# # =>
|
2227
|
+
# # shape: (1, 1)
|
2228
|
+
# # ┌─────┐
|
2229
|
+
# # │ a │
|
2230
|
+
# # │ --- │
|
2231
|
+
# # │ i64 │
|
2232
|
+
# # ╞═════╡
|
2233
|
+
# # │ 2 │
|
2234
|
+
# # └─────┘
|
411
2235
|
def last
|
412
2236
|
wrap_expr(_rbexpr.last)
|
413
2237
|
end
|
414
2238
|
|
2239
|
+
# Apply window function over a subgroup.
|
2240
|
+
#
|
2241
|
+
# This is similar to a groupby + aggregation + self join.
|
2242
|
+
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
|
2243
|
+
#
|
2244
|
+
# @param expr [Object]
|
2245
|
+
# Column(s) to group by.
|
2246
|
+
#
|
2247
|
+
# @return [Expr]
|
2248
|
+
#
|
2249
|
+
# @example
|
2250
|
+
# df = Polars::DataFrame.new(
|
2251
|
+
# {
|
2252
|
+
# "groups" => ["g1", "g1", "g2"],
|
2253
|
+
# "values" => [1, 2, 3]
|
2254
|
+
# }
|
2255
|
+
# )
|
2256
|
+
# df.with_column(
|
2257
|
+
# Polars.col("values").max.over("groups").alias("max_by_group")
|
2258
|
+
# )
|
2259
|
+
# # =>
|
2260
|
+
# # shape: (3, 3)
|
2261
|
+
# # ┌────────┬────────┬──────────────┐
|
2262
|
+
# # │ groups ┆ values ┆ max_by_group │
|
2263
|
+
# # │ --- ┆ --- ┆ --- │
|
2264
|
+
# # │ str ┆ i64 ┆ i64 │
|
2265
|
+
# # ╞════════╪════════╪══════════════╡
|
2266
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2267
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2268
|
+
# # │ g1 ┆ 2 ┆ 2 │
|
2269
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2270
|
+
# # │ g2 ┆ 3 ┆ 3 │
|
2271
|
+
# # └────────┴────────┴──────────────┘
|
2272
|
+
#
|
2273
|
+
# @example
|
2274
|
+
# df = Polars::DataFrame.new(
|
2275
|
+
# {
|
2276
|
+
# "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
|
2277
|
+
# "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
|
2278
|
+
# }
|
2279
|
+
# )
|
2280
|
+
# df.lazy
|
2281
|
+
# .select([Polars.col("groups").sum.over("groups")])
|
2282
|
+
# .collect
|
2283
|
+
# # =>
|
2284
|
+
# # shape: (9, 1)
|
2285
|
+
# # ┌────────┐
|
2286
|
+
# # │ groups │
|
2287
|
+
# # │ --- │
|
2288
|
+
# # │ i64 │
|
2289
|
+
# # ╞════════╡
|
2290
|
+
# # │ 4 │
|
2291
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2292
|
+
# # │ 4 │
|
2293
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2294
|
+
# # │ 6 │
|
2295
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2296
|
+
# # │ 6 │
|
2297
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2298
|
+
# # │ ... │
|
2299
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2300
|
+
# # │ 6 │
|
2301
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2302
|
+
# # │ 6 │
|
2303
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2304
|
+
# # │ 6 │
|
2305
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
2306
|
+
# # │ 4 │
|
2307
|
+
# # └────────┘
|
415
2308
|
def over(expr)
|
416
2309
|
rbexprs = Utils.selection_to_rbexpr_list(expr)
|
417
2310
|
wrap_expr(_rbexpr.over(rbexprs))
|
418
2311
|
end
|
419
2312
|
|
2313
|
+
# Get mask of unique values.
|
2314
|
+
#
|
2315
|
+
# @return [Expr]
|
2316
|
+
#
|
2317
|
+
# @example
|
2318
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2319
|
+
# df.select(Polars.col("a").is_unique)
|
2320
|
+
# # =>
|
2321
|
+
# # shape: (3, 1)
|
2322
|
+
# # ┌───────┐
|
2323
|
+
# # │ a │
|
2324
|
+
# # │ --- │
|
2325
|
+
# # │ bool │
|
2326
|
+
# # ╞═══════╡
|
2327
|
+
# # │ false │
|
2328
|
+
# # ├╌╌╌╌╌╌╌┤
|
2329
|
+
# # │ false │
|
2330
|
+
# # ├╌╌╌╌╌╌╌┤
|
2331
|
+
# # │ true │
|
2332
|
+
# # └───────┘
|
420
2333
|
def is_unique
|
421
2334
|
wrap_expr(_rbexpr.is_unique)
|
422
2335
|
end
|
423
2336
|
|
2337
|
+
# Get a mask of the first unique value.
|
2338
|
+
#
|
2339
|
+
# @return [Expr]
|
2340
|
+
#
|
2341
|
+
# @example
|
2342
|
+
# df = Polars::DataFrame.new(
|
2343
|
+
# {
|
2344
|
+
# "num" => [1, 2, 3, 1, 5]
|
2345
|
+
# }
|
2346
|
+
# )
|
2347
|
+
# df.with_column(Polars.col("num").is_first.alias("is_first"))
|
2348
|
+
# # =>
|
2349
|
+
# # shape: (5, 2)
|
2350
|
+
# # ┌─────┬──────────┐
|
2351
|
+
# # │ num ┆ is_first │
|
2352
|
+
# # │ --- ┆ --- │
|
2353
|
+
# # │ i64 ┆ bool │
|
2354
|
+
# # ╞═════╪══════════╡
|
2355
|
+
# # │ 1 ┆ true │
|
2356
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2357
|
+
# # │ 2 ┆ true │
|
2358
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2359
|
+
# # │ 3 ┆ true │
|
2360
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2361
|
+
# # │ 1 ┆ false │
|
2362
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2363
|
+
# # │ 5 ┆ true │
|
2364
|
+
# # └─────┴──────────┘
|
424
2365
|
def is_first
|
425
2366
|
wrap_expr(_rbexpr.is_first)
|
426
2367
|
end
|
427
2368
|
|
2369
|
+
# Get mask of duplicated values.
|
2370
|
+
#
|
2371
|
+
# @return [Expr]
|
2372
|
+
#
|
2373
|
+
# @example
|
2374
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2375
|
+
# df.select(Polars.col("a").is_duplicated)
|
2376
|
+
# # =>
|
2377
|
+
# # shape: (3, 1)
|
2378
|
+
# # ┌───────┐
|
2379
|
+
# # │ a │
|
2380
|
+
# # │ --- │
|
2381
|
+
# # │ bool │
|
2382
|
+
# # ╞═══════╡
|
2383
|
+
# # │ true │
|
2384
|
+
# # ├╌╌╌╌╌╌╌┤
|
2385
|
+
# # │ true │
|
2386
|
+
# # ├╌╌╌╌╌╌╌┤
|
2387
|
+
# # │ false │
|
2388
|
+
# # └───────┘
|
428
2389
|
def is_duplicated
|
429
2390
|
wrap_expr(_rbexpr.is_duplicated)
|
430
2391
|
end
|
431
2392
|
|
2393
|
+
# Get quantile value.
|
2394
|
+
#
|
2395
|
+
# @param quantile [Float]
|
2396
|
+
# Quantile between 0.0 and 1.0.
|
2397
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
2398
|
+
# Interpolation method.
|
2399
|
+
#
|
2400
|
+
# @return [Expr]
|
2401
|
+
#
|
2402
|
+
# @example
|
2403
|
+
# df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
|
2404
|
+
# df.select(Polars.col("a").quantile(0.3))
|
2405
|
+
# # =>
|
2406
|
+
# # shape: (1, 1)
|
2407
|
+
# # ┌─────┐
|
2408
|
+
# # │ a │
|
2409
|
+
# # │ --- │
|
2410
|
+
# # │ f64 │
|
2411
|
+
# # ╞═════╡
|
2412
|
+
# # │ 1.0 │
|
2413
|
+
# # └─────┘
|
2414
|
+
#
|
2415
|
+
# @example
|
2416
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
|
2417
|
+
# # =>
|
2418
|
+
# # shape: (1, 1)
|
2419
|
+
# # ┌─────┐
|
2420
|
+
# # │ a │
|
2421
|
+
# # │ --- │
|
2422
|
+
# # │ f64 │
|
2423
|
+
# # ╞═════╡
|
2424
|
+
# # │ 2.0 │
|
2425
|
+
# # └─────┘
|
2426
|
+
#
|
2427
|
+
# @example
|
2428
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
|
2429
|
+
# # =>
|
2430
|
+
# # shape: (1, 1)
|
2431
|
+
# # ┌─────┐
|
2432
|
+
# # │ a │
|
2433
|
+
# # │ --- │
|
2434
|
+
# # │ f64 │
|
2435
|
+
# # ╞═════╡
|
2436
|
+
# # │ 1.0 │
|
2437
|
+
# # └─────┘
|
2438
|
+
#
|
2439
|
+
# @example
|
2440
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
|
2441
|
+
# # =>
|
2442
|
+
# # shape: (1, 1)
|
2443
|
+
# # ┌─────┐
|
2444
|
+
# # │ a │
|
2445
|
+
# # │ --- │
|
2446
|
+
# # │ f64 │
|
2447
|
+
# # ╞═════╡
|
2448
|
+
# # │ 1.5 │
|
2449
|
+
# # └─────┘
|
2450
|
+
#
|
2451
|
+
# @example
|
2452
|
+
# df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
|
2453
|
+
# # =>
|
2454
|
+
# # shape: (1, 1)
|
2455
|
+
# # ┌─────┐
|
2456
|
+
# # │ a │
|
2457
|
+
# # │ --- │
|
2458
|
+
# # │ f64 │
|
2459
|
+
# # ╞═════╡
|
2460
|
+
# # │ 1.5 │
|
2461
|
+
# # └─────┘
|
432
2462
|
def quantile(quantile, interpolation: "nearest")
|
433
2463
|
wrap_expr(_rbexpr.quantile(quantile, interpolation))
|
434
2464
|
end
|
435
2465
|
|
2466
|
+
# Filter a single column.
|
2467
|
+
#
|
2468
|
+
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
2469
|
+
# level, use `LazyFrame#filter`.
|
2470
|
+
#
|
2471
|
+
# @param predicate [Expr]
|
2472
|
+
# Boolean expression.
|
2473
|
+
#
|
2474
|
+
# @return [Expr]
|
2475
|
+
#
|
2476
|
+
# @example
|
2477
|
+
# df = Polars::DataFrame.new(
|
2478
|
+
# {
|
2479
|
+
# "group_col" => ["g1", "g1", "g2"],
|
2480
|
+
# "b" => [1, 2, 3]
|
2481
|
+
# }
|
2482
|
+
# )
|
2483
|
+
# (
|
2484
|
+
# df.groupby("group_col").agg(
|
2485
|
+
# [
|
2486
|
+
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
|
2487
|
+
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
|
2488
|
+
# ]
|
2489
|
+
# )
|
2490
|
+
# ).sort("group_col")
|
2491
|
+
# # =>
|
2492
|
+
# # shape: (2, 3)
|
2493
|
+
# # ┌───────────┬──────┬─────┐
|
2494
|
+
# # │ group_col ┆ lt ┆ gte │
|
2495
|
+
# # │ --- ┆ --- ┆ --- │
|
2496
|
+
# # │ str ┆ i64 ┆ i64 │
|
2497
|
+
# # ╞═══════════╪══════╪═════╡
|
2498
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2499
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
2500
|
+
# # │ g2 ┆ null ┆ 3 │
|
2501
|
+
# # └───────────┴──────┴─────┘
|
436
2502
|
def filter(predicate)
|
437
2503
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
438
2504
|
end
|
439
2505
|
|
2506
|
+
# Filter a single column.
|
2507
|
+
#
|
2508
|
+
# Alias for {#filter}.
|
2509
|
+
#
|
2510
|
+
# @param predicate [Expr]
|
2511
|
+
# Boolean expression.
|
2512
|
+
#
|
2513
|
+
# @return [Expr]
|
2514
|
+
#
|
2515
|
+
# @example
|
2516
|
+
# df = Polars::DataFrame.new(
|
2517
|
+
# {
|
2518
|
+
# "group_col" => ["g1", "g1", "g2"],
|
2519
|
+
# "b" => [1, 2, 3]
|
2520
|
+
# }
|
2521
|
+
# )
|
2522
|
+
# (
|
2523
|
+
# df.groupby("group_col").agg(
|
2524
|
+
# [
|
2525
|
+
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
|
2526
|
+
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
|
2527
|
+
# ]
|
2528
|
+
# )
|
2529
|
+
# ).sort("group_col")
|
2530
|
+
# # =>
|
2531
|
+
# # shape: (2, 3)
|
2532
|
+
# # ┌───────────┬──────┬─────┐
|
2533
|
+
# # │ group_col ┆ lt ┆ gte │
|
2534
|
+
# # │ --- ┆ --- ┆ --- │
|
2535
|
+
# # │ str ┆ i64 ┆ i64 │
|
2536
|
+
# # ╞═══════════╪══════╪═════╡
|
2537
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2538
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
2539
|
+
# # │ g2 ┆ null ┆ 3 │
|
2540
|
+
# # └───────────┴──────┴─────┘
|
440
2541
|
def where(predicate)
|
441
2542
|
filter(predicate)
|
442
2543
|
end
|
443
2544
|
|
444
|
-
#
|
2545
|
+
# Apply a custom Ruby function to a Series or sequence of Series.
|
2546
|
+
#
|
2547
|
+
# The output of this custom function must be a Series.
|
2548
|
+
# If you want to apply a custom function elementwise over single values, see
|
2549
|
+
# {#apply}. A use case for `map` is when you want to transform an
|
2550
|
+
# expression with a third-party library.
|
2551
|
+
#
|
2552
|
+
# Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
|
2553
|
+
#
|
2554
|
+
# @param return_dtype [Symbol]
|
2555
|
+
# Dtype of the output Series.
|
2556
|
+
# @param agg_list [Boolean]
|
2557
|
+
# Aggregate list.
|
2558
|
+
#
|
2559
|
+
# @return [Expr]
|
2560
|
+
#
|
2561
|
+
# @example
|
2562
|
+
# df = Polars::DataFrame.new(
|
2563
|
+
# {
|
2564
|
+
# "sine" => [0.0, 1.0, 0.0, -1.0],
|
2565
|
+
# "cosine" => [1.0, 0.0, -1.0, 0.0]
|
2566
|
+
# }
|
2567
|
+
# )
|
2568
|
+
# df.select(Polars.all.map { |x| x.to_numpy.argmax })
|
2569
|
+
# # =>
|
2570
|
+
# # shape: (1, 2)
|
2571
|
+
# # ┌──────┬────────┐
|
2572
|
+
# # │ sine ┆ cosine │
|
2573
|
+
# # │ --- ┆ --- │
|
2574
|
+
# # │ i64 ┆ i64 │
|
2575
|
+
# # ╞══════╪════════╡
|
2576
|
+
# # │ 1 ┆ 0 │
|
2577
|
+
# # └──────┴────────┘
|
2578
|
+
# def map(return_dtype: nil, agg_list: false, &block)
|
2579
|
+
# if !return_dtype.nil?
|
2580
|
+
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2581
|
+
# end
|
2582
|
+
# wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
|
445
2583
|
# end
|
446
2584
|
|
447
2585
|
# def apply
|
448
2586
|
# end
|
449
2587
|
|
2588
|
+
# Explode a list or utf8 Series. This means that every item is expanded to a new
|
2589
|
+
# row.
|
450
2590
|
#
|
2591
|
+
# Alias for {#explode}.
|
2592
|
+
#
|
2593
|
+
# @return [Expr]
|
2594
|
+
#
|
2595
|
+
# @example
|
2596
|
+
# df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
|
2597
|
+
# df.select(Polars.col("foo").flatten)
|
2598
|
+
# # =>
|
2599
|
+
# # shape: (10, 1)
|
2600
|
+
# # ┌─────┐
|
2601
|
+
# # │ foo │
|
2602
|
+
# # │ --- │
|
2603
|
+
# # │ str │
|
2604
|
+
# # ╞═════╡
|
2605
|
+
# # │ h │
|
2606
|
+
# # ├╌╌╌╌╌┤
|
2607
|
+
# # │ e │
|
2608
|
+
# # ├╌╌╌╌╌┤
|
2609
|
+
# # │ l │
|
2610
|
+
# # ├╌╌╌╌╌┤
|
2611
|
+
# # │ l │
|
2612
|
+
# # ├╌╌╌╌╌┤
|
2613
|
+
# # │ ... │
|
2614
|
+
# # ├╌╌╌╌╌┤
|
2615
|
+
# # │ o │
|
2616
|
+
# # ├╌╌╌╌╌┤
|
2617
|
+
# # │ r │
|
2618
|
+
# # ├╌╌╌╌╌┤
|
2619
|
+
# # │ l │
|
2620
|
+
# # ├╌╌╌╌╌┤
|
2621
|
+
# # │ d │
|
2622
|
+
# # └─────┘
|
451
2623
|
def flatten
|
452
2624
|
wrap_expr(_rbexpr.explode)
|
453
2625
|
end
|
454
2626
|
|
2627
|
+
# Explode a list or utf8 Series.
|
2628
|
+
#
|
2629
|
+
# This means that every item is expanded to a new row.
|
2630
|
+
#
|
2631
|
+
# @return [Expr]
|
2632
|
+
#
|
2633
|
+
# @example
|
2634
|
+
# df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
|
2635
|
+
# df.select(Polars.col("b").explode)
|
2636
|
+
# # =>
|
2637
|
+
# # shape: (6, 1)
|
2638
|
+
# # ┌─────┐
|
2639
|
+
# # │ b │
|
2640
|
+
# # │ --- │
|
2641
|
+
# # │ i64 │
|
2642
|
+
# # ╞═════╡
|
2643
|
+
# # │ 1 │
|
2644
|
+
# # ├╌╌╌╌╌┤
|
2645
|
+
# # │ 2 │
|
2646
|
+
# # ├╌╌╌╌╌┤
|
2647
|
+
# # │ 3 │
|
2648
|
+
# # ├╌╌╌╌╌┤
|
2649
|
+
# # │ 4 │
|
2650
|
+
# # ├╌╌╌╌╌┤
|
2651
|
+
# # │ 5 │
|
2652
|
+
# # ├╌╌╌╌╌┤
|
2653
|
+
# # │ 6 │
|
2654
|
+
# # └─────┘
|
455
2655
|
def explode
|
456
2656
|
wrap_expr(_rbexpr.explode)
|
457
2657
|
end
|
458
2658
|
|
2659
|
+
# Take every nth value in the Series and return as a new Series.
|
2660
|
+
#
|
2661
|
+
# @return [Expr]
|
2662
|
+
#
|
2663
|
+
# @example
|
2664
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
2665
|
+
# df.select(Polars.col("foo").take_every(3))
|
2666
|
+
# # =>
|
2667
|
+
# # shape: (3, 1)
|
2668
|
+
# # ┌─────┐
|
2669
|
+
# # │ foo │
|
2670
|
+
# # │ --- │
|
2671
|
+
# # │ i64 │
|
2672
|
+
# # ╞═════╡
|
2673
|
+
# # │ 1 │
|
2674
|
+
# # ├╌╌╌╌╌┤
|
2675
|
+
# # │ 4 │
|
2676
|
+
# # ├╌╌╌╌╌┤
|
2677
|
+
# # │ 7 │
|
2678
|
+
# # └─────┘
|
459
2679
|
def take_every(n)
|
460
2680
|
wrap_expr(_rbexpr.take_every(n))
|
461
2681
|
end
|
462
2682
|
|
2683
|
+
# Get the first `n` rows.
|
2684
|
+
#
|
2685
|
+
# @param n [Integer]
|
2686
|
+
# Number of rows to return.
|
2687
|
+
#
|
2688
|
+
# @return [Expr]
|
2689
|
+
#
|
2690
|
+
# @example
|
2691
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
2692
|
+
# df.select(Polars.col("foo").head(3))
|
2693
|
+
# # =>
|
2694
|
+
# # shape: (3, 1)
|
2695
|
+
# # ┌─────┐
|
2696
|
+
# # │ foo │
|
2697
|
+
# # │ --- │
|
2698
|
+
# # │ i64 │
|
2699
|
+
# # ╞═════╡
|
2700
|
+
# # │ 1 │
|
2701
|
+
# # ├╌╌╌╌╌┤
|
2702
|
+
# # │ 2 │
|
2703
|
+
# # ├╌╌╌╌╌┤
|
2704
|
+
# # │ 3 │
|
2705
|
+
# # └─────┘
|
463
2706
|
def head(n = 10)
|
464
2707
|
wrap_expr(_rbexpr.head(n))
|
465
2708
|
end
|
466
2709
|
|
2710
|
+
# Get the last `n` rows.
|
2711
|
+
#
|
2712
|
+
# @param n [Integer]
|
2713
|
+
# Number of rows to return.
|
2714
|
+
#
|
2715
|
+
# @return [Expr]
|
2716
|
+
#
|
2717
|
+
# @example
|
2718
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
2719
|
+
# df.select(Polars.col("foo").tail(3))
|
2720
|
+
# # =>
|
2721
|
+
# # shape: (3, 1)
|
2722
|
+
# # ┌─────┐
|
2723
|
+
# # │ foo │
|
2724
|
+
# # │ --- │
|
2725
|
+
# # │ i64 │
|
2726
|
+
# # ╞═════╡
|
2727
|
+
# # │ 5 │
|
2728
|
+
# # ├╌╌╌╌╌┤
|
2729
|
+
# # │ 6 │
|
2730
|
+
# # ├╌╌╌╌╌┤
|
2731
|
+
# # │ 7 │
|
2732
|
+
# # └─────┘
|
467
2733
|
def tail(n = 10)
|
468
2734
|
wrap_expr(_rbexpr.tail(n))
|
469
2735
|
end
|
470
2736
|
|
2737
|
+
# Get the first `n` rows.
|
2738
|
+
#
|
2739
|
+
# Alias for {#head}.
|
2740
|
+
#
|
2741
|
+
# @param n [Integer]
|
2742
|
+
# Number of rows to return.
|
2743
|
+
#
|
2744
|
+
# @return [Expr]
|
471
2745
|
def limit(n = 10)
|
472
2746
|
head(n)
|
473
2747
|
end
|
474
2748
|
|
2749
|
+
# Raise expression to the power of exponent.
|
2750
|
+
#
|
2751
|
+
# @return [Expr]
|
2752
|
+
#
|
2753
|
+
# @example
|
2754
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
2755
|
+
# df.select(Polars.col("foo").pow(3))
|
2756
|
+
# # =>
|
2757
|
+
# # shape: (4, 1)
|
2758
|
+
# # ┌──────┐
|
2759
|
+
# # │ foo │
|
2760
|
+
# # │ --- │
|
2761
|
+
# # │ f64 │
|
2762
|
+
# # ╞══════╡
|
2763
|
+
# # │ 1.0 │
|
2764
|
+
# # ├╌╌╌╌╌╌┤
|
2765
|
+
# # │ 8.0 │
|
2766
|
+
# # ├╌╌╌╌╌╌┤
|
2767
|
+
# # │ 27.0 │
|
2768
|
+
# # ├╌╌╌╌╌╌┤
|
2769
|
+
# # │ 64.0 │
|
2770
|
+
# # └──────┘
|
475
2771
|
def pow(exponent)
|
476
2772
|
exponent = Utils.expr_to_lit_or_expr(exponent)
|
477
2773
|
wrap_expr(_rbexpr.pow(exponent._rbexpr))
|
478
2774
|
end
|
479
2775
|
|
480
|
-
#
|
481
|
-
#
|
2776
|
+
# Check if elements of this expression are present in the other Series.
|
2777
|
+
#
|
2778
|
+
# @param other [Object]
|
2779
|
+
# Series or sequence of primitive type.
|
2780
|
+
#
|
2781
|
+
# @return [Expr]
|
2782
|
+
#
|
2783
|
+
# @example
|
2784
|
+
# df = Polars::DataFrame.new(
|
2785
|
+
# {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
|
2786
|
+
# )
|
2787
|
+
# df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
|
2788
|
+
# # =>
|
2789
|
+
# # shape: (3, 1)
|
2790
|
+
# # ┌──────────┐
|
2791
|
+
# # │ contains │
|
2792
|
+
# # │ --- │
|
2793
|
+
# # │ bool │
|
2794
|
+
# # ╞══════════╡
|
2795
|
+
# # │ true │
|
2796
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
2797
|
+
# # │ true │
|
2798
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
2799
|
+
# # │ false │
|
2800
|
+
# # └──────────┘
|
2801
|
+
def is_in(other)
|
2802
|
+
if other.is_a?(Array)
|
2803
|
+
if other.length == 0
|
2804
|
+
other = Polars.lit(nil)
|
2805
|
+
else
|
2806
|
+
other = Polars.lit(Series.new(other))
|
2807
|
+
end
|
2808
|
+
else
|
2809
|
+
other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
|
2810
|
+
end
|
2811
|
+
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2812
|
+
end
|
482
2813
|
|
2814
|
+
# Repeat the elements in this Series as specified in the given expression.
|
2815
|
+
#
|
2816
|
+
# The repeated elements are expanded into a `List`.
|
2817
|
+
#
|
2818
|
+
# @param by [Object]
|
2819
|
+
# Numeric column that determines how often the values will be repeated.
|
2820
|
+
# The column will be coerced to UInt32. Give this dtype to make the coercion a
|
2821
|
+
# no-op.
|
483
2822
|
#
|
2823
|
+
# @return [Expr]
|
2824
|
+
#
|
2825
|
+
# @example
|
2826
|
+
# df = Polars::DataFrame.new(
|
2827
|
+
# {
|
2828
|
+
# "a" => ["x", "y", "z"],
|
2829
|
+
# "n" => [1, 2, 3]
|
2830
|
+
# }
|
2831
|
+
# )
|
2832
|
+
# df.select(Polars.col("a").repeat_by("n"))
|
2833
|
+
# # =>
|
2834
|
+
# # shape: (3, 1)
|
2835
|
+
# # ┌─────────────────┐
|
2836
|
+
# # │ a │
|
2837
|
+
# # │ --- │
|
2838
|
+
# # │ list[str] │
|
2839
|
+
# # ╞═════════════════╡
|
2840
|
+
# # │ ["x"] │
|
2841
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2842
|
+
# # │ ["y", "y"] │
|
2843
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2844
|
+
# # │ ["z", "z", "z"] │
|
2845
|
+
# # └─────────────────┘
|
484
2846
|
def repeat_by(by)
|
485
|
-
by = Utils.expr_to_lit_or_expr(by, false)
|
2847
|
+
by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
|
486
2848
|
wrap_expr(_rbexpr.repeat_by(by._rbexpr))
|
487
2849
|
end
|
488
2850
|
|
489
|
-
#
|
490
|
-
#
|
2851
|
+
# Check if this expression is between start and end.
|
2852
|
+
#
|
2853
|
+
# @param start [Object]
|
2854
|
+
# Lower bound as primitive type or datetime.
|
2855
|
+
# @param _end [Object]
|
2856
|
+
# Upper bound as primitive type or datetime.
|
2857
|
+
# @param include_bounds [Boolean]
|
2858
|
+
# false: Exclude both start and end (default).
|
2859
|
+
# true: Include both start and end.
|
2860
|
+
# [false, false]: Exclude start and exclude end.
|
2861
|
+
# [true, true]: Include start and include end.
|
2862
|
+
# [false, true]: Exclude start and include end.
|
2863
|
+
# [true, false]: Include start and exclude end.
|
2864
|
+
#
|
2865
|
+
# @return [Expr]
|
2866
|
+
#
|
2867
|
+
# @example
|
2868
|
+
# df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
|
2869
|
+
# df.with_column(Polars.col("num").is_between(2, 4))
|
2870
|
+
# # =>
|
2871
|
+
# # shape: (5, 2)
|
2872
|
+
# # ┌─────┬────────────┐
|
2873
|
+
# # │ num ┆ is_between │
|
2874
|
+
# # │ --- ┆ --- │
|
2875
|
+
# # │ i64 ┆ bool │
|
2876
|
+
# # ╞═════╪════════════╡
|
2877
|
+
# # │ 1 ┆ false │
|
2878
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2879
|
+
# # │ 2 ┆ false │
|
2880
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2881
|
+
# # │ 3 ┆ true │
|
2882
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2883
|
+
# # │ 4 ┆ false │
|
2884
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
2885
|
+
# # │ 5 ┆ false │
|
2886
|
+
# # └─────┴────────────┘
|
2887
|
+
def is_between(start, _end, include_bounds: false)
|
2888
|
+
if include_bounds == false || include_bounds == [false, false]
|
2889
|
+
((self > start) & (self < _end)).alias("is_between")
|
2890
|
+
elsif include_bounds == true || include_bounds == [true, true]
|
2891
|
+
((self >= start) & (self <= _end)).alias("is_between")
|
2892
|
+
elsif include_bounds == [false, true]
|
2893
|
+
((self > start) & (self <= _end)).alias("is_between")
|
2894
|
+
elsif include_bounds == [true, false]
|
2895
|
+
((self >= start) & (self < _end)).alias("is_between")
|
2896
|
+
else
|
2897
|
+
raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
|
2898
|
+
end
|
2899
|
+
end
|
491
2900
|
|
492
2901
|
# def _hash
|
493
2902
|
# end
|
494
2903
|
|
2904
|
+
# Reinterpret the underlying bits as a signed/unsigned integer.
|
495
2905
|
#
|
2906
|
+
# This operation is only allowed for 64-bit integers. For integers with fewer bits,
|
2907
|
+
# you can safely use the cast operation.
|
2908
|
+
#
|
2909
|
+
# @param signed [Boolean]
|
2910
|
+
# If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
|
2911
|
+
#
|
2912
|
+
# @return [Expr]
|
2913
|
+
#
|
2914
|
+
# @example
|
2915
|
+
# s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
|
2916
|
+
# df = Polars::DataFrame.new([s])
|
2917
|
+
# df.select(
|
2918
|
+
# [
|
2919
|
+
# Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
|
2920
|
+
# Polars.col("a").alias("original")
|
2921
|
+
# ]
|
2922
|
+
# )
|
2923
|
+
# # =>
|
2924
|
+
# # shape: (3, 2)
|
2925
|
+
# # ┌───────────────┬──────────┐
|
2926
|
+
# # │ reinterpreted ┆ original │
|
2927
|
+
# # │ --- ┆ --- │
|
2928
|
+
# # │ i64 ┆ u64 │
|
2929
|
+
# # ╞═══════════════╪══════════╡
|
2930
|
+
# # │ 1 ┆ 1 │
|
2931
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2932
|
+
# # │ 1 ┆ 1 │
|
2933
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
2934
|
+
# # │ 2 ┆ 2 │
|
2935
|
+
# # └───────────────┴──────────┘
|
496
2936
|
def reinterpret(signed: false)
|
497
2937
|
wrap_expr(_rbexpr.reinterpret(signed))
|
498
2938
|
end
|
@@ -500,147 +2940,1541 @@ module Polars
|
|
500
2940
|
# def _inspect
|
501
2941
|
# end
|
502
2942
|
|
2943
|
+
# Fill nulls with linear interpolation over missing values.
|
2944
|
+
#
|
2945
|
+
# Can also be used to regrid data to a new grid - see examples below.
|
503
2946
|
#
|
2947
|
+
# @return [Expr]
|
2948
|
+
#
|
2949
|
+
# @example Fill nulls with linear interpolation
|
2950
|
+
# df = Polars::DataFrame.new(
|
2951
|
+
# {
|
2952
|
+
# "a" => [1, nil, 3],
|
2953
|
+
# "b" => [1.0, Float::NAN, 3.0]
|
2954
|
+
# }
|
2955
|
+
# )
|
2956
|
+
# df.select(Polars.all.interpolate)
|
2957
|
+
# # =>
|
2958
|
+
# # shape: (3, 2)
|
2959
|
+
# # ┌─────┬─────┐
|
2960
|
+
# # │ a ┆ b │
|
2961
|
+
# # │ --- ┆ --- │
|
2962
|
+
# # │ i64 ┆ f64 │
|
2963
|
+
# # ╞═════╪═════╡
|
2964
|
+
# # │ 1 ┆ 1.0 │
|
2965
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2966
|
+
# # │ 2 ┆ NaN │
|
2967
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2968
|
+
# # │ 3 ┆ 3.0 │
|
2969
|
+
# # └─────┴─────┘
|
504
2970
|
def interpolate
|
505
2971
|
wrap_expr(_rbexpr.interpolate)
|
506
2972
|
end
|
507
2973
|
|
508
|
-
#
|
509
|
-
#
|
510
|
-
|
511
|
-
#
|
512
|
-
#
|
513
|
-
|
514
|
-
#
|
515
|
-
#
|
516
|
-
|
517
|
-
#
|
518
|
-
#
|
519
|
-
|
520
|
-
#
|
521
|
-
#
|
522
|
-
|
523
|
-
#
|
524
|
-
#
|
525
|
-
|
526
|
-
#
|
527
|
-
#
|
528
|
-
|
529
|
-
#
|
530
|
-
#
|
2974
|
+
# Apply a rolling min (moving min) over the values in this array.
|
2975
|
+
#
|
2976
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2977
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2978
|
+
# `weights` vector. The resulting values will be aggregated to their minimum.
|
2979
|
+
#
|
2980
|
+
# @param window_size [Integer]
|
2981
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
2982
|
+
# size indicated by a timedelta or the following string language:
|
2983
|
+
#
|
2984
|
+
# - 1ns (1 nanosecond)
|
2985
|
+
# - 1us (1 microsecond)
|
2986
|
+
# - 1ms (1 millisecond)
|
2987
|
+
# - 1s (1 second)
|
2988
|
+
# - 1m (1 minute)
|
2989
|
+
# - 1h (1 hour)
|
2990
|
+
# - 1d (1 day)
|
2991
|
+
# - 1w (1 week)
|
2992
|
+
# - 1mo (1 calendar month)
|
2993
|
+
# - 1y (1 calendar year)
|
2994
|
+
# - 1i (1 index count)
|
2995
|
+
#
|
2996
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
2997
|
+
# and `closed` arguments must also be set.
|
2998
|
+
# @param weights [Array]
|
2999
|
+
# An optional slice with the same length as the window that will be multiplied
|
3000
|
+
# elementwise with the values in the window.
|
3001
|
+
# @param min_periods [Integer]
|
3002
|
+
# The number of values in the window that should be non-null before computing
|
3003
|
+
# a result. If nil, it will be set equal to window size.
|
3004
|
+
# @param center [Boolean]
|
3005
|
+
# Set the labels at the center of the window
|
3006
|
+
# @param by [String]
|
3007
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3008
|
+
# set the column that will be used to determine the windows. This column must
|
3009
|
+
# be of dtype `{Date, Datetime}`
|
3010
|
+
# @param closed ["left", "right", "both", "none"]
|
3011
|
+
# Define whether the temporal window interval is closed or not.
|
3012
|
+
#
|
3013
|
+
# @note
|
3014
|
+
# This functionality is experimental and may change without it being considered a
|
3015
|
+
# breaking change.
|
3016
|
+
#
|
3017
|
+
# @note
|
3018
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3019
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3020
|
+
# computation.
|
3021
|
+
#
|
3022
|
+
# @return [Expr]
|
3023
|
+
#
|
3024
|
+
# @example
|
3025
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3026
|
+
# df.select(
|
3027
|
+
# [
|
3028
|
+
# Polars.col("A").rolling_min(2)
|
3029
|
+
# ]
|
3030
|
+
# )
|
3031
|
+
# # =>
|
3032
|
+
# # shape: (6, 1)
|
3033
|
+
# # ┌──────┐
|
3034
|
+
# # │ A │
|
3035
|
+
# # │ --- │
|
3036
|
+
# # │ f64 │
|
3037
|
+
# # ╞══════╡
|
3038
|
+
# # │ null │
|
3039
|
+
# # ├╌╌╌╌╌╌┤
|
3040
|
+
# # │ 1.0 │
|
3041
|
+
# # ├╌╌╌╌╌╌┤
|
3042
|
+
# # │ 2.0 │
|
3043
|
+
# # ├╌╌╌╌╌╌┤
|
3044
|
+
# # │ 3.0 │
|
3045
|
+
# # ├╌╌╌╌╌╌┤
|
3046
|
+
# # │ 4.0 │
|
3047
|
+
# # ├╌╌╌╌╌╌┤
|
3048
|
+
# # │ 5.0 │
|
3049
|
+
# # └──────┘
|
3050
|
+
def rolling_min(
|
3051
|
+
window_size,
|
3052
|
+
weights: nil,
|
3053
|
+
min_periods: nil,
|
3054
|
+
center: false,
|
3055
|
+
by: nil,
|
3056
|
+
closed: "left"
|
3057
|
+
)
|
3058
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3059
|
+
window_size, min_periods
|
3060
|
+
)
|
3061
|
+
wrap_expr(
|
3062
|
+
_rbexpr.rolling_min(
|
3063
|
+
window_size, weights, min_periods, center, by, closed
|
3064
|
+
)
|
3065
|
+
)
|
3066
|
+
end
|
3067
|
+
|
3068
|
+
# Apply a rolling max (moving max) over the values in this array.
|
3069
|
+
#
|
3070
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3071
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3072
|
+
# `weights` vector. The resulting values will be aggregated to their maximum.
|
3073
|
+
#
|
3074
|
+
# @param window_size [Integer]
|
3075
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3076
|
+
# size indicated by a timedelta or the following string language:
|
3077
|
+
#
|
3078
|
+
# - 1ns (1 nanosecond)
|
3079
|
+
# - 1us (1 microsecond)
|
3080
|
+
# - 1ms (1 millisecond)
|
3081
|
+
# - 1s (1 second)
|
3082
|
+
# - 1m (1 minute)
|
3083
|
+
# - 1h (1 hour)
|
3084
|
+
# - 1d (1 day)
|
3085
|
+
# - 1w (1 week)
|
3086
|
+
# - 1mo (1 calendar month)
|
3087
|
+
# - 1y (1 calendar year)
|
3088
|
+
# - 1i (1 index count)
|
3089
|
+
#
|
3090
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3091
|
+
# and `closed` arguments must also be set.
|
3092
|
+
# @param weights [Array]
|
3093
|
+
# An optional slice with the same length as the window that will be multiplied
|
3094
|
+
# elementwise with the values in the window.
|
3095
|
+
# @param min_periods [Integer]
|
3096
|
+
# The number of values in the window that should be non-null before computing
|
3097
|
+
# a result. If nil, it will be set equal to window size.
|
3098
|
+
# @param center [Boolean]
|
3099
|
+
# Set the labels at the center of the window
|
3100
|
+
# @param by [String]
|
3101
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3102
|
+
# set the column that will be used to determine the windows. This column must
|
3103
|
+
# be of dtype `{Date, Datetime}`
|
3104
|
+
# @param closed ["left", "right", "both", "none"]
|
3105
|
+
# Define whether the temporal window interval is closed or not.
|
3106
|
+
#
|
3107
|
+
# @note
|
3108
|
+
# This functionality is experimental and may change without it being considered a
|
3109
|
+
# breaking change.
|
3110
|
+
#
|
3111
|
+
# @note
|
3112
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3113
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3114
|
+
# computation.
|
3115
|
+
#
|
3116
|
+
# @return [Expr]
|
3117
|
+
#
|
3118
|
+
# @example
|
3119
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3120
|
+
# df.select(
|
3121
|
+
# [
|
3122
|
+
# Polars.col("A").rolling_max(2)
|
3123
|
+
# ]
|
3124
|
+
# )
|
3125
|
+
# # =>
|
3126
|
+
# # shape: (6, 1)
|
3127
|
+
# # ┌──────┐
|
3128
|
+
# # │ A │
|
3129
|
+
# # │ --- │
|
3130
|
+
# # │ f64 │
|
3131
|
+
# # ╞══════╡
|
3132
|
+
# # │ null │
|
3133
|
+
# # ├╌╌╌╌╌╌┤
|
3134
|
+
# # │ 2.0 │
|
3135
|
+
# # ├╌╌╌╌╌╌┤
|
3136
|
+
# # │ 3.0 │
|
3137
|
+
# # ├╌╌╌╌╌╌┤
|
3138
|
+
# # │ 4.0 │
|
3139
|
+
# # ├╌╌╌╌╌╌┤
|
3140
|
+
# # │ 5.0 │
|
3141
|
+
# # ├╌╌╌╌╌╌┤
|
3142
|
+
# # │ 6.0 │
|
3143
|
+
# # └──────┘
|
3144
|
+
def rolling_max(
|
3145
|
+
window_size,
|
3146
|
+
weights: nil,
|
3147
|
+
min_periods: nil,
|
3148
|
+
center: false,
|
3149
|
+
by: nil,
|
3150
|
+
closed: "left"
|
3151
|
+
)
|
3152
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3153
|
+
window_size, min_periods
|
3154
|
+
)
|
3155
|
+
wrap_expr(
|
3156
|
+
_rbexpr.rolling_max(
|
3157
|
+
window_size, weights, min_periods, center, by, closed
|
3158
|
+
)
|
3159
|
+
)
|
3160
|
+
end
|
3161
|
+
|
3162
|
+
# Apply a rolling mean (moving mean) over the values in this array.
|
3163
|
+
#
|
3164
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3165
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3166
|
+
# `weights` vector. The resulting values will be aggregated to their mean.
|
3167
|
+
#
|
3168
|
+
# @param window_size [Integer]
|
3169
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3170
|
+
# size indicated by a timedelta or the following string language:
|
3171
|
+
#
|
3172
|
+
# - 1ns (1 nanosecond)
|
3173
|
+
# - 1us (1 microsecond)
|
3174
|
+
# - 1ms (1 millisecond)
|
3175
|
+
# - 1s (1 second)
|
3176
|
+
# - 1m (1 minute)
|
3177
|
+
# - 1h (1 hour)
|
3178
|
+
# - 1d (1 day)
|
3179
|
+
# - 1w (1 week)
|
3180
|
+
# - 1mo (1 calendar month)
|
3181
|
+
# - 1y (1 calendar year)
|
3182
|
+
# - 1i (1 index count)
|
3183
|
+
#
|
3184
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3185
|
+
# and `closed` arguments must also be set.
|
3186
|
+
# @param weights [Array]
|
3187
|
+
# An optional slice with the same length as the window that will be multiplied
|
3188
|
+
# elementwise with the values in the window.
|
3189
|
+
# @param min_periods [Integer]
|
3190
|
+
# The number of values in the window that should be non-null before computing
|
3191
|
+
# a result. If nil, it will be set equal to window size.
|
3192
|
+
# @param center [Boolean]
|
3193
|
+
# Set the labels at the center of the window
|
3194
|
+
# @param by [String]
|
3195
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3196
|
+
# set the column that will be used to determine the windows. This column must
|
3197
|
+
# be of dtype `{Date, Datetime}`
|
3198
|
+
# @param closed ["left", "right", "both", "none"]
|
3199
|
+
# Define whether the temporal window interval is closed or not.
|
3200
|
+
#
|
3201
|
+
# @note
|
3202
|
+
# This functionality is experimental and may change without it being considered a
|
3203
|
+
# breaking change.
|
3204
|
+
#
|
3205
|
+
# @note
|
3206
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3207
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3208
|
+
# computation.
|
3209
|
+
#
|
3210
|
+
# @return [Expr]
|
3211
|
+
#
|
3212
|
+
# @example
|
3213
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
|
3214
|
+
# df.select(
|
3215
|
+
# [
|
3216
|
+
# Polars.col("A").rolling_mean(2)
|
3217
|
+
# ]
|
3218
|
+
# )
|
3219
|
+
# # =>
|
3220
|
+
# # shape: (6, 1)
|
3221
|
+
# # ┌──────┐
|
3222
|
+
# # │ A │
|
3223
|
+
# # │ --- │
|
3224
|
+
# # │ f64 │
|
3225
|
+
# # ╞══════╡
|
3226
|
+
# # │ null │
|
3227
|
+
# # ├╌╌╌╌╌╌┤
|
3228
|
+
# # │ 4.5 │
|
3229
|
+
# # ├╌╌╌╌╌╌┤
|
3230
|
+
# # │ 7.0 │
|
3231
|
+
# # ├╌╌╌╌╌╌┤
|
3232
|
+
# # │ 4.0 │
|
3233
|
+
# # ├╌╌╌╌╌╌┤
|
3234
|
+
# # │ 9.0 │
|
3235
|
+
# # ├╌╌╌╌╌╌┤
|
3236
|
+
# # │ 13.0 │
|
3237
|
+
# # └──────┘
|
3238
|
+
def rolling_mean(
|
3239
|
+
window_size,
|
3240
|
+
weights: nil,
|
3241
|
+
min_periods: nil,
|
3242
|
+
center: false,
|
3243
|
+
by: nil,
|
3244
|
+
closed: "left"
|
3245
|
+
)
|
3246
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3247
|
+
window_size, min_periods
|
3248
|
+
)
|
3249
|
+
wrap_expr(
|
3250
|
+
_rbexpr.rolling_mean(
|
3251
|
+
window_size, weights, min_periods, center, by, closed
|
3252
|
+
)
|
3253
|
+
)
|
3254
|
+
end
|
3255
|
+
|
3256
|
+
# Apply a rolling sum (moving sum) over the values in this array.
|
3257
|
+
#
|
3258
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3259
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3260
|
+
# `weights` vector. The resulting values will be aggregated to their sum.
|
3261
|
+
#
|
3262
|
+
# @param window_size [Integer]
|
3263
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3264
|
+
# size indicated by a timedelta or the following string language:
|
3265
|
+
#
|
3266
|
+
# - 1ns (1 nanosecond)
|
3267
|
+
# - 1us (1 microsecond)
|
3268
|
+
# - 1ms (1 millisecond)
|
3269
|
+
# - 1s (1 second)
|
3270
|
+
# - 1m (1 minute)
|
3271
|
+
# - 1h (1 hour)
|
3272
|
+
# - 1d (1 day)
|
3273
|
+
# - 1w (1 week)
|
3274
|
+
# - 1mo (1 calendar month)
|
3275
|
+
# - 1y (1 calendar year)
|
3276
|
+
# - 1i (1 index count)
|
3277
|
+
#
|
3278
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3279
|
+
# and `closed` arguments must also be set.
|
3280
|
+
# @param weights [Array]
|
3281
|
+
# An optional slice with the same length as the window that will be multiplied
|
3282
|
+
# elementwise with the values in the window.
|
3283
|
+
# @param min_periods [Integer]
|
3284
|
+
# The number of values in the window that should be non-null before computing
|
3285
|
+
# a result. If nil, it will be set equal to window size.
|
3286
|
+
# @param center [Boolean]
|
3287
|
+
# Set the labels at the center of the window
|
3288
|
+
# @param by [String]
|
3289
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3290
|
+
# set the column that will be used to determine the windows. This column must
|
3291
|
+
# be of dtype `{Date, Datetime}`
|
3292
|
+
# @param closed ["left", "right", "both", "none"]
|
3293
|
+
# Define whether the temporal window interval is closed or not.
|
3294
|
+
#
|
3295
|
+
# @note
|
3296
|
+
# This functionality is experimental and may change without it being considered a
|
3297
|
+
# breaking change.
|
3298
|
+
#
|
3299
|
+
# @note
|
3300
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3301
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3302
|
+
# computation.
|
3303
|
+
#
|
3304
|
+
# @return [Expr]
|
3305
|
+
#
|
3306
|
+
# @example
|
3307
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
|
3308
|
+
# df.select(
|
3309
|
+
# [
|
3310
|
+
# Polars.col("A").rolling_sum(2)
|
3311
|
+
# ]
|
3312
|
+
# )
|
3313
|
+
# # =>
|
3314
|
+
# # shape: (6, 1)
|
3315
|
+
# # ┌──────┐
|
3316
|
+
# # │ A │
|
3317
|
+
# # │ --- │
|
3318
|
+
# # │ f64 │
|
3319
|
+
# # ╞══════╡
|
3320
|
+
# # │ null │
|
3321
|
+
# # ├╌╌╌╌╌╌┤
|
3322
|
+
# # │ 3.0 │
|
3323
|
+
# # ├╌╌╌╌╌╌┤
|
3324
|
+
# # │ 5.0 │
|
3325
|
+
# # ├╌╌╌╌╌╌┤
|
3326
|
+
# # │ 7.0 │
|
3327
|
+
# # ├╌╌╌╌╌╌┤
|
3328
|
+
# # │ 9.0 │
|
3329
|
+
# # ├╌╌╌╌╌╌┤
|
3330
|
+
# # │ 11.0 │
|
3331
|
+
# # └──────┘
|
3332
|
+
def rolling_sum(
|
3333
|
+
window_size,
|
3334
|
+
weights: nil,
|
3335
|
+
min_periods: nil,
|
3336
|
+
center: false,
|
3337
|
+
by: nil,
|
3338
|
+
closed: "left"
|
3339
|
+
)
|
3340
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3341
|
+
window_size, min_periods
|
3342
|
+
)
|
3343
|
+
wrap_expr(
|
3344
|
+
_rbexpr.rolling_sum(
|
3345
|
+
window_size, weights, min_periods, center, by, closed
|
3346
|
+
)
|
3347
|
+
)
|
3348
|
+
end
|
3349
|
+
|
3350
|
+
# Compute a rolling standard deviation.
|
3351
|
+
#
|
3352
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3353
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3354
|
+
# `weights` vector. The resulting values will be aggregated to their standard deviation.
|
3355
|
+
#
|
3356
|
+
# @param window_size [Integer]
|
3357
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3358
|
+
# size indicated by a timedelta or the following string language:
|
3359
|
+
#
|
3360
|
+
# - 1ns (1 nanosecond)
|
3361
|
+
# - 1us (1 microsecond)
|
3362
|
+
# - 1ms (1 millisecond)
|
3363
|
+
# - 1s (1 second)
|
3364
|
+
# - 1m (1 minute)
|
3365
|
+
# - 1h (1 hour)
|
3366
|
+
# - 1d (1 day)
|
3367
|
+
# - 1w (1 week)
|
3368
|
+
# - 1mo (1 calendar month)
|
3369
|
+
# - 1y (1 calendar year)
|
3370
|
+
# - 1i (1 index count)
|
3371
|
+
#
|
3372
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3373
|
+
# and `closed` arguments must also be set.
|
3374
|
+
# @param weights [Array]
|
3375
|
+
# An optional slice with the same length as the window that will be multiplied
|
3376
|
+
# elementwise with the values in the window.
|
3377
|
+
# @param min_periods [Integer]
|
3378
|
+
# The number of values in the window that should be non-null before computing
|
3379
|
+
# a result. If nil, it will be set equal to window size.
|
3380
|
+
# @param center [Boolean]
|
3381
|
+
# Set the labels at the center of the window
|
3382
|
+
# @param by [String]
|
3383
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3384
|
+
# set the column that will be used to determine the windows. This column must
|
3385
|
+
# be of dtype `{Date, Datetime}`
|
3386
|
+
# @param closed ["left", "right", "both", "none"]
|
3387
|
+
# Define whether the temporal window interval is closed or not.
|
3388
|
+
#
|
3389
|
+
# @note
|
3390
|
+
# This functionality is experimental and may change without it being considered a
|
3391
|
+
# breaking change.
|
3392
|
+
#
|
3393
|
+
# @note
|
3394
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3395
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3396
|
+
# computation.
|
3397
|
+
#
|
3398
|
+
# @return [Expr]
|
3399
|
+
#
|
3400
|
+
# @example
|
3401
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3402
|
+
# df.select(
|
3403
|
+
# [
|
3404
|
+
# Polars.col("A").rolling_std(3)
|
3405
|
+
# ]
|
3406
|
+
# )
|
3407
|
+
# # =>
|
3408
|
+
# # shape: (6, 1)
|
3409
|
+
# # ┌──────────┐
|
3410
|
+
# # │ A │
|
3411
|
+
# # │ --- │
|
3412
|
+
# # │ f64 │
|
3413
|
+
# # ╞══════════╡
|
3414
|
+
# # │ null │
|
3415
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3416
|
+
# # │ null │
|
3417
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3418
|
+
# # │ 1.0 │
|
3419
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3420
|
+
# # │ 1.0 │
|
3421
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3422
|
+
# # │ 1.527525 │
|
3423
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3424
|
+
# # │ 2.0 │
|
3425
|
+
# # └──────────┘
|
3426
|
+
def rolling_std(
|
3427
|
+
window_size,
|
3428
|
+
weights: nil,
|
3429
|
+
min_periods: nil,
|
3430
|
+
center: false,
|
3431
|
+
by: nil,
|
3432
|
+
closed: "left"
|
3433
|
+
)
|
3434
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3435
|
+
window_size, min_periods
|
3436
|
+
)
|
3437
|
+
wrap_expr(
|
3438
|
+
_rbexpr.rolling_std(
|
3439
|
+
window_size, weights, min_periods, center, by, closed
|
3440
|
+
)
|
3441
|
+
)
|
3442
|
+
end
|
3443
|
+
|
3444
|
+
# Compute a rolling variance.
|
3445
|
+
#
|
3446
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
3447
|
+
# this window will (optionally) be multiplied with the weights given by the
|
3448
|
+
# `weights` vector. The resulting values will be aggregated to their variance.
|
3449
|
+
#
|
3450
|
+
# @param window_size [Integer]
|
3451
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3452
|
+
# size indicated by a timedelta or the following string language:
|
3453
|
+
#
|
3454
|
+
# - 1ns (1 nanosecond)
|
3455
|
+
# - 1us (1 microsecond)
|
3456
|
+
# - 1ms (1 millisecond)
|
3457
|
+
# - 1s (1 second)
|
3458
|
+
# - 1m (1 minute)
|
3459
|
+
# - 1h (1 hour)
|
3460
|
+
# - 1d (1 day)
|
3461
|
+
# - 1w (1 week)
|
3462
|
+
# - 1mo (1 calendar month)
|
3463
|
+
# - 1y (1 calendar year)
|
3464
|
+
# - 1i (1 index count)
|
3465
|
+
#
|
3466
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3467
|
+
# and `closed` arguments must also be set.
|
3468
|
+
# @param weights [Array]
|
3469
|
+
# An optional slice with the same length as the window that will be multiplied
|
3470
|
+
# elementwise with the values in the window.
|
3471
|
+
# @param min_periods [Integer]
|
3472
|
+
# The number of values in the window that should be non-null before computing
|
3473
|
+
# a result. If nil, it will be set equal to window size.
|
3474
|
+
# @param center [Boolean]
|
3475
|
+
# Set the labels at the center of the window
|
3476
|
+
# @param by [String]
|
3477
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3478
|
+
# set the column that will be used to determine the windows. This column must
|
3479
|
+
# be of dtype `{Date, Datetime}`
|
3480
|
+
# @param closed ["left", "right", "both", "none"]
|
3481
|
+
# Define whether the temporal window interval is closed or not.
|
3482
|
+
#
|
3483
|
+
# @note
|
3484
|
+
# This functionality is experimental and may change without it being considered a
|
3485
|
+
# breaking change.
|
3486
|
+
#
|
3487
|
+
# @note
|
3488
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3489
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3490
|
+
# computation.
|
3491
|
+
#
|
3492
|
+
# @return [Expr]
|
3493
|
+
#
|
3494
|
+
# @example
|
3495
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3496
|
+
# df.select(
|
3497
|
+
# [
|
3498
|
+
# Polars.col("A").rolling_var(3)
|
3499
|
+
# ]
|
3500
|
+
# )
|
3501
|
+
# # =>
|
3502
|
+
# # shape: (6, 1)
|
3503
|
+
# # ┌──────────┐
|
3504
|
+
# # │ A │
|
3505
|
+
# # │ --- │
|
3506
|
+
# # │ f64 │
|
3507
|
+
# # ╞══════════╡
|
3508
|
+
# # │ null │
|
3509
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3510
|
+
# # │ null │
|
3511
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3512
|
+
# # │ 1.0 │
|
3513
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3514
|
+
# # │ 1.0 │
|
3515
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3516
|
+
# # │ 2.333333 │
|
3517
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3518
|
+
# # │ 4.0 │
|
3519
|
+
# # └──────────┘
|
3520
|
+
def rolling_var(
|
3521
|
+
window_size,
|
3522
|
+
weights: nil,
|
3523
|
+
min_periods: nil,
|
3524
|
+
center: false,
|
3525
|
+
by: nil,
|
3526
|
+
closed: "left"
|
3527
|
+
)
|
3528
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3529
|
+
window_size, min_periods
|
3530
|
+
)
|
3531
|
+
wrap_expr(
|
3532
|
+
_rbexpr.rolling_var(
|
3533
|
+
window_size, weights, min_periods, center, by, closed
|
3534
|
+
)
|
3535
|
+
)
|
3536
|
+
end
|
3537
|
+
|
3538
|
+
# Compute a rolling median.
|
3539
|
+
#
|
3540
|
+
# @param window_size [Integer]
|
3541
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3542
|
+
# size indicated by a timedelta or the following string language:
|
3543
|
+
#
|
3544
|
+
# - 1ns (1 nanosecond)
|
3545
|
+
# - 1us (1 microsecond)
|
3546
|
+
# - 1ms (1 millisecond)
|
3547
|
+
# - 1s (1 second)
|
3548
|
+
# - 1m (1 minute)
|
3549
|
+
# - 1h (1 hour)
|
3550
|
+
# - 1d (1 day)
|
3551
|
+
# - 1w (1 week)
|
3552
|
+
# - 1mo (1 calendar month)
|
3553
|
+
# - 1y (1 calendar year)
|
3554
|
+
# - 1i (1 index count)
|
3555
|
+
#
|
3556
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3557
|
+
# and `closed` arguments must also be set.
|
3558
|
+
# @param weights [Array]
|
3559
|
+
# An optional slice with the same length as the window that will be multiplied
|
3560
|
+
# elementwise with the values in the window.
|
3561
|
+
# @param min_periods [Integer]
|
3562
|
+
# The number of values in the window that should be non-null before computing
|
3563
|
+
# a result. If nil, it will be set equal to window size.
|
3564
|
+
# @param center [Boolean]
|
3565
|
+
# Set the labels at the center of the window
|
3566
|
+
# @param by [String]
|
3567
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3568
|
+
# set the column that will be used to determine the windows. This column must
|
3569
|
+
# be of dtype `{Date, Datetime}`
|
3570
|
+
# @param closed ["left", "right", "both", "none"]
|
3571
|
+
# Define whether the temporal window interval is closed or not.
|
3572
|
+
#
|
3573
|
+
# @note
|
3574
|
+
# This functionality is experimental and may change without it being considered a
|
3575
|
+
# breaking change.
|
3576
|
+
#
|
3577
|
+
# @note
|
3578
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3579
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3580
|
+
# computation.
|
3581
|
+
#
|
3582
|
+
# @return [Expr]
|
3583
|
+
#
|
3584
|
+
# @example
|
3585
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3586
|
+
# df.select(
|
3587
|
+
# [
|
3588
|
+
# Polars.col("A").rolling_median(3)
|
3589
|
+
# ]
|
3590
|
+
# )
|
3591
|
+
# # =>
|
3592
|
+
# # shape: (6, 1)
|
3593
|
+
# # ┌──────┐
|
3594
|
+
# # │ A │
|
3595
|
+
# # │ --- │
|
3596
|
+
# # │ f64 │
|
3597
|
+
# # ╞══════╡
|
3598
|
+
# # │ null │
|
3599
|
+
# # ├╌╌╌╌╌╌┤
|
3600
|
+
# # │ null │
|
3601
|
+
# # ├╌╌╌╌╌╌┤
|
3602
|
+
# # │ 2.0 │
|
3603
|
+
# # ├╌╌╌╌╌╌┤
|
3604
|
+
# # │ 3.0 │
|
3605
|
+
# # ├╌╌╌╌╌╌┤
|
3606
|
+
# # │ 4.0 │
|
3607
|
+
# # ├╌╌╌╌╌╌┤
|
3608
|
+
# # │ 6.0 │
|
3609
|
+
# # └──────┘
|
3610
|
+
def rolling_median(
|
3611
|
+
window_size,
|
3612
|
+
weights: nil,
|
3613
|
+
min_periods: nil,
|
3614
|
+
center: false,
|
3615
|
+
by: nil,
|
3616
|
+
closed: "left"
|
3617
|
+
)
|
3618
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3619
|
+
window_size, min_periods
|
3620
|
+
)
|
3621
|
+
wrap_expr(
|
3622
|
+
_rbexpr.rolling_median(
|
3623
|
+
window_size, weights, min_periods, center, by, closed
|
3624
|
+
)
|
3625
|
+
)
|
3626
|
+
end
|
3627
|
+
|
3628
|
+
# Compute a rolling quantile.
|
3629
|
+
#
|
3630
|
+
# @param quantile [Float]
|
3631
|
+
# Quantile between 0.0 and 1.0.
|
3632
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
3633
|
+
# Interpolation method.
|
3634
|
+
# @param window_size [Integer]
|
3635
|
+
# The length of the window. Can be a fixed integer size, or a dynamic temporal
|
3636
|
+
# size indicated by a timedelta or the following string language:
|
3637
|
+
#
|
3638
|
+
# - 1ns (1 nanosecond)
|
3639
|
+
# - 1us (1 microsecond)
|
3640
|
+
# - 1ms (1 millisecond)
|
3641
|
+
# - 1s (1 second)
|
3642
|
+
# - 1m (1 minute)
|
3643
|
+
# - 1h (1 hour)
|
3644
|
+
# - 1d (1 day)
|
3645
|
+
# - 1w (1 week)
|
3646
|
+
# - 1mo (1 calendar month)
|
3647
|
+
# - 1y (1 calendar year)
|
3648
|
+
# - 1i (1 index count)
|
3649
|
+
#
|
3650
|
+
# If a timedelta or the dynamic string language is used, the `by`
|
3651
|
+
# and `closed` arguments must also be set.
|
3652
|
+
# @param weights [Array]
|
3653
|
+
# An optional slice with the same length as the window that will be multiplied
|
3654
|
+
# elementwise with the values in the window.
|
3655
|
+
# @param min_periods [Integer]
|
3656
|
+
# The number of values in the window that should be non-null before computing
|
3657
|
+
# a result. If nil, it will be set equal to window size.
|
3658
|
+
# @param center [Boolean]
|
3659
|
+
# Set the labels at the center of the window
|
3660
|
+
# @param by [String]
|
3661
|
+
# If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must
|
3662
|
+
# set the column that will be used to determine the windows. This column must
|
3663
|
+
# be of dtype `{Date, Datetime}`
|
3664
|
+
# @param closed ["left", "right", "both", "none"]
|
3665
|
+
# Define whether the temporal window interval is closed or not.
|
3666
|
+
#
|
3667
|
+
# @note
|
3668
|
+
# This functionality is experimental and may change without it being considered a
|
3669
|
+
# breaking change.
|
3670
|
+
#
|
3671
|
+
# @note
|
3672
|
+
# If you want to compute multiple aggregation statistics over the same dynamic
|
3673
|
+
# window, consider using `groupby_rolling`; this method can cache the window size
|
3674
|
+
# computation.
|
3675
|
+
#
|
3676
|
+
# @return [Expr]
|
3677
|
+
#
|
3678
|
+
# @example
|
3679
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
|
3680
|
+
# df.select(
|
3681
|
+
# [
|
3682
|
+
# Polars.col("A").rolling_quantile(0.33, window_size: 3)
|
3683
|
+
# ]
|
3684
|
+
# )
|
3685
|
+
# # =>
|
3686
|
+
# # shape: (6, 1)
|
3687
|
+
# # ┌──────┐
|
3688
|
+
# # │ A │
|
3689
|
+
# # │ --- │
|
3690
|
+
# # │ f64 │
|
3691
|
+
# # ╞══════╡
|
3692
|
+
# # │ null │
|
3693
|
+
# # ├╌╌╌╌╌╌┤
|
3694
|
+
# # │ null │
|
3695
|
+
# # ├╌╌╌╌╌╌┤
|
3696
|
+
# # │ 1.0 │
|
3697
|
+
# # ├╌╌╌╌╌╌┤
|
3698
|
+
# # │ 2.0 │
|
3699
|
+
# # ├╌╌╌╌╌╌┤
|
3700
|
+
# # │ 3.0 │
|
3701
|
+
# # ├╌╌╌╌╌╌┤
|
3702
|
+
# # │ 4.0 │
|
3703
|
+
# # └──────┘
|
3704
|
+
def rolling_quantile(
|
3705
|
+
quantile,
|
3706
|
+
interpolation: "nearest",
|
3707
|
+
window_size: 2,
|
3708
|
+
weights: nil,
|
3709
|
+
min_periods: nil,
|
3710
|
+
center: false,
|
3711
|
+
by: nil,
|
3712
|
+
closed: "left"
|
3713
|
+
)
|
3714
|
+
window_size, min_periods = _prepare_rolling_window_args(
|
3715
|
+
window_size, min_periods
|
3716
|
+
)
|
3717
|
+
wrap_expr(
|
3718
|
+
_rbexpr.rolling_quantile(
|
3719
|
+
quantile, interpolation, window_size, weights, min_periods, center, by, closed
|
3720
|
+
)
|
3721
|
+
)
|
3722
|
+
end
|
531
3723
|
|
532
3724
|
# def rolling_apply
|
533
3725
|
# end
|
534
3726
|
|
3727
|
+
# Compute a rolling skew.
|
535
3728
|
#
|
3729
|
+
# @param window_size [Integer]
|
3730
|
+
# Integer size of the rolling window.
|
3731
|
+
# @param bias [Boolean]
|
3732
|
+
# If false, the calculations are corrected for statistical bias.
|
3733
|
+
#
|
3734
|
+
# @return [Expr]
|
536
3735
|
def rolling_skew(window_size, bias: true)
|
537
3736
|
wrap_expr(_rbexpr.rolling_skew(window_size, bias))
|
538
3737
|
end
|
539
3738
|
|
3739
|
+
# Compute absolute values.
|
3740
|
+
#
|
3741
|
+
# @return [Expr]
|
3742
|
+
#
|
3743
|
+
# @example
|
3744
|
+
# df = Polars::DataFrame.new(
|
3745
|
+
# {
|
3746
|
+
# "A" => [-1.0, 0.0, 1.0, 2.0]
|
3747
|
+
# }
|
3748
|
+
# )
|
3749
|
+
# df.select(Polars.col("A").abs)
|
3750
|
+
# # =>
|
3751
|
+
# # shape: (4, 1)
|
3752
|
+
# # ┌─────┐
|
3753
|
+
# # │ A │
|
3754
|
+
# # │ --- │
|
3755
|
+
# # │ f64 │
|
3756
|
+
# # ╞═════╡
|
3757
|
+
# # │ 1.0 │
|
3758
|
+
# # ├╌╌╌╌╌┤
|
3759
|
+
# # │ 0.0 │
|
3760
|
+
# # ├╌╌╌╌╌┤
|
3761
|
+
# # │ 1.0 │
|
3762
|
+
# # ├╌╌╌╌╌┤
|
3763
|
+
# # │ 2.0 │
|
3764
|
+
# # └─────┘
|
540
3765
|
def abs
|
541
3766
|
wrap_expr(_rbexpr.abs)
|
542
3767
|
end
|
543
3768
|
|
3769
|
+
# Get the index values that would sort this column.
|
3770
|
+
#
|
3771
|
+
# Alias for {#arg_sort}.
|
3772
|
+
#
|
3773
|
+
# @param reverse [Boolean]
|
3774
|
+
# Sort in reverse (descending) order.
|
3775
|
+
# @param nulls_last [Boolean]
|
3776
|
+
# Place null values last instead of first.
|
3777
|
+
#
|
3778
|
+
# @return [Expr]
|
3779
|
+
#
|
3780
|
+
# @example
|
3781
|
+
# df = Polars::DataFrame.new(
|
3782
|
+
# {
|
3783
|
+
# "a" => [20, 10, 30]
|
3784
|
+
# }
|
3785
|
+
# )
|
3786
|
+
# df.select(Polars.col("a").argsort)
|
3787
|
+
# # =>
|
3788
|
+
# # shape: (3, 1)
|
3789
|
+
# # ┌─────┐
|
3790
|
+
# # │ a │
|
3791
|
+
# # │ --- │
|
3792
|
+
# # │ u32 │
|
3793
|
+
# # ╞═════╡
|
3794
|
+
# # │ 1 │
|
3795
|
+
# # ├╌╌╌╌╌┤
|
3796
|
+
# # │ 0 │
|
3797
|
+
# # ├╌╌╌╌╌┤
|
3798
|
+
# # │ 2 │
|
3799
|
+
# # └─────┘
|
544
3800
|
def argsort(reverse: false, nulls_last: false)
|
545
3801
|
arg_sort(reverse: reverse, nulls_last: nulls_last)
|
546
3802
|
end
|
547
3803
|
|
3804
|
+
# Assign ranks to data, dealing with ties appropriately.
|
3805
|
+
#
|
3806
|
+
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
|
3807
|
+
# The method used to assign ranks to tied elements.
|
3808
|
+
# The following methods are available:
|
3809
|
+
#
|
3810
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
3811
|
+
# all the tied values is assigned to each value.
|
3812
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
3813
|
+
# the tied values is assigned to each value. (This is also referred to
|
3814
|
+
# as "competition" ranking.)
|
3815
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
3816
|
+
# the tied values is assigned to each value.
|
3817
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
3818
|
+
# assigned the rank immediately after those assigned to the tied
|
3819
|
+
# elements.
|
3820
|
+
# - 'ordinal' : All values are given a distinct rank, corresponding to
|
3821
|
+
# the order that the values occur in the Series.
|
3822
|
+
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
3823
|
+
# on the order that the values occur in the Series.
|
3824
|
+
# @param reverse [Boolean]
|
3825
|
+
# Reverse the operation.
|
3826
|
+
#
|
3827
|
+
# @return [Expr]
|
3828
|
+
#
|
3829
|
+
# @example The 'average' method:
|
3830
|
+
# df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
|
3831
|
+
# df.select(Polars.col("a").rank)
|
3832
|
+
# # =>
|
3833
|
+
# # shape: (5, 1)
|
3834
|
+
# # ┌─────┐
|
3835
|
+
# # │ a │
|
3836
|
+
# # │ --- │
|
3837
|
+
# # │ f32 │
|
3838
|
+
# # ╞═════╡
|
3839
|
+
# # │ 3.0 │
|
3840
|
+
# # ├╌╌╌╌╌┤
|
3841
|
+
# # │ 4.5 │
|
3842
|
+
# # ├╌╌╌╌╌┤
|
3843
|
+
# # │ 1.5 │
|
3844
|
+
# # ├╌╌╌╌╌┤
|
3845
|
+
# # │ 1.5 │
|
3846
|
+
# # ├╌╌╌╌╌┤
|
3847
|
+
# # │ 4.5 │
|
3848
|
+
# # └─────┘
|
3849
|
+
#
|
3850
|
+
# @example The 'ordinal' method:
|
3851
|
+
# df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
|
3852
|
+
# df.select(Polars.col("a").rank(method: "ordinal"))
|
3853
|
+
# # =>
|
3854
|
+
# # shape: (5, 1)
|
3855
|
+
# # ┌─────┐
|
3856
|
+
# # │ a │
|
3857
|
+
# # │ --- │
|
3858
|
+
# # │ u32 │
|
3859
|
+
# # ╞═════╡
|
3860
|
+
# # │ 3 │
|
3861
|
+
# # ├╌╌╌╌╌┤
|
3862
|
+
# # │ 4 │
|
3863
|
+
# # ├╌╌╌╌╌┤
|
3864
|
+
# # │ 1 │
|
3865
|
+
# # ├╌╌╌╌╌┤
|
3866
|
+
# # │ 2 │
|
3867
|
+
# # ├╌╌╌╌╌┤
|
3868
|
+
# # │ 5 │
|
3869
|
+
# # └─────┘
|
548
3870
|
def rank(method: "average", reverse: false)
|
549
3871
|
wrap_expr(_rbexpr.rank(method, reverse))
|
550
3872
|
end
|
551
3873
|
|
3874
|
+
# Calculate the n-th discrete difference.
|
3875
|
+
#
|
3876
|
+
# @param n [Integer]
|
3877
|
+
# Number of slots to shift.
|
3878
|
+
# @param null_behavior ["ignore", "drop"]
|
3879
|
+
# How to handle null values.
|
3880
|
+
#
|
3881
|
+
# @return [Expr]
|
3882
|
+
#
|
3883
|
+
# @example
|
3884
|
+
# df = Polars::DataFrame.new(
|
3885
|
+
# {
|
3886
|
+
# "a" => [20, 10, 30]
|
3887
|
+
# }
|
3888
|
+
# )
|
3889
|
+
# df.select(Polars.col("a").diff)
|
3890
|
+
# # =>
|
3891
|
+
# # shape: (3, 1)
|
3892
|
+
# # ┌──────┐
|
3893
|
+
# # │ a │
|
3894
|
+
# # │ --- │
|
3895
|
+
# # │ i64 │
|
3896
|
+
# # ╞══════╡
|
3897
|
+
# # │ null │
|
3898
|
+
# # ├╌╌╌╌╌╌┤
|
3899
|
+
# # │ -10 │
|
3900
|
+
# # ├╌╌╌╌╌╌┤
|
3901
|
+
# # │ 20 │
|
3902
|
+
# # └──────┘
|
552
3903
|
def diff(n: 1, null_behavior: "ignore")
|
553
3904
|
wrap_expr(_rbexpr.diff(n, null_behavior))
|
554
3905
|
end
|
555
3906
|
|
3907
|
+
# Computes percentage change between values.
|
3908
|
+
#
|
3909
|
+
# Percentage change (as fraction) between current element and most-recent
|
3910
|
+
# non-null element at least `n` period(s) before the current element.
|
3911
|
+
#
|
3912
|
+
# Computes the change from the previous row by default.
|
3913
|
+
#
|
3914
|
+
# @param n [Integer]
|
3915
|
+
# Periods to shift for forming percent change.
|
3916
|
+
#
|
3917
|
+
# @return [Expr]
|
3918
|
+
#
|
3919
|
+
# @example
|
3920
|
+
# df = Polars::DataFrame.new(
|
3921
|
+
# {
|
3922
|
+
# "a" => [10, 11, 12, nil, 12]
|
3923
|
+
# }
|
3924
|
+
# )
|
3925
|
+
# df.with_column(Polars.col("a").pct_change.alias("pct_change"))
|
3926
|
+
# # =>
|
3927
|
+
# # shape: (5, 2)
|
3928
|
+
# # ┌──────┬────────────┐
|
3929
|
+
# # │ a ┆ pct_change │
|
3930
|
+
# # │ --- ┆ --- │
|
3931
|
+
# # │ i64 ┆ f64 │
|
3932
|
+
# # ╞══════╪════════════╡
|
3933
|
+
# # │ 10 ┆ null │
|
3934
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3935
|
+
# # │ 11 ┆ 0.1 │
|
3936
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3937
|
+
# # │ 12 ┆ 0.090909 │
|
3938
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3939
|
+
# # │ null ┆ 0.0 │
|
3940
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3941
|
+
# # │ 12 ┆ 0.0 │
|
3942
|
+
# # └──────┴────────────┘
|
556
3943
|
def pct_change(n: 1)
|
557
3944
|
wrap_expr(_rbexpr.pct_change(n))
|
558
3945
|
end
|
559
3946
|
|
3947
|
+
# Compute the sample skewness of a data set.
|
3948
|
+
#
|
3949
|
+
# For normally distributed data, the skewness should be about zero. For
|
3950
|
+
# unimodal continuous distributions, a skewness value greater than zero means
|
3951
|
+
# that there is more weight in the right tail of the distribution. The
|
3952
|
+
# function `skewtest` can be used to determine if the skewness value
|
3953
|
+
# is close enough to zero, statistically speaking.
|
3954
|
+
#
|
3955
|
+
# @param bias [Boolean]
|
3956
|
+
# If false, the calculations are corrected for statistical bias.
|
3957
|
+
#
|
3958
|
+
# @return [Expr]
|
3959
|
+
#
|
3960
|
+
# @example
|
3961
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
3962
|
+
# df.select(Polars.col("a").skew)
|
3963
|
+
# # =>
|
3964
|
+
# # shape: (1, 1)
|
3965
|
+
# # ┌──────────┐
|
3966
|
+
# # │ a │
|
3967
|
+
# # │ --- │
|
3968
|
+
# # │ f64 │
|
3969
|
+
# # ╞══════════╡
|
3970
|
+
# # │ 0.343622 │
|
3971
|
+
# # └──────────┘
|
560
3972
|
def skew(bias: true)
|
561
3973
|
wrap_expr(_rbexpr.skew(bias))
|
562
3974
|
end
|
563
3975
|
|
3976
|
+
# Compute the kurtosis (Fisher or Pearson) of a dataset.
|
3977
|
+
#
|
3978
|
+
# Kurtosis is the fourth central moment divided by the square of the
|
3979
|
+
# variance. If Fisher's definition is used, then 3.0 is subtracted from
|
3980
|
+
# the result to give 0.0 for a normal distribution.
|
3981
|
+
# If bias is false then the kurtosis is calculated using k statistics to
|
3982
|
+
# eliminate bias coming from biased moment estimators
|
3983
|
+
#
|
3984
|
+
# @param fisher [Boolean]
|
3985
|
+
# If true, Fisher's definition is used (normal ==> 0.0). If false,
|
3986
|
+
# Pearson's definition is used (normal ==> 3.0).
|
3987
|
+
# @param bias [Boolean]
|
3988
|
+
# If false, the calculations are corrected for statistical bias.
|
3989
|
+
#
|
3990
|
+
# @return [Expr]
|
3991
|
+
#
|
3992
|
+
# @example
|
3993
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
3994
|
+
# df.select(Polars.col("a").kurtosis)
|
3995
|
+
# # =>
|
3996
|
+
# # shape: (1, 1)
|
3997
|
+
# # ┌───────────┐
|
3998
|
+
# # │ a │
|
3999
|
+
# # │ --- │
|
4000
|
+
# # │ f64 │
|
4001
|
+
# # ╞═══════════╡
|
4002
|
+
# # │ -1.153061 │
|
4003
|
+
# # └───────────┘
|
564
4004
|
def kurtosis(fisher: true, bias: true)
|
565
4005
|
wrap_expr(_rbexpr.kurtosis(fisher, bias))
|
566
4006
|
end
|
567
4007
|
|
4008
|
+
# Clip (limit) the values in an array to a `min` and `max` boundary.
|
4009
|
+
#
|
4010
|
+
# Only works for numerical types.
|
4011
|
+
#
|
4012
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4013
|
+
# expression. See `when` for more information.
|
4014
|
+
#
|
4015
|
+
# @param min_val [Numeric]
|
4016
|
+
# Minimum value.
|
4017
|
+
# @param max_val [Numeric]
|
4018
|
+
# Maximum value.
|
4019
|
+
#
|
4020
|
+
# @return [Expr]
|
4021
|
+
#
|
4022
|
+
# @example
|
4023
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4024
|
+
# df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
|
4025
|
+
# # =>
|
4026
|
+
# # shape: (4, 2)
|
4027
|
+
# # ┌──────┬─────────────┐
|
4028
|
+
# # │ foo ┆ foo_clipped │
|
4029
|
+
# # │ --- ┆ --- │
|
4030
|
+
# # │ i64 ┆ i64 │
|
4031
|
+
# # ╞══════╪═════════════╡
|
4032
|
+
# # │ -50 ┆ 1 │
|
4033
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4034
|
+
# # │ 5 ┆ 5 │
|
4035
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4036
|
+
# # │ null ┆ null │
|
4037
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4038
|
+
# # │ 50 ┆ 10 │
|
4039
|
+
# # └──────┴─────────────┘
|
568
4040
|
def clip(min_val, max_val)
|
569
4041
|
wrap_expr(_rbexpr.clip(min_val, max_val))
|
570
4042
|
end
|
571
4043
|
|
4044
|
+
# Clip (limit) the values in an array to a `min` boundary.
|
4045
|
+
#
|
4046
|
+
# Only works for numerical types.
|
4047
|
+
#
|
4048
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4049
|
+
# expression. See `when` for more information.
|
4050
|
+
#
|
4051
|
+
# @param min_val [Numeric]
|
4052
|
+
# Minimum value.
|
4053
|
+
#
|
4054
|
+
# @return [Expr]
|
4055
|
+
#
|
4056
|
+
# @example
|
4057
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4058
|
+
# df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
|
4059
|
+
# # =>
|
4060
|
+
# # shape: (4, 2)
|
4061
|
+
# # ┌──────┬─────────────┐
|
4062
|
+
# # │ foo ┆ foo_clipped │
|
4063
|
+
# # │ --- ┆ --- │
|
4064
|
+
# # │ i64 ┆ i64 │
|
4065
|
+
# # ╞══════╪═════════════╡
|
4066
|
+
# # │ -50 ┆ 0 │
|
4067
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4068
|
+
# # │ 5 ┆ 5 │
|
4069
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4070
|
+
# # │ null ┆ null │
|
4071
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4072
|
+
# # │ 50 ┆ 50 │
|
4073
|
+
# # └──────┴─────────────┘
|
572
4074
|
def clip_min(min_val)
|
573
4075
|
wrap_expr(_rbexpr.clip_min(min_val))
|
574
4076
|
end
|
575
4077
|
|
4078
|
+
# Clip (limit) the values in an array to a `max` boundary.
|
4079
|
+
#
|
4080
|
+
# Only works for numerical types.
|
4081
|
+
#
|
4082
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
4083
|
+
# expression. See `when` for more information.
|
4084
|
+
#
|
4085
|
+
# @param max_val [Numeric]
|
4086
|
+
# Maximum value.
|
4087
|
+
#
|
4088
|
+
# @return [Expr]
|
4089
|
+
#
|
4090
|
+
# @example
|
4091
|
+
# df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
|
4092
|
+
# df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
|
4093
|
+
# # =>
|
4094
|
+
# # shape: (4, 2)
|
4095
|
+
# # ┌──────┬─────────────┐
|
4096
|
+
# # │ foo ┆ foo_clipped │
|
4097
|
+
# # │ --- ┆ --- │
|
4098
|
+
# # │ i64 ┆ i64 │
|
4099
|
+
# # ╞══════╪═════════════╡
|
4100
|
+
# # │ -50 ┆ -50 │
|
4101
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4102
|
+
# # │ 5 ┆ 0 │
|
4103
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4104
|
+
# # │ null ┆ null │
|
4105
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
4106
|
+
# # │ 50 ┆ 0 │
|
4107
|
+
# # └──────┴─────────────┘
|
576
4108
|
def clip_max(max_val)
|
577
4109
|
wrap_expr(_rbexpr.clip_max(max_val))
|
578
4110
|
end
|
579
4111
|
|
4112
|
+
# Calculate the lower bound.
|
4113
|
+
#
|
4114
|
+
# Returns a unit Series with the lowest value possible for the dtype of this
|
4115
|
+
# expression.
|
4116
|
+
#
|
4117
|
+
# @return [Expr]
|
4118
|
+
#
|
4119
|
+
# @example
|
4120
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4121
|
+
# df.select(Polars.col("a").lower_bound)
|
4122
|
+
# # =>
|
4123
|
+
# # shape: (1, 1)
|
4124
|
+
# # ┌──────────────────────┐
|
4125
|
+
# # │ a │
|
4126
|
+
# # │ --- │
|
4127
|
+
# # │ i64 │
|
4128
|
+
# # ╞══════════════════════╡
|
4129
|
+
# # │ -9223372036854775808 │
|
4130
|
+
# # └──────────────────────┘
|
580
4131
|
def lower_bound
|
581
4132
|
wrap_expr(_rbexpr.lower_bound)
|
582
4133
|
end
|
583
4134
|
|
4135
|
+
# Calculate the upper bound.
|
4136
|
+
#
|
4137
|
+
# Returns a unit Series with the highest value possible for the dtype of this
|
4138
|
+
# expression.
|
4139
|
+
#
|
4140
|
+
# @return [Expr]
|
4141
|
+
#
|
4142
|
+
# @example
|
4143
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
|
4144
|
+
# df.select(Polars.col("a").upper_bound)
|
4145
|
+
# # =>
|
4146
|
+
# # shape: (1, 1)
|
4147
|
+
# # ┌─────────────────────┐
|
4148
|
+
# # │ a │
|
4149
|
+
# # │ --- │
|
4150
|
+
# # │ i64 │
|
4151
|
+
# # ╞═════════════════════╡
|
4152
|
+
# # │ 9223372036854775807 │
|
4153
|
+
# # └─────────────────────┘
|
584
4154
|
def upper_bound
|
585
4155
|
wrap_expr(_rbexpr.upper_bound)
|
586
4156
|
end
|
587
4157
|
|
4158
|
+
# Compute the element-wise indication of the sign.
|
4159
|
+
#
|
4160
|
+
# @return [Expr]
|
4161
|
+
#
|
4162
|
+
# @example
|
4163
|
+
# df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
|
4164
|
+
# df.select(Polars.col("a").sign)
|
4165
|
+
# # =>
|
4166
|
+
# # shape: (5, 1)
|
4167
|
+
# # ┌──────┐
|
4168
|
+
# # │ a │
|
4169
|
+
# # │ --- │
|
4170
|
+
# # │ i64 │
|
4171
|
+
# # ╞══════╡
|
4172
|
+
# # │ -1 │
|
4173
|
+
# # ├╌╌╌╌╌╌┤
|
4174
|
+
# # │ 0 │
|
4175
|
+
# # ├╌╌╌╌╌╌┤
|
4176
|
+
# # │ 0 │
|
4177
|
+
# # ├╌╌╌╌╌╌┤
|
4178
|
+
# # │ 1 │
|
4179
|
+
# # ├╌╌╌╌╌╌┤
|
4180
|
+
# # │ null │
|
4181
|
+
# # └──────┘
|
588
4182
|
def sign
|
589
4183
|
wrap_expr(_rbexpr.sign)
|
590
4184
|
end
|
591
4185
|
|
4186
|
+
# Compute the element-wise value for the sine.
|
4187
|
+
#
|
4188
|
+
# @return [Expr]
|
4189
|
+
#
|
4190
|
+
# @example
|
4191
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4192
|
+
# df.select(Polars.col("a").sin)
|
4193
|
+
# # =>
|
4194
|
+
# # shape: (1, 1)
|
4195
|
+
# # ┌─────┐
|
4196
|
+
# # │ a │
|
4197
|
+
# # │ --- │
|
4198
|
+
# # │ f64 │
|
4199
|
+
# # ╞═════╡
|
4200
|
+
# # │ 0.0 │
|
4201
|
+
# # └─────┘
|
592
4202
|
def sin
|
593
4203
|
wrap_expr(_rbexpr.sin)
|
594
4204
|
end
|
595
4205
|
|
4206
|
+
# Compute the element-wise value for the cosine.
|
4207
|
+
#
|
4208
|
+
# @return [Expr]
|
4209
|
+
#
|
4210
|
+
# @example
|
4211
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4212
|
+
# df.select(Polars.col("a").cos)
|
4213
|
+
# # =>
|
4214
|
+
# # shape: (1, 1)
|
4215
|
+
# # ┌─────┐
|
4216
|
+
# # │ a │
|
4217
|
+
# # │ --- │
|
4218
|
+
# # │ f64 │
|
4219
|
+
# # ╞═════╡
|
4220
|
+
# # │ 1.0 │
|
4221
|
+
# # └─────┘
|
596
4222
|
def cos
|
597
4223
|
wrap_expr(_rbexpr.cos)
|
598
4224
|
end
|
599
4225
|
|
4226
|
+
# Compute the element-wise value for the tangent.
|
4227
|
+
#
|
4228
|
+
# @return [Expr]
|
4229
|
+
#
|
4230
|
+
# @example
|
4231
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4232
|
+
# df.select(Polars.col("a").tan)
|
4233
|
+
# # =>
|
4234
|
+
# # shape: (1, 1)
|
4235
|
+
# # ┌──────────┐
|
4236
|
+
# # │ a │
|
4237
|
+
# # │ --- │
|
4238
|
+
# # │ f64 │
|
4239
|
+
# # ╞══════════╡
|
4240
|
+
# # │ 1.557408 │
|
4241
|
+
# # └──────────┘
|
600
4242
|
def tan
|
601
4243
|
wrap_expr(_rbexpr.tan)
|
602
4244
|
end
|
603
4245
|
|
4246
|
+
# Compute the element-wise value for the inverse sine.
|
4247
|
+
#
|
4248
|
+
# @return [Expr]
|
4249
|
+
#
|
4250
|
+
# @example
|
4251
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4252
|
+
# df.select(Polars.col("a").arcsin)
|
4253
|
+
# # =>
|
4254
|
+
# # shape: (1, 1)
|
4255
|
+
# # ┌──────────┐
|
4256
|
+
# # │ a │
|
4257
|
+
# # │ --- │
|
4258
|
+
# # │ f64 │
|
4259
|
+
# # ╞══════════╡
|
4260
|
+
# # │ 1.570796 │
|
4261
|
+
# # └──────────┘
|
604
4262
|
def arcsin
|
605
4263
|
wrap_expr(_rbexpr.arcsin)
|
606
4264
|
end
|
607
4265
|
|
4266
|
+
# Compute the element-wise value for the inverse cosine.
|
4267
|
+
#
|
4268
|
+
# @return [Expr]
|
4269
|
+
#
|
4270
|
+
# @example
|
4271
|
+
# df = Polars::DataFrame.new({"a" => [0.0]})
|
4272
|
+
# df.select(Polars.col("a").arccos)
|
4273
|
+
# # =>
|
4274
|
+
# # shape: (1, 1)
|
4275
|
+
# # ┌──────────┐
|
4276
|
+
# # │ a │
|
4277
|
+
# # │ --- │
|
4278
|
+
# # │ f64 │
|
4279
|
+
# # ╞══════════╡
|
4280
|
+
# # │ 1.570796 │
|
4281
|
+
# # └──────────┘
|
608
4282
|
def arccos
|
609
4283
|
wrap_expr(_rbexpr.arccos)
|
610
4284
|
end
|
611
4285
|
|
4286
|
+
# Compute the element-wise value for the inverse tangent.
|
4287
|
+
#
|
4288
|
+
# @return [Expr]
|
4289
|
+
#
|
4290
|
+
# @example
|
4291
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4292
|
+
# df.select(Polars.col("a").arctan)
|
4293
|
+
# # =>
|
4294
|
+
# # shape: (1, 1)
|
4295
|
+
# # ┌──────────┐
|
4296
|
+
# # │ a │
|
4297
|
+
# # │ --- │
|
4298
|
+
# # │ f64 │
|
4299
|
+
# # ╞══════════╡
|
4300
|
+
# # │ 0.785398 │
|
4301
|
+
# # └──────────┘
|
612
4302
|
def arctan
|
613
4303
|
wrap_expr(_rbexpr.arctan)
|
614
4304
|
end
|
615
4305
|
|
4306
|
+
# Compute the element-wise value for the hyperbolic sine.
|
4307
|
+
#
|
4308
|
+
# @return [Expr]
|
4309
|
+
#
|
4310
|
+
# @example
|
4311
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4312
|
+
# df.select(Polars.col("a").sinh)
|
4313
|
+
# # =>
|
4314
|
+
# # shape: (1, 1)
|
4315
|
+
# # ┌──────────┐
|
4316
|
+
# # │ a │
|
4317
|
+
# # │ --- │
|
4318
|
+
# # │ f64 │
|
4319
|
+
# # ╞══════════╡
|
4320
|
+
# # │ 1.175201 │
|
4321
|
+
# # └──────────┘
|
616
4322
|
def sinh
|
617
4323
|
wrap_expr(_rbexpr.sinh)
|
618
4324
|
end
|
619
4325
|
|
4326
|
+
# Compute the element-wise value for the hyperbolic cosine.
|
4327
|
+
#
|
4328
|
+
# @return [Expr]
|
4329
|
+
#
|
4330
|
+
# @example
|
4331
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4332
|
+
# df.select(Polars.col("a").cosh)
|
4333
|
+
# # =>
|
4334
|
+
# # shape: (1, 1)
|
4335
|
+
# # ┌──────────┐
|
4336
|
+
# # │ a │
|
4337
|
+
# # │ --- │
|
4338
|
+
# # │ f64 │
|
4339
|
+
# # ╞══════════╡
|
4340
|
+
# # │ 1.543081 │
|
4341
|
+
# # └──────────┘
|
620
4342
|
def cosh
|
621
4343
|
wrap_expr(_rbexpr.cosh)
|
622
4344
|
end
|
623
4345
|
|
4346
|
+
# Compute the element-wise value for the hyperbolic tangent.
|
4347
|
+
#
|
4348
|
+
# @return [Expr]
|
4349
|
+
#
|
4350
|
+
# @example
|
4351
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4352
|
+
# df.select(Polars.col("a").tanh)
|
4353
|
+
# # =>
|
4354
|
+
# # shape: (1, 1)
|
4355
|
+
# # ┌──────────┐
|
4356
|
+
# # │ a │
|
4357
|
+
# # │ --- │
|
4358
|
+
# # │ f64 │
|
4359
|
+
# # ╞══════════╡
|
4360
|
+
# # │ 0.761594 │
|
4361
|
+
# # └──────────┘
|
624
4362
|
def tanh
|
625
4363
|
wrap_expr(_rbexpr.tanh)
|
626
4364
|
end
|
627
4365
|
|
4366
|
+
# Compute the element-wise value for the inverse hyperbolic sine.
|
4367
|
+
#
|
4368
|
+
# @return [Expr]
|
4369
|
+
#
|
4370
|
+
# @example
|
4371
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4372
|
+
# df.select(Polars.col("a").arcsinh)
|
4373
|
+
# # =>
|
4374
|
+
# # shape: (1, 1)
|
4375
|
+
# # ┌──────────┐
|
4376
|
+
# # │ a │
|
4377
|
+
# # │ --- │
|
4378
|
+
# # │ f64 │
|
4379
|
+
# # ╞══════════╡
|
4380
|
+
# # │ 0.881374 │
|
4381
|
+
# # └──────────┘
|
628
4382
|
def arcsinh
|
629
4383
|
wrap_expr(_rbexpr.arcsinh)
|
630
4384
|
end
|
631
4385
|
|
4386
|
+
# Compute the element-wise value for the inverse hyperbolic cosine.
|
4387
|
+
#
|
4388
|
+
# @return [Expr]
|
4389
|
+
#
|
4390
|
+
# @example
|
4391
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4392
|
+
# df.select(Polars.col("a").arccosh)
|
4393
|
+
# # =>
|
4394
|
+
# # shape: (1, 1)
|
4395
|
+
# # ┌─────┐
|
4396
|
+
# # │ a │
|
4397
|
+
# # │ --- │
|
4398
|
+
# # │ f64 │
|
4399
|
+
# # ╞═════╡
|
4400
|
+
# # │ 0.0 │
|
4401
|
+
# # └─────┘
|
632
4402
|
def arccosh
|
633
4403
|
wrap_expr(_rbexpr.arccosh)
|
634
4404
|
end
|
635
4405
|
|
4406
|
+
# Compute the element-wise value for the inverse hyperbolic tangent.
|
4407
|
+
#
|
4408
|
+
# @return [Expr]
|
4409
|
+
#
|
4410
|
+
# @example
|
4411
|
+
# df = Polars::DataFrame.new({"a" => [1.0]})
|
4412
|
+
# df.select(Polars.col("a").arctanh)
|
4413
|
+
# # =>
|
4414
|
+
# # shape: (1, 1)
|
4415
|
+
# # ┌─────┐
|
4416
|
+
# # │ a │
|
4417
|
+
# # │ --- │
|
4418
|
+
# # │ f64 │
|
4419
|
+
# # ╞═════╡
|
4420
|
+
# # │ inf │
|
4421
|
+
# # └─────┘
|
636
4422
|
def arctanh
|
637
4423
|
wrap_expr(_rbexpr.arctanh)
|
638
4424
|
end
|
639
4425
|
|
4426
|
+
# Reshape this Expr to a flat Series or a Series of Lists.
|
4427
|
+
#
|
4428
|
+
# @param dims [Array]
|
4429
|
+
# Array of the dimension sizes. If a -1 is used in any of the dimensions, that
|
4430
|
+
# dimension is inferred.
|
4431
|
+
#
|
4432
|
+
# @return [Expr]
|
4433
|
+
#
|
4434
|
+
# @example
|
4435
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
|
4436
|
+
# df.select(Polars.col("foo").reshape([3, 3]))
|
4437
|
+
# # =>
|
4438
|
+
# # shape: (3, 1)
|
4439
|
+
# # ┌───────────┐
|
4440
|
+
# # │ foo │
|
4441
|
+
# # │ --- │
|
4442
|
+
# # │ list[i64] │
|
4443
|
+
# # ╞═══════════╡
|
4444
|
+
# # │ [1, 2, 3] │
|
4445
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4446
|
+
# # │ [4, 5, 6] │
|
4447
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4448
|
+
# # │ [7, 8, 9] │
|
4449
|
+
# # └───────────┘
|
640
4450
|
def reshape(dims)
|
641
4451
|
wrap_expr(_rbexpr.reshape(dims))
|
642
4452
|
end
|
643
4453
|
|
4454
|
+
# Shuffle the contents of this expr.
|
4455
|
+
#
|
4456
|
+
# @param seed [Integer]
|
4457
|
+
# Seed for the random number generator. If set to nil (default), a random
|
4458
|
+
# seed is generated using `rand`.
|
4459
|
+
#
|
4460
|
+
# @return [Expr]
|
4461
|
+
#
|
4462
|
+
# @example
|
4463
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4464
|
+
# df.select(Polars.col("a").shuffle(seed: 1))
|
4465
|
+
# # =>
|
4466
|
+
# # shape: (3, 1)
|
4467
|
+
# # ┌─────┐
|
4468
|
+
# # │ a │
|
4469
|
+
# # │ --- │
|
4470
|
+
# # │ i64 │
|
4471
|
+
# # ╞═════╡
|
4472
|
+
# # │ 2 │
|
4473
|
+
# # ├╌╌╌╌╌┤
|
4474
|
+
# # │ 1 │
|
4475
|
+
# # ├╌╌╌╌╌┤
|
4476
|
+
# # │ 3 │
|
4477
|
+
# # └─────┘
|
644
4478
|
def shuffle(seed: nil)
|
645
4479
|
if seed.nil?
|
646
4480
|
seed = rand(10000)
|
@@ -648,73 +4482,514 @@ module Polars
|
|
648
4482
|
wrap_expr(_rbexpr.shuffle(seed))
|
649
4483
|
end
|
650
4484
|
|
651
|
-
#
|
652
|
-
#
|
653
|
-
|
654
|
-
#
|
655
|
-
#
|
656
|
-
|
657
|
-
#
|
658
|
-
#
|
4485
|
+
# Sample from this expression.
|
4486
|
+
#
|
4487
|
+
# @param frac [Float]
|
4488
|
+
# Fraction of items to return. Cannot be used with `n`.
|
4489
|
+
# @param with_replacement [Boolean]
|
4490
|
+
# Allow values to be sampled more than once.
|
4491
|
+
# @param shuffle [Boolean]
|
4492
|
+
# Shuffle the order of sampled data points.
|
4493
|
+
# @param seed [Integer]
|
4494
|
+
# Seed for the random number generator. If set to nil (default), a random
|
4495
|
+
# seed is used.
|
4496
|
+
# @param n [Integer]
|
4497
|
+
# Number of items to return. Cannot be used with `frac`.
|
4498
|
+
#
|
4499
|
+
# @return [Expr]
|
4500
|
+
#
|
4501
|
+
# @example
|
4502
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4503
|
+
# df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
|
4504
|
+
# # =>
|
4505
|
+
# # shape: (3, 1)
|
4506
|
+
# # ┌─────┐
|
4507
|
+
# # │ a │
|
4508
|
+
# # │ --- │
|
4509
|
+
# # │ i64 │
|
4510
|
+
# # ╞═════╡
|
4511
|
+
# # │ 3 │
|
4512
|
+
# # ├╌╌╌╌╌┤
|
4513
|
+
# # │ 1 │
|
4514
|
+
# # ├╌╌╌╌╌┤
|
4515
|
+
# # │ 1 │
|
4516
|
+
# # └─────┘
|
4517
|
+
def sample(
  frac: nil,
  with_replacement: true,
  shuffle: false,
  seed: nil,
  n: nil
)
  # `n` (absolute count) and `frac` (relative size) are two ways of asking
  # for the same thing, so supplying both is rejected up front.
  by_count = !n.nil?
  by_fraction = !frac.nil?
  raise ArgumentError, "cannot specify both `n` and `frac`" if by_count && by_fraction

  sampled =
    if by_count
      _rbexpr.sample_n(n, with_replacement, shuffle, seed)
    else
      # With neither `n` nor `frac` given, fall back to a fraction of 1.0.
      _rbexpr.sample_frac(by_fraction ? frac : 1.0, with_replacement, shuffle, seed)
    end

  wrap_expr(sampled)
end
|
665
4539
|
|
4540
|
+
# Exponentially-weighted moving average.
|
4541
|
+
#
|
4542
|
+
# @return [Expr]
|
4543
|
+
#
|
4544
|
+
# @example
|
4545
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4546
|
+
# df.select(Polars.col("a").ewm_mean(com: 1))
|
4547
|
+
# # =>
|
4548
|
+
# # shape: (3, 1)
|
4549
|
+
# # ┌──────────┐
|
4550
|
+
# # │ a │
|
4551
|
+
# # │ --- │
|
4552
|
+
# # │ f64 │
|
4553
|
+
# # ╞══════════╡
|
4554
|
+
# # │ 1.0 │
|
4555
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4556
|
+
# # │ 1.666667 │
|
4557
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4558
|
+
# # │ 2.428571 │
|
4559
|
+
# # └──────────┘
|
4560
|
+
def ewm_mean(
|
4561
|
+
com: nil,
|
4562
|
+
span: nil,
|
4563
|
+
half_life: nil,
|
4564
|
+
alpha: nil,
|
4565
|
+
adjust: true,
|
4566
|
+
min_periods: 1
|
4567
|
+
)
|
4568
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4569
|
+
wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
|
4570
|
+
end
|
4571
|
+
|
4572
|
+
# Exponentially-weighted moving standard deviation.
|
4573
|
+
#
|
4574
|
+
# @return [Expr]
|
4575
|
+
#
|
4576
|
+
# @example
|
4577
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4578
|
+
# df.select(Polars.col("a").ewm_std(com: 1))
|
4579
|
+
# # =>
|
4580
|
+
# # shape: (3, 1)
|
4581
|
+
# # ┌──────────┐
|
4582
|
+
# # │ a │
|
4583
|
+
# # │ --- │
|
4584
|
+
# # │ f64 │
|
4585
|
+
# # ╞══════════╡
|
4586
|
+
# # │ 0.0 │
|
4587
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4588
|
+
# # │ 0.707107 │
|
4589
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4590
|
+
# # │ 0.963624 │
|
4591
|
+
# # └──────────┘
|
4592
|
+
def ewm_std(
|
4593
|
+
com: nil,
|
4594
|
+
span: nil,
|
4595
|
+
half_life: nil,
|
4596
|
+
alpha: nil,
|
4597
|
+
adjust: true,
|
4598
|
+
bias: false,
|
4599
|
+
min_periods: 1
|
4600
|
+
)
|
4601
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4602
|
+
wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
|
4603
|
+
end
|
4604
|
+
|
4605
|
+
# Exponentially-weighted moving variance.
|
4606
|
+
#
|
4607
|
+
# @return [Expr]
|
4608
|
+
#
|
4609
|
+
# @example
|
4610
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4611
|
+
# df.select(Polars.col("a").ewm_var(com: 1))
|
4612
|
+
# # =>
|
4613
|
+
# # shape: (3, 1)
|
4614
|
+
# # ┌──────────┐
|
4615
|
+
# # │ a │
|
4616
|
+
# # │ --- │
|
4617
|
+
# # │ f64 │
|
4618
|
+
# # ╞══════════╡
|
4619
|
+
# # │ 0.0 │
|
4620
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4621
|
+
# # │ 0.5 │
|
4622
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4623
|
+
# # │ 0.928571 │
|
4624
|
+
# # └──────────┘
|
4625
|
+
def ewm_var(
|
4626
|
+
com: nil,
|
4627
|
+
span: nil,
|
4628
|
+
half_life: nil,
|
4629
|
+
alpha: nil,
|
4630
|
+
adjust: true,
|
4631
|
+
bias: false,
|
4632
|
+
min_periods: 1
|
4633
|
+
)
|
4634
|
+
alpha = _prepare_alpha(com, span, half_life, alpha)
|
4635
|
+
wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
|
4636
|
+
end
|
4637
|
+
|
4638
|
+
# Extend the Series with given number of values.
|
4639
|
+
#
|
4640
|
+
# @param value [Object]
|
4641
|
+
# The value to extend the Series with. This value may be nil to fill with
|
4642
|
+
# nulls.
|
4643
|
+
# @param n [Integer]
|
4644
|
+
# The number of values to extend.
|
666
4645
|
#
|
4646
|
+
# @return [Expr]
|
4647
|
+
#
|
4648
|
+
# @example
|
4649
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3]})
|
4650
|
+
# df.select(Polars.col("values").extend_constant(99, 2))
|
4651
|
+
# # =>
|
4652
|
+
# # shape: (5, 1)
|
4653
|
+
# # ┌────────┐
|
4654
|
+
# # │ values │
|
4655
|
+
# # │ --- │
|
4656
|
+
# # │ i64 │
|
4657
|
+
# # ╞════════╡
|
4658
|
+
# # │ 1 │
|
4659
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4660
|
+
# # │ 2 │
|
4661
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4662
|
+
# # │ 3 │
|
4663
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4664
|
+
# # │ 99 │
|
4665
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4666
|
+
# # │ 99 │
|
4667
|
+
# # └────────┘
|
4668
|
+
def extend_constant(value, n)
|
4669
|
+
wrap_expr(_rbexpr.extend_constant(value, n))
|
4670
|
+
end
|
4671
|
+
|
4672
|
+
# Count all unique values and create a struct mapping value to count.
|
4673
|
+
#
|
4674
|
+
# @param multithreaded [Boolean]
|
4675
|
+
# Better to turn this off in the aggregation context, as it can lead to
|
4676
|
+
# contention.
|
4677
|
+
# @param sort [Boolean]
|
4678
|
+
# Ensure the output is sorted from most values to least.
|
4679
|
+
#
|
4680
|
+
# @return [Expr]
|
4681
|
+
#
|
4682
|
+
# @example
|
4683
|
+
# df = Polars::DataFrame.new(
|
4684
|
+
# {
|
4685
|
+
# "id" => ["a", "b", "b", "c", "c", "c"]
|
4686
|
+
# }
|
4687
|
+
# )
|
4688
|
+
# df.select(
|
4689
|
+
# [
|
4690
|
+
# Polars.col("id").value_counts(sort: true),
|
4691
|
+
# ]
|
4692
|
+
# )
|
4693
|
+
# # =>
|
4694
|
+
# # shape: (3, 1)
|
4695
|
+
# # ┌───────────┐
|
4696
|
+
# # │ id │
|
4697
|
+
# # │ --- │
|
4698
|
+
# # │ struct[2] │
|
4699
|
+
# # ╞═══════════╡
|
4700
|
+
# # │ {"c",3} │
|
4701
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4702
|
+
# # │ {"b",2} │
|
4703
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┤
|
4704
|
+
# # │ {"a",1} │
|
4705
|
+
# # └───────────┘
|
667
4706
|
def value_counts(multithreaded: false, sort: false)
|
668
4707
|
wrap_expr(_rbexpr.value_counts(multithreaded, sort))
|
669
4708
|
end
|
670
4709
|
|
4710
|
+
# Return a count of the unique values in the order of appearance.
|
4711
|
+
#
|
4712
|
+
# This method differs from `value_counts` in that it does not return the
|
4713
|
+
# values, only the counts, and might be faster.
|
4714
|
+
#
|
4715
|
+
# @return [Expr]
|
4716
|
+
#
|
4717
|
+
# @example
|
4718
|
+
# df = Polars::DataFrame.new(
|
4719
|
+
# {
|
4720
|
+
# "id" => ["a", "b", "b", "c", "c", "c"]
|
4721
|
+
# }
|
4722
|
+
# )
|
4723
|
+
# df.select(
|
4724
|
+
# [
|
4725
|
+
# Polars.col("id").unique_counts
|
4726
|
+
# ]
|
4727
|
+
# )
|
4728
|
+
# # =>
|
4729
|
+
# # shape: (3, 1)
|
4730
|
+
# # ┌─────┐
|
4731
|
+
# # │ id │
|
4732
|
+
# # │ --- │
|
4733
|
+
# # │ u32 │
|
4734
|
+
# # ╞═════╡
|
4735
|
+
# # │ 1 │
|
4736
|
+
# # ├╌╌╌╌╌┤
|
4737
|
+
# # │ 2 │
|
4738
|
+
# # ├╌╌╌╌╌┤
|
4739
|
+
# # │ 3 │
|
4740
|
+
# # └─────┘
|
671
4741
|
def unique_counts
|
672
4742
|
wrap_expr(_rbexpr.unique_counts)
|
673
4743
|
end
|
674
4744
|
|
4745
|
+
# Compute the logarithm to a given base.
|
4746
|
+
#
|
4747
|
+
# @param base [Float]
|
4748
|
+
# Given base, defaults to `e`.
|
4749
|
+
#
|
4750
|
+
# @return [Expr]
|
4751
|
+
#
|
4752
|
+
# @example
|
4753
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4754
|
+
# df.select(Polars.col("a").log(2))
|
4755
|
+
# # =>
|
4756
|
+
# # shape: (3, 1)
|
4757
|
+
# # ┌──────────┐
|
4758
|
+
# # │ a │
|
4759
|
+
# # │ --- │
|
4760
|
+
# # │ f64 │
|
4761
|
+
# # ╞══════════╡
|
4762
|
+
# # │ 0.0 │
|
4763
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4764
|
+
# # │ 1.0 │
|
4765
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
4766
|
+
# # │ 1.584963 │
|
4767
|
+
# # └──────────┘
|
675
4768
|
def log(base = Math::E)
|
676
|
-
wrap_expr(
|
4769
|
+
wrap_expr(_rbexpr.log(base))
|
677
4770
|
end
|
678
4771
|
|
679
|
-
|
4772
|
+
# Computes the entropy.
|
4773
|
+
#
|
4774
|
+
# Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
|
4775
|
+
#
|
4776
|
+
# @param base [Float]
|
4777
|
+
# Given base, defaults to `2`.
|
4778
|
+
# @param normalize [Boolean]
|
4779
|
+
# Normalize pk if it doesn't sum to 1.
|
4780
|
+
#
|
4781
|
+
# @return [Expr]
|
4782
|
+
#
|
4783
|
+
# @example
|
4784
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3]})
|
4785
|
+
# df.select(Polars.col("a").entropy(base: 2))
|
4786
|
+
# # =>
|
4787
|
+
# # shape: (1, 1)
|
4788
|
+
# # ┌──────────┐
|
4789
|
+
# # │ a │
|
4790
|
+
# # │ --- │
|
4791
|
+
# # │ f64 │
|
4792
|
+
# # ╞══════════╡
|
4793
|
+
# # │ 1.459148 │
|
4794
|
+
# # └──────────┘
|
4795
|
+
#
|
4796
|
+
# @example
|
4797
|
+
# df.select(Polars.col("a").entropy(base: 2, normalize: false))
|
4798
|
+
# # =>
|
4799
|
+
# # shape: (1, 1)
|
4800
|
+
# # ┌───────────┐
|
4801
|
+
# # │ a │
|
4802
|
+
# # │ --- │
|
4803
|
+
# # │ f64 │
|
4804
|
+
# # ╞═══════════╡
|
4805
|
+
# # │ -6.754888 │
|
4806
|
+
# # └───────────┘
|
4807
|
+
def entropy(base: 2, normalize: true)
|
680
4808
|
wrap_expr(_rbexpr.entropy(base, normalize))
|
681
4809
|
end
|
682
4810
|
|
683
|
-
#
|
684
|
-
#
|
685
|
-
|
686
|
-
#
|
4811
|
+
# Run an expression over a sliding window that increases `1` slot every iteration.
|
4812
|
+
#
|
4813
|
+
# @param expr [Expr]
|
4814
|
+
# Expression to evaluate
|
4815
|
+
# @param min_periods [Integer]
|
4816
|
+
# Number of valid values there should be in the window before the expression
|
4817
|
+
# is evaluated. valid values = `length - null_count`
|
4818
|
+
# @param parallel [Boolean]
|
4819
|
+
# Run in parallel. Don't do this in a groupby or another operation that
|
4820
|
+
# already has much parallelization.
|
4821
|
+
#
|
4822
|
+
# @return [Expr]
|
4823
|
+
#
|
4824
|
+
# @note
|
4825
|
+
# This functionality is experimental and may change without it being considered a
|
4826
|
+
# breaking change.
|
4827
|
+
#
|
4828
|
+
# @note
|
4829
|
+
# This can be really slow as it can have `O(n^2)` complexity. Don't use this
|
4830
|
+
# for operations that visit all elements.
|
4831
|
+
#
|
4832
|
+
# @example
|
4833
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
|
4834
|
+
# df.select(
|
4835
|
+
# [
|
4836
|
+
# Polars.col("values").cumulative_eval(
|
4837
|
+
# Polars.element.first - Polars.element.last ** 2
|
4838
|
+
# )
|
4839
|
+
# ]
|
4840
|
+
# )
|
4841
|
+
# # =>
|
4842
|
+
# # shape: (5, 1)
|
4843
|
+
# # ┌────────┐
|
4844
|
+
# # │ values │
|
4845
|
+
# # │ --- │
|
4846
|
+
# # │ f64 │
|
4847
|
+
# # ╞════════╡
|
4848
|
+
# # │ 0.0 │
|
4849
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4850
|
+
# # │ -3.0 │
|
4851
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4852
|
+
# # │ -8.0 │
|
4853
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4854
|
+
# # │ -15.0 │
|
4855
|
+
# # ├╌╌╌╌╌╌╌╌┤
|
4856
|
+
# # │ -24.0 │
|
4857
|
+
# # └────────┘
|
4858
|
+
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
4859
|
+
wrap_expr(
|
4860
|
+
_rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
|
4861
|
+
)
|
4862
|
+
end
|
4863
|
+
|
4864
|
+
# Flags the expression as 'sorted'.
|
4865
|
+
#
|
4866
|
+
# Enables downstream code to use fast paths for sorted arrays.
|
4867
|
+
#
|
4868
|
+
# @param reverse [Boolean]
|
4869
|
+
# If the `Series` order is reversed, e.g. descending.
|
4870
|
+
#
|
4871
|
+
# @return [Expr]
|
4872
|
+
#
|
4873
|
+
# @note
|
4874
|
+
# This can lead to incorrect results if this `Series` is not sorted!!
|
4875
|
+
# Use with care!
|
4876
|
+
#
|
4877
|
+
# @example
|
4878
|
+
# df = Polars::DataFrame.new({"values" => [1, 2, 3]})
|
4879
|
+
# df.select(Polars.col("values").set_sorted.max)
|
4880
|
+
# # =>
|
4881
|
+
# # shape: (1, 1)
|
4882
|
+
# # ┌────────┐
|
4883
|
+
# # │ values │
|
4884
|
+
# # │ --- │
|
4885
|
+
# # │ i64 │
|
4886
|
+
# # ╞════════╡
|
4887
|
+
# # │ 3 │
|
4888
|
+
# # └────────┘
|
4889
|
+
# def set_sorted(reverse: false)
|
4890
|
+
# map { |s| s.set_sorted(reverse) }
|
687
4891
|
# end
|
688
4892
|
|
4893
|
+
# Aggregate to list.
|
4894
|
+
#
|
4895
|
+
# @return [Expr]
|
689
4896
|
#
|
4897
|
+
# @example
|
4898
|
+
# df = Polars::DataFrame.new(
|
4899
|
+
# {
|
4900
|
+
# "a" => [1, 2, 3],
|
4901
|
+
# "b" => [4, 5, 6]
|
4902
|
+
# }
|
4903
|
+
# )
|
4904
|
+
# df.select(Polars.all.list)
|
4905
|
+
# # =>
|
4906
|
+
# # shape: (1, 2)
|
4907
|
+
# # ┌───────────┬───────────┐
|
4908
|
+
# # │ a ┆ b │
|
4909
|
+
# # │ --- ┆ --- │
|
4910
|
+
# # │ list[i64] ┆ list[i64] │
|
4911
|
+
# # ╞═══════════╪═══════════╡
|
4912
|
+
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4913
|
+
# # └───────────┴───────────┘
|
690
4914
|
def list
|
691
4915
|
wrap_expr(_rbexpr.list)
|
692
4916
|
end
|
693
4917
|
|
4918
|
+
# Shrink numeric columns to the minimal required datatype.
|
4919
|
+
#
|
4920
|
+
# Shrink to the dtype needed to fit the extrema of this `Series`.
|
4921
|
+
# This can be used to reduce memory pressure.
|
4922
|
+
#
|
4923
|
+
# @return [Expr]
|
4924
|
+
#
|
4925
|
+
# @example
|
4926
|
+
# Polars::DataFrame.new(
|
4927
|
+
# {
|
4928
|
+
# "a" => [1, 2, 3],
|
4929
|
+
# "b" => [1, 2, 2 << 32],
|
4930
|
+
# "c" => [-1, 2, 1 << 30],
|
4931
|
+
# "d" => [-112, 2, 112],
|
4932
|
+
# "e" => [-112, 2, 129],
|
4933
|
+
# "f" => ["a", "b", "c"],
|
4934
|
+
# "g" => [0.1, 1.32, 0.12],
|
4935
|
+
# "h" => [true, nil, false]
|
4936
|
+
# }
|
4937
|
+
# ).select(Polars.all.shrink_dtype)
|
4938
|
+
# # =>
|
4939
|
+
# # shape: (3, 8)
|
4940
|
+
# # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
|
4941
|
+
# # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
|
4942
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
4943
|
+
# # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
|
4944
|
+
# # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
|
4945
|
+
# # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
|
4946
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
4947
|
+
# # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
|
4948
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
4949
|
+
# # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
|
4950
|
+
# # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
|
694
4951
|
def shrink_dtype
|
695
4952
|
wrap_expr(_rbexpr.shrink_dtype)
|
696
4953
|
end
|
697
4954
|
|
4955
|
+
# Create an object namespace of all list related methods.
|
4956
|
+
#
|
4957
|
+
# @return [ListExpr]
|
698
4958
|
def arr
|
699
4959
|
ListExpr.new(self)
|
700
4960
|
end
|
701
4961
|
|
4962
|
+
# Create an object namespace of all categorical related methods.
|
4963
|
+
#
|
4964
|
+
# @return [CatExpr]
|
702
4965
|
def cat
|
703
4966
|
CatExpr.new(self)
|
704
4967
|
end
|
705
4968
|
|
4969
|
+
# Create an object namespace of all datetime related methods.
|
4970
|
+
#
|
4971
|
+
# @return [DateTimeExpr]
|
706
4972
|
def dt
|
707
4973
|
DateTimeExpr.new(self)
|
708
4974
|
end
|
709
4975
|
|
4976
|
+
# Create an object namespace of all meta related expression methods.
|
4977
|
+
#
|
4978
|
+
# @return [MetaExpr]
|
710
4979
|
def meta
|
711
4980
|
MetaExpr.new(self)
|
712
4981
|
end
|
713
4982
|
|
4983
|
+
# Create an object namespace of all string related methods.
|
4984
|
+
#
|
4985
|
+
# @return [StringExpr]
|
714
4986
|
def str
|
715
4987
|
StringExpr.new(self)
|
716
4988
|
end
|
717
4989
|
|
4990
|
+
# Create an object namespace of all struct related methods.
|
4991
|
+
#
|
4992
|
+
# @return [StructExpr]
|
718
4993
|
def struct
|
719
4994
|
StructExpr.new(self)
|
720
4995
|
end
|
@@ -732,5 +5007,51 @@ module Polars
|
|
732
5007
|
def _to_expr(other)
|
733
5008
|
other.is_a?(Expr) ? other : Utils.lit(other)
|
734
5009
|
end
|
5010
|
+
|
5011
|
+
def _prepare_alpha(com, span, half_life, alpha)
  # Resolves the smoothing factor from exactly one of the four decay
  # parameters; the first non-nil one (in com, span, half_life, alpha order)
  # is validated and converted.
  if [com, span, half_life, alpha].compact.size > 1
    raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
  end

  unless com.nil?
    raise ArgumentError, "Require 'com' >= 0 (found #{com})" if com < 0.0

    return 1.0 / (1.0 + com)
  end

  unless span.nil?
    raise ArgumentError, "Require 'span' >= 1 (found #{span})" if span < 1.0

    return 2.0 / (span + 1.0)
  end

  unless half_life.nil?
    raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})" if half_life <= 0.0

    return 1.0 - Math.exp(-Math.log(2.0) / half_life)
  end

  # Only `alpha` can still be set here; it is returned as given once checked.
  raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set" if alpha.nil?
  raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})" if alpha <= 0 || alpha > 1

  alpha
end
|
5043
|
+
|
5044
|
+
def _prepare_rolling_window_args(window_size, min_periods)
  # An Integer window is rendered as the string "<n>i"; any other window
  # value is passed through unchanged.
  fixed_width = window_size.is_a?(Integer)

  # An explicit min_periods always wins. Otherwise an Integer window
  # supplies its own size as the minimum, and anything else falls back to 1.
  resolved_min =
    if !min_periods.nil?
      min_periods
    elsif fixed_width
      window_size
    else
      1
    end

  resolved_window = fixed_width ? "#{window_size}i" : window_size
  [resolved_window, resolved_min]
end
|
735
5056
|
end
|
736
5057
|
end
|