polars-df 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
data/lib/polars/expr.rb
CHANGED
@@ -11,71 +11,129 @@ module Polars
|
|
11
11
|
expr
|
12
12
|
end
|
13
13
|
|
14
|
+
# Returns a string representing the Expr.
|
15
|
+
#
|
16
|
+
# @return [String]
|
14
17
|
def to_s
|
15
18
|
_rbexpr.to_str
|
16
19
|
end
|
17
20
|
alias_method :inspect, :to_s
|
18
21
|
|
22
|
+
# Bitwise XOR.
|
23
|
+
#
|
24
|
+
# @return [Expr]
|
19
25
|
def ^(other)
|
20
26
|
wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
|
21
27
|
end
|
22
28
|
|
29
|
+
# Bitwise AND.
|
30
|
+
#
|
31
|
+
# @return [Expr]
|
23
32
|
def &(other)
|
24
33
|
wrap_expr(_rbexpr._and(_to_rbexpr(other)))
|
25
34
|
end
|
26
35
|
|
36
|
+
# Bitwise OR.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
27
39
|
def |(other)
|
28
40
|
wrap_expr(_rbexpr._or(_to_rbexpr(other)))
|
29
41
|
end
|
30
42
|
|
43
|
+
# Performs addition.
|
44
|
+
#
|
45
|
+
# @return [Expr]
|
31
46
|
def +(other)
|
32
47
|
wrap_expr(_rbexpr + _to_rbexpr(other))
|
33
48
|
end
|
34
49
|
|
50
|
+
# Performs subtraction.
|
51
|
+
#
|
52
|
+
# @return [Expr]
|
35
53
|
def -(other)
|
36
54
|
wrap_expr(_rbexpr - _to_rbexpr(other))
|
37
55
|
end
|
38
56
|
|
57
|
+
# Performs multiplication.
|
58
|
+
#
|
59
|
+
# @return [Expr]
|
39
60
|
def *(other)
|
40
61
|
wrap_expr(_rbexpr * _to_rbexpr(other))
|
41
62
|
end
|
42
63
|
|
64
|
+
# Performs division.
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
43
67
|
def /(other)
|
44
68
|
wrap_expr(_rbexpr / _to_rbexpr(other))
|
45
69
|
end
|
46
70
|
|
71
|
+
# Performs floor division.
|
72
|
+
#
|
73
|
+
# @return [Expr]
|
74
|
+
def floordiv(other)
|
75
|
+
wrap_expr(_rbexpr.floordiv(_to_rbexpr(other)))
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns the modulo.
|
79
|
+
#
|
80
|
+
# @return [Expr]
|
47
81
|
def %(other)
|
48
82
|
wrap_expr(_rbexpr % _to_rbexpr(other))
|
49
83
|
end
|
50
84
|
|
85
|
+
# Raises to the power of exponent.
|
86
|
+
#
|
87
|
+
# @return [Expr]
|
51
88
|
def **(power)
|
52
89
|
pow(power)
|
53
90
|
end
|
54
91
|
|
92
|
+
# Greater than or equal.
|
93
|
+
#
|
94
|
+
# @return [Expr]
|
55
95
|
def >=(other)
|
56
96
|
wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
|
57
97
|
end
|
58
98
|
|
99
|
+
# Less than or equal.
|
100
|
+
#
|
101
|
+
# @return [Expr]
|
59
102
|
def <=(other)
|
60
103
|
wrap_expr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
|
61
104
|
end
|
62
105
|
|
106
|
+
# Equal.
|
107
|
+
#
|
108
|
+
# @return [Expr]
|
63
109
|
def ==(other)
|
64
110
|
wrap_expr(_rbexpr.eq(_to_expr(other)._rbexpr))
|
65
111
|
end
|
66
112
|
|
113
|
+
# Not equal.
|
114
|
+
#
|
115
|
+
# @return [Expr]
|
67
116
|
def !=(other)
|
68
117
|
wrap_expr(_rbexpr.neq(_to_expr(other)._rbexpr))
|
69
118
|
end
|
70
119
|
|
120
|
+
# Less than.
|
121
|
+
#
|
122
|
+
# @return [Expr]
|
71
123
|
def <(other)
|
72
124
|
wrap_expr(_rbexpr.lt(_to_expr(other)._rbexpr))
|
73
125
|
end
|
74
126
|
|
127
|
+
# Greater than.
|
128
|
+
#
|
129
|
+
# @return [Expr]
|
75
130
|
def >(other)
|
76
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
77
132
|
end
|
78
133
|
|
134
|
+
# Performs negation.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
79
137
|
def -@
|
80
138
|
Utils.lit(0) - self
|
81
139
|
end
|
@@ -83,23 +141,119 @@ module Polars
|
|
83
141
|
# def to_physical
|
84
142
|
# end
|
85
143
|
|
144
|
+
# Check if any boolean value in a Boolean column is `true`.
|
86
145
|
#
|
146
|
+
# @return [Expr]
|
147
|
+
#
|
148
|
+
# @example
|
149
|
+
# df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
|
150
|
+
# df.select(Polars.all.any)
|
151
|
+
# # =>
|
152
|
+
# # shape: (1, 2)
|
153
|
+
# # ┌──────┬───────┐
|
154
|
+
# # │ TF ┆ FF │
|
155
|
+
# # │ --- ┆ --- │
|
156
|
+
# # │ bool ┆ bool │
|
157
|
+
# # ╞══════╪═══════╡
|
158
|
+
# # │ true ┆ false │
|
159
|
+
# # └──────┴───────┘
|
87
160
|
def any
|
88
161
|
wrap_expr(_rbexpr.any)
|
89
162
|
end
|
90
163
|
|
164
|
+
# Check if all boolean values in a Boolean column are `true`.
|
165
|
+
#
|
166
|
+
# This method is an expression - not to be confused with
|
167
|
+
# `Polars.all` which is a function to select all columns.
|
168
|
+
#
|
169
|
+
# @return [Expr]
|
170
|
+
#
|
171
|
+
# @example
|
172
|
+
# df = Polars::DataFrame.new(
|
173
|
+
# {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
|
174
|
+
# )
|
175
|
+
# df.select(Polars.col("*").all)
|
176
|
+
# # =>
|
177
|
+
# # shape: (1, 3)
|
178
|
+
# # ┌──────┬───────┬───────┐
|
179
|
+
# # │ TT ┆ TF ┆ FF │
|
180
|
+
# # │ --- ┆ --- ┆ --- │
|
181
|
+
# # │ bool ┆ bool ┆ bool │
|
182
|
+
# # ╞══════╪═══════╪═══════╡
|
183
|
+
# # │ true ┆ false ┆ false │
|
184
|
+
# # └──────┴───────┴───────┘
|
91
185
|
def all
|
92
186
|
wrap_expr(_rbexpr.all)
|
93
187
|
end
|
94
188
|
|
189
|
+
# Compute the square root of the elements.
|
190
|
+
#
|
191
|
+
# @return [Expr]
|
192
|
+
#
|
193
|
+
# @example
|
194
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
195
|
+
# df.select(Polars.col("values").sqrt)
|
196
|
+
# # =>
|
197
|
+
# # shape: (3, 1)
|
198
|
+
# # ┌──────────┐
|
199
|
+
# # │ values │
|
200
|
+
# # │ --- │
|
201
|
+
# # │ f64 │
|
202
|
+
# # ╞══════════╡
|
203
|
+
# # │ 1.0 │
|
204
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
205
|
+
# # │ 1.414214 │
|
206
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
207
|
+
# # │ 2.0 │
|
208
|
+
# # └──────────┘
|
95
209
|
def sqrt
|
96
|
-
self
|
210
|
+
self**0.5
|
97
211
|
end
|
98
212
|
|
213
|
+
# Compute the base 10 logarithm of the input array, element-wise.
|
214
|
+
#
|
215
|
+
# @return [Expr]
|
216
|
+
#
|
217
|
+
# @example
|
218
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
219
|
+
# df.select(Polars.col("values").log10)
|
220
|
+
# # =>
|
221
|
+
# # shape: (3, 1)
|
222
|
+
# # ┌─────────┐
|
223
|
+
# # │ values │
|
224
|
+
# # │ --- │
|
225
|
+
# # │ f64 │
|
226
|
+
# # ╞═════════╡
|
227
|
+
# # │ 0.0 │
|
228
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
229
|
+
# # │ 0.30103 │
|
230
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
231
|
+
# # │ 0.60206 │
|
232
|
+
# # └─────────┘
|
99
233
|
def log10
|
100
234
|
log(10)
|
101
235
|
end
|
102
236
|
|
237
|
+
# Compute the exponential, element-wise.
|
238
|
+
#
|
239
|
+
# @return [Expr]
|
240
|
+
#
|
241
|
+
# @example
|
242
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
243
|
+
# df.select(Polars.col("values").exp)
|
244
|
+
# # =>
|
245
|
+
# # shape: (3, 1)
|
246
|
+
# # ┌──────────┐
|
247
|
+
# # │ values │
|
248
|
+
# # │ --- │
|
249
|
+
# # │ f64 │
|
250
|
+
# # ╞══════════╡
|
251
|
+
# # │ 2.718282 │
|
252
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
253
|
+
# # │ 7.389056 │
|
254
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
255
|
+
# # │ 54.59815 │
|
256
|
+
# # └──────────┘
|
103
257
|
def exp
|
104
258
|
wrap_expr(_rbexpr.exp)
|
105
259
|
end
|
@@ -146,23 +300,166 @@ module Polars
|
|
146
300
|
# def map_alias
|
147
301
|
# end
|
148
302
|
|
303
|
+
# Negate a boolean expression.
|
149
304
|
#
|
305
|
+
# @return [Expr]
|
306
|
+
#
|
307
|
+
# @example
|
308
|
+
# df = Polars::DataFrame.new(
|
309
|
+
# {
|
310
|
+
# "a" => [true, false, false],
|
311
|
+
# "b" => ["a", "b", nil]
|
312
|
+
# }
|
313
|
+
# )
|
314
|
+
# # =>
|
315
|
+
# # shape: (3, 2)
|
316
|
+
# # ┌───────┬──────┐
|
317
|
+
# # │ a ┆ b │
|
318
|
+
# # │ --- ┆ --- │
|
319
|
+
# # │ bool ┆ str │
|
320
|
+
# # ╞═══════╪══════╡
|
321
|
+
# # │ true ┆ a │
|
322
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
323
|
+
# # │ false ┆ b │
|
324
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
325
|
+
# # │ false ┆ null │
|
326
|
+
# # └───────┴──────┘
|
327
|
+
#
|
328
|
+
# @example
|
329
|
+
# df.select(Polars.col("a").is_not)
|
330
|
+
# # =>
|
331
|
+
# # shape: (3, 1)
|
332
|
+
# # ┌───────┐
|
333
|
+
# # │ a │
|
334
|
+
# # │ --- │
|
335
|
+
# # │ bool │
|
336
|
+
# # ╞═══════╡
|
337
|
+
# # │ false │
|
338
|
+
# # ├╌╌╌╌╌╌╌┤
|
339
|
+
# # │ true │
|
340
|
+
# # ├╌╌╌╌╌╌╌┤
|
341
|
+
# # │ true │
|
342
|
+
# # └───────┘
|
150
343
|
def is_not
|
151
344
|
wrap_expr(_rbexpr.is_not)
|
152
345
|
end
|
153
346
|
|
347
|
+
# Returns a boolean Series indicating which values are null.
|
348
|
+
#
|
349
|
+
# @return [Expr]
|
350
|
+
#
|
351
|
+
# @example
|
352
|
+
# df = Polars::DataFrame.new(
|
353
|
+
# {
|
354
|
+
# "a" => [1, 2, nil, 1, 5],
|
355
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
356
|
+
# }
|
357
|
+
# )
|
358
|
+
# df.with_column(Polars.all.is_null.suffix("_isnull"))
|
359
|
+
# # =>
|
360
|
+
# # shape: (5, 4)
|
361
|
+
# # ┌──────┬─────┬──────────┬──────────┐
|
362
|
+
# # │ a ┆ b ┆ a_isnull ┆ b_isnull │
|
363
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
364
|
+
# # │ i64 ┆ f64 ┆ bool ┆ bool │
|
365
|
+
# # ╞══════╪═════╪══════════╪══════════╡
|
366
|
+
# # │ 1 ┆ 1.0 ┆ false ┆ false │
|
367
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
368
|
+
# # │ 2 ┆ 2.0 ┆ false ┆ false │
|
369
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
370
|
+
# # │ null ┆ NaN ┆ true ┆ false │
|
371
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
372
|
+
# # │ 1 ┆ 1.0 ┆ false ┆ false │
|
373
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
374
|
+
# # │ 5 ┆ 5.0 ┆ false ┆ false │
|
375
|
+
# # └──────┴─────┴──────────┴──────────┘
|
154
376
|
def is_null
|
155
377
|
wrap_expr(_rbexpr.is_null)
|
156
378
|
end
|
157
379
|
|
380
|
+
# Returns a boolean Series indicating which values are not null.
|
381
|
+
#
|
382
|
+
# @return [Expr]
|
383
|
+
#
|
384
|
+
# @example
|
385
|
+
# df = Polars::DataFrame.new(
|
386
|
+
# {
|
387
|
+
# "a" => [1, 2, nil, 1, 5],
|
388
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
389
|
+
# }
|
390
|
+
# )
|
391
|
+
# df.with_column(Polars.all.is_not_null.suffix("_not_null"))
|
392
|
+
# # =>
|
393
|
+
# # shape: (5, 4)
|
394
|
+
# # ┌──────┬─────┬────────────┬────────────┐
|
395
|
+
# # │ a ┆ b ┆ a_not_null ┆ b_not_null │
|
396
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
397
|
+
# # │ i64 ┆ f64 ┆ bool ┆ bool │
|
398
|
+
# # ╞══════╪═════╪════════════╪════════════╡
|
399
|
+
# # │ 1 ┆ 1.0 ┆ true ┆ true │
|
400
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
401
|
+
# # │ 2 ┆ 2.0 ┆ true ┆ true │
|
402
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
403
|
+
# # │ null ┆ NaN ┆ false ┆ true │
|
404
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
405
|
+
# # │ 1 ┆ 1.0 ┆ true ┆ true │
|
406
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
407
|
+
# # │ 5 ┆ 5.0 ┆ true ┆ true │
|
408
|
+
# # └──────┴─────┴────────────┴────────────┘
|
158
409
|
def is_not_null
|
159
410
|
wrap_expr(_rbexpr.is_not_null)
|
160
411
|
end
|
161
412
|
|
413
|
+
# Returns a boolean Series indicating which values are finite.
|
414
|
+
#
|
415
|
+
# @return [Expr]
|
416
|
+
#
|
417
|
+
# @example
|
418
|
+
# df = Polars::DataFrame.new(
|
419
|
+
# {
|
420
|
+
# "A" => [1.0, 2],
|
421
|
+
# "B" => [3.0, Float::INFINITY]
|
422
|
+
# }
|
423
|
+
# )
|
424
|
+
# df.select(Polars.all.is_finite)
|
425
|
+
# # =>
|
426
|
+
# # shape: (2, 2)
|
427
|
+
# # ┌──────┬───────┐
|
428
|
+
# # │ A ┆ B │
|
429
|
+
# # │ --- ┆ --- │
|
430
|
+
# # │ bool ┆ bool │
|
431
|
+
# # ╞══════╪═══════╡
|
432
|
+
# # │ true ┆ true │
|
433
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
434
|
+
# # │ true ┆ false │
|
435
|
+
# # └──────┴───────┘
|
162
436
|
def is_finite
|
163
437
|
wrap_expr(_rbexpr.is_finite)
|
164
438
|
end
|
165
439
|
|
440
|
+
# Returns a boolean Series indicating which values are infinite.
|
441
|
+
#
|
442
|
+
# @return [Expr]
|
443
|
+
#
|
444
|
+
# @example
|
445
|
+
# df = Polars::DataFrame.new(
|
446
|
+
# {
|
447
|
+
# "A" => [1.0, 2],
|
448
|
+
# "B" => [3.0, Float::INFINITY]
|
449
|
+
# }
|
450
|
+
# )
|
451
|
+
# df.select(Polars.all.is_infinite)
|
452
|
+
# # =>
|
453
|
+
# # shape: (2, 2)
|
454
|
+
# # ┌───────┬───────┐
|
455
|
+
# # │ A ┆ B │
|
456
|
+
# # │ --- ┆ --- │
|
457
|
+
# # │ bool ┆ bool │
|
458
|
+
# # ╞═══════╪═══════╡
|
459
|
+
# # │ false ┆ false │
|
460
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
461
|
+
# # │ false ┆ true │
|
462
|
+
# # └───────┴───────┘
|
166
463
|
def is_infinite
|
167
464
|
wrap_expr(_rbexpr.is_infinite)
|
168
465
|
end
|
@@ -179,14 +476,77 @@ module Polars
|
|
179
476
|
wrap_expr(_rbexpr.agg_groups)
|
180
477
|
end
|
181
478
|
|
479
|
+
# Count the number of values in this expression.
|
480
|
+
#
|
481
|
+
# @return [Expr]
|
482
|
+
#
|
483
|
+
# @example
|
484
|
+
# df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
|
485
|
+
# df.select(Polars.all.count)
|
486
|
+
# # =>
|
487
|
+
# # shape: (1, 2)
|
488
|
+
# # ┌─────┬─────┐
|
489
|
+
# # │ a ┆ b │
|
490
|
+
# # │ --- ┆ --- │
|
491
|
+
# # │ u32 ┆ u32 │
|
492
|
+
# # ╞═════╪═════╡
|
493
|
+
# # │ 3 ┆ 3 │
|
494
|
+
# # └─────┴─────┘
|
182
495
|
def count
|
183
496
|
wrap_expr(_rbexpr.count)
|
184
497
|
end
|
185
498
|
|
499
|
+
# Count the number of values in this expression.
|
500
|
+
#
|
501
|
+
# Alias for {#count}.
|
502
|
+
#
|
503
|
+
# @return [Expr]
|
504
|
+
#
|
505
|
+
# @example
|
506
|
+
# df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
|
507
|
+
# df.select(Polars.all.len)
|
508
|
+
# # =>
|
509
|
+
# # shape: (1, 2)
|
510
|
+
# # ┌─────┬─────┐
|
511
|
+
# # │ a ┆ b │
|
512
|
+
# # │ --- ┆ --- │
|
513
|
+
# # │ u32 ┆ u32 │
|
514
|
+
# # ╞═════╪═════╡
|
515
|
+
# # │ 3 ┆ 3 │
|
516
|
+
# # └─────┴─────┘
|
186
517
|
def len
|
187
518
|
count
|
188
519
|
end
|
189
520
|
|
521
|
+
# Get a slice of this expression.
|
522
|
+
#
|
523
|
+
# @param offset [Integer]
|
524
|
+
# Start index. Negative indexing is supported.
|
525
|
+
# @param length [Integer]
|
526
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
527
|
+
# will be selected.
|
528
|
+
#
|
529
|
+
# @return [Expr]
|
530
|
+
#
|
531
|
+
# @example
|
532
|
+
# df = Polars::DataFrame.new(
|
533
|
+
# {
|
534
|
+
# "a" => [8, 9, 10, 11],
|
535
|
+
# "b" => [nil, 4, 4, 4]
|
536
|
+
# }
|
537
|
+
# )
|
538
|
+
# df.select(Polars.all.slice(1, 2))
|
539
|
+
# # =>
|
540
|
+
# # shape: (2, 2)
|
541
|
+
# # ┌─────┬─────┐
|
542
|
+
# # │ a ┆ b │
|
543
|
+
# # │ --- ┆ --- │
|
544
|
+
# # │ i64 ┆ i64 │
|
545
|
+
# # ╞═════╪═════╡
|
546
|
+
# # │ 9 ┆ 4 │
|
547
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
548
|
+
# # │ 10 ┆ 4 │
|
549
|
+
# # └─────┴─────┘
|
190
550
|
def slice(offset, length = nil)
|
191
551
|
if !offset.is_a?(Expr)
|
192
552
|
offset = Polars.lit(offset)
|
@@ -202,14 +562,90 @@ module Polars
|
|
202
562
|
wrap_expr(_rbexpr.append(other._rbexpr, upcast))
|
203
563
|
end
|
204
564
|
|
565
|
+
# Create a single chunk of memory for this Series.
|
566
|
+
#
|
567
|
+
# @return [Expr]
|
568
|
+
#
|
569
|
+
# @example Create a Series with 3 nulls, append column a then rechunk
|
570
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
571
|
+
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
572
|
+
# # =>
|
573
|
+
# # shape: (6, 1)
|
574
|
+
# # ┌─────────┐
|
575
|
+
# # │ literal │
|
576
|
+
# # │ --- │
|
577
|
+
# # │ i64 │
|
578
|
+
# # ╞═════════╡
|
579
|
+
# # │ null │
|
580
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
581
|
+
# # │ null │
|
582
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
583
|
+
# # │ null │
|
584
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
585
|
+
# # │ 1 │
|
586
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
587
|
+
# # │ 1 │
|
588
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
589
|
+
# # │ 2 │
|
590
|
+
# # └─────────┘
|
205
591
|
def rechunk
|
206
592
|
wrap_expr(_rbexpr.rechunk)
|
207
593
|
end
|
208
594
|
|
595
|
+
# Drop null values.
|
596
|
+
#
|
597
|
+
# @return [Expr]
|
598
|
+
#
|
599
|
+
# @example
|
600
|
+
# df = Polars::DataFrame.new(
|
601
|
+
# {
|
602
|
+
# "a" => [8, 9, 10, 11],
|
603
|
+
# "b" => [nil, 4.0, 4.0, Float::NAN]
|
604
|
+
# }
|
605
|
+
# )
|
606
|
+
# df.select(Polars.col("b").drop_nulls)
|
607
|
+
# # =>
|
608
|
+
# # shape: (3, 1)
|
609
|
+
# # ┌─────┐
|
610
|
+
# # │ b │
|
611
|
+
# # │ --- │
|
612
|
+
# # │ f64 │
|
613
|
+
# # ╞═════╡
|
614
|
+
# # │ 4.0 │
|
615
|
+
# # ├╌╌╌╌╌┤
|
616
|
+
# # │ 4.0 │
|
617
|
+
# # ├╌╌╌╌╌┤
|
618
|
+
# # │ NaN │
|
619
|
+
# # └─────┘
|
209
620
|
def drop_nulls
|
210
621
|
wrap_expr(_rbexpr.drop_nulls)
|
211
622
|
end
|
212
623
|
|
624
|
+
# Drop floating point NaN values.
|
625
|
+
#
|
626
|
+
# @return [Expr]
|
627
|
+
#
|
628
|
+
# @example
|
629
|
+
# df = Polars::DataFrame.new(
|
630
|
+
# {
|
631
|
+
# "a" => [8, 9, 10, 11],
|
632
|
+
# "b" => [nil, 4.0, 4.0, Float::NAN]
|
633
|
+
# }
|
634
|
+
# )
|
635
|
+
# df.select(Polars.col("b").drop_nans)
|
636
|
+
# # =>
|
637
|
+
# # shape: (3, 1)
|
638
|
+
# # ┌──────┐
|
639
|
+
# # │ b │
|
640
|
+
# # │ --- │
|
641
|
+
# # │ f64 │
|
642
|
+
# # ╞══════╡
|
643
|
+
# # │ null │
|
644
|
+
# # ├╌╌╌╌╌╌┤
|
645
|
+
# # │ 4.0 │
|
646
|
+
# # ├╌╌╌╌╌╌┤
|
647
|
+
# # │ 4.0 │
|
648
|
+
# # └──────┘
|
213
649
|
def drop_nans
|
214
650
|
wrap_expr(_rbexpr.drop_nans)
|
215
651
|
end
|
@@ -234,14 +670,87 @@ module Polars
|
|
234
670
|
wrap_expr(_rbexpr.cumcount(reverse))
|
235
671
|
end
|
236
672
|
|
673
|
+
# Rounds down to the nearest integer value.
|
674
|
+
#
|
675
|
+
# Only works on floating point Series.
|
676
|
+
#
|
677
|
+
# @return [Expr]
|
678
|
+
#
|
679
|
+
# @example
|
680
|
+
# df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
|
681
|
+
# df.select(Polars.col("a").floor)
|
682
|
+
# # =>
|
683
|
+
# # shape: (4, 1)
|
684
|
+
# # ┌─────┐
|
685
|
+
# # │ a │
|
686
|
+
# # │ --- │
|
687
|
+
# # │ f64 │
|
688
|
+
# # ╞═════╡
|
689
|
+
# # │ 0.0 │
|
690
|
+
# # ├╌╌╌╌╌┤
|
691
|
+
# # │ 0.0 │
|
692
|
+
# # ├╌╌╌╌╌┤
|
693
|
+
# # │ 1.0 │
|
694
|
+
# # ├╌╌╌╌╌┤
|
695
|
+
# # │ 1.0 │
|
696
|
+
# # └─────┘
|
237
697
|
def floor
|
238
698
|
wrap_expr(_rbexpr.floor)
|
239
699
|
end
|
240
700
|
|
701
|
+
# Rounds up to the nearest integer value.
|
702
|
+
#
|
703
|
+
# Only works on floating point Series.
|
704
|
+
#
|
705
|
+
# @return [Expr]
|
706
|
+
#
|
707
|
+
# @example
|
708
|
+
# df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
|
709
|
+
# df.select(Polars.col("a").ceil)
|
710
|
+
# # =>
|
711
|
+
# # shape: (4, 1)
|
712
|
+
# # ┌─────┐
|
713
|
+
# # │ a │
|
714
|
+
# # │ --- │
|
715
|
+
# # │ f64 │
|
716
|
+
# # ╞═════╡
|
717
|
+
# # │ 1.0 │
|
718
|
+
# # ├╌╌╌╌╌┤
|
719
|
+
# # │ 1.0 │
|
720
|
+
# # ├╌╌╌╌╌┤
|
721
|
+
# # │ 1.0 │
|
722
|
+
# # ├╌╌╌╌╌┤
|
723
|
+
# # │ 2.0 │
|
724
|
+
# # └─────┘
|
241
725
|
def ceil
|
242
726
|
wrap_expr(_rbexpr.ceil)
|
243
727
|
end
|
244
728
|
|
729
|
+
# Round underlying floating point data by `decimals` digits.
|
730
|
+
#
|
731
|
+
# @param decimals [Integer]
|
732
|
+
# Number of decimals to round by.
|
733
|
+
#
|
734
|
+
# @return [Expr]
|
735
|
+
#
|
736
|
+
# @example
|
737
|
+
# df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
|
738
|
+
# df.select(Polars.col("a").round(1))
|
739
|
+
# # =>
|
740
|
+
# # shape: (4, 1)
|
741
|
+
# # ┌─────┐
|
742
|
+
# # │ a │
|
743
|
+
# # │ --- │
|
744
|
+
# # │ f64 │
|
745
|
+
# # ╞═════╡
|
746
|
+
# # │ 0.3 │
|
747
|
+
# # ├╌╌╌╌╌┤
|
748
|
+
# # │ 0.5 │
|
749
|
+
# # ├╌╌╌╌╌┤
|
750
|
+
# # │ 1.0 │
|
751
|
+
# # ├╌╌╌╌╌┤
|
752
|
+
# # │ 1.2 │
|
753
|
+
# # └─────┘
|
245
754
|
def round(decimals = 0)
|
246
755
|
wrap_expr(_rbexpr.round(decimals))
|
247
756
|
end
|
@@ -251,6 +760,31 @@ module Polars
|
|
251
760
|
wrap_expr(_rbexpr.dot(other._rbexpr))
|
252
761
|
end
|
253
762
|
|
763
|
+
# Compute the most occurring value(s).
|
764
|
+
#
|
765
|
+
# Can return multiple values.
|
766
|
+
#
|
767
|
+
# @return [Expr]
|
768
|
+
#
|
769
|
+
# @example
|
770
|
+
# df = Polars::DataFrame.new(
|
771
|
+
# {
|
772
|
+
# "a" => [1, 1, 2, 3],
|
773
|
+
# "b" => [1, 1, 2, 2]
|
774
|
+
# }
|
775
|
+
# )
|
776
|
+
# df.select(Polars.all.mode)
|
777
|
+
# # =>
|
778
|
+
# # shape: (2, 2)
|
779
|
+
# # ┌─────┬─────┐
|
780
|
+
# # │ a ┆ b │
|
781
|
+
# # │ --- ┆ --- │
|
782
|
+
# # │ i64 ┆ i64 │
|
783
|
+
# # ╞═════╪═════╡
|
784
|
+
# # │ 1 ┆ 1 │
|
785
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
786
|
+
# # │ 1 ┆ 2 │
|
787
|
+
# # └─────┴─────┘
|
254
788
|
def mode
|
255
789
|
wrap_expr(_rbexpr.mode)
|
256
790
|
end
|
@@ -272,10 +806,50 @@ module Polars
|
|
272
806
|
wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
|
273
807
|
end
|
274
808
|
|
809
|
+
# Get the index of the maximal value.
|
810
|
+
#
|
811
|
+
# @return [Expr]
|
812
|
+
#
|
813
|
+
# @example
|
814
|
+
# df = Polars::DataFrame.new(
|
815
|
+
# {
|
816
|
+
# "a" => [20, 10, 30]
|
817
|
+
# }
|
818
|
+
# )
|
819
|
+
# df.select(Polars.col("a").arg_max)
|
820
|
+
# # =>
|
821
|
+
# # shape: (1, 1)
|
822
|
+
# # ┌─────┐
|
823
|
+
# # │ a │
|
824
|
+
# # │ --- │
|
825
|
+
# # │ u32 │
|
826
|
+
# # ╞═════╡
|
827
|
+
# # │ 2 │
|
828
|
+
# # └─────┘
|
275
829
|
def arg_max
|
276
830
|
wrap_expr(_rbexpr.arg_max)
|
277
831
|
end
|
278
832
|
|
833
|
+
# Get the index of the minimal value.
|
834
|
+
#
|
835
|
+
# @return [Expr]
|
836
|
+
#
|
837
|
+
# @example
|
838
|
+
# df = Polars::DataFrame.new(
|
839
|
+
# {
|
840
|
+
# "a" => [20, 10, 30]
|
841
|
+
# }
|
842
|
+
# )
|
843
|
+
# df.select(Polars.col("a").arg_min)
|
844
|
+
# # =>
|
845
|
+
# # shape: (1, 1)
|
846
|
+
# # ┌─────┐
|
847
|
+
# # │ a │
|
848
|
+
# # │ --- │
|
849
|
+
# # │ u32 │
|
850
|
+
# # ╞═════╡
|
851
|
+
# # │ 1 │
|
852
|
+
# # └─────┘
|
279
853
|
def arg_min
|
280
854
|
wrap_expr(_rbexpr.arg_min)
|
281
855
|
end
|
@@ -297,10 +871,40 @@ module Polars
|
|
297
871
|
wrap_expr(_rbexpr.sort_by(by, reverse))
|
298
872
|
end
|
299
873
|
|
300
|
-
|
301
|
-
|
874
|
+
def take(indices)
|
875
|
+
if indices.is_a?(Array)
|
876
|
+
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
877
|
+
else
|
878
|
+
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
879
|
+
end
|
880
|
+
wrap_expr(_rbexpr.take(indices_lit._rbexpr))
|
881
|
+
end
|
302
882
|
|
883
|
+
# Shift the values by a given period.
|
884
|
+
#
|
885
|
+
# @param periods [Integer]
|
886
|
+
# Number of places to shift (may be negative).
|
303
887
|
#
|
888
|
+
# @return [Expr]
|
889
|
+
#
|
890
|
+
# @example
|
891
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
892
|
+
# df.select(Polars.col("foo").shift(1))
|
893
|
+
# # =>
|
894
|
+
# # shape: (4, 1)
|
895
|
+
# # ┌──────┐
|
896
|
+
# # │ foo │
|
897
|
+
# # │ --- │
|
898
|
+
# # │ i64 │
|
899
|
+
# # ╞══════╡
|
900
|
+
# # │ null │
|
901
|
+
# # ├╌╌╌╌╌╌┤
|
902
|
+
# # │ 1 │
|
903
|
+
# # ├╌╌╌╌╌╌┤
|
904
|
+
# # │ 2 │
|
905
|
+
# # ├╌╌╌╌╌╌┤
|
906
|
+
# # │ 3 │
|
907
|
+
# # └──────┘
|
304
908
|
def shift(periods = 1)
|
305
909
|
wrap_expr(_rbexpr.shift(periods))
|
306
910
|
end
|
@@ -660,10 +1264,11 @@ module Polars
|
|
660
1264
|
# def ewm_var
|
661
1265
|
# end
|
662
1266
|
|
663
|
-
# def extend_constant
|
664
|
-
# end
|
665
|
-
|
666
1267
|
#
|
1268
|
+
def extend_constant(value, n)
|
1269
|
+
wrap_expr(_rbexpr.extend_constant(value, n))
|
1270
|
+
end
|
1271
|
+
|
667
1272
|
def value_counts(multithreaded: false, sort: false)
|
668
1273
|
wrap_expr(_rbexpr.value_counts(multithreaded, sort))
|
669
1274
|
end
|
@@ -673,7 +1278,7 @@ module Polars
|
|
673
1278
|
end
|
674
1279
|
|
675
1280
|
def log(base = Math::E)
|
676
|
-
wrap_expr(
|
1281
|
+
wrap_expr(_rbexpr.log(base))
|
677
1282
|
end
|
678
1283
|
|
679
1284
|
def entropy(base: 2, normalize: false)
|