polars-df 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +139 -6
- data/ext/polars/src/dataframe.rs +360 -15
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +135 -3
- data/ext/polars/src/lazy/dsl.rs +97 -2
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +227 -12
- data/ext/polars/src/series.rs +190 -38
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2813 -100
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +631 -11
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +763 -4
- data/lib/polars/lazy_frame.rb +1415 -67
- data/lib/polars/lazy_functions.rb +430 -9
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2244 -192
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +76 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +8 -2
- metadata +12 -2
data/lib/polars/expr.rb
CHANGED
@@ -1,78 +1,139 @@
|
|
1
1
|
module Polars
|
2
|
+
# Expressions that can be used in various contexts.
|
2
3
|
class Expr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def self._from_rbexpr(rbexpr)
|
6
9
|
expr = Expr.allocate
|
7
10
|
expr._rbexpr = rbexpr
|
8
11
|
expr
|
9
12
|
end
|
10
13
|
|
14
|
+
# Returns a string representing the Expr.
|
15
|
+
#
|
16
|
+
# @return [String]
|
11
17
|
def to_s
|
12
18
|
_rbexpr.to_str
|
13
19
|
end
|
14
20
|
alias_method :inspect, :to_s
|
15
21
|
|
22
|
+
# Bitwise XOR.
|
23
|
+
#
|
24
|
+
# @return [Expr]
|
16
25
|
def ^(other)
|
17
26
|
wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
|
18
27
|
end
|
19
28
|
|
29
|
+
# Bitwise AND.
|
30
|
+
#
|
31
|
+
# @return [Expr]
|
20
32
|
def &(other)
|
21
33
|
wrap_expr(_rbexpr._and(_to_rbexpr(other)))
|
22
34
|
end
|
23
35
|
|
36
|
+
# Bitwise OR.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
24
39
|
def |(other)
|
25
40
|
wrap_expr(_rbexpr._or(_to_rbexpr(other)))
|
26
41
|
end
|
27
42
|
|
43
|
+
# Performs addition.
|
44
|
+
#
|
45
|
+
# @return [Expr]
|
28
46
|
def +(other)
|
29
47
|
wrap_expr(_rbexpr + _to_rbexpr(other))
|
30
48
|
end
|
31
49
|
|
50
|
+
# Performs subtraction.
|
51
|
+
#
|
52
|
+
# @return [Expr]
|
32
53
|
def -(other)
|
33
54
|
wrap_expr(_rbexpr - _to_rbexpr(other))
|
34
55
|
end
|
35
56
|
|
57
|
+
# Performs multiplication.
|
58
|
+
#
|
59
|
+
# @return [Expr]
|
36
60
|
def *(other)
|
37
61
|
wrap_expr(_rbexpr * _to_rbexpr(other))
|
38
62
|
end
|
39
63
|
|
64
|
+
# Performs division.
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
40
67
|
def /(other)
|
41
68
|
wrap_expr(_rbexpr / _to_rbexpr(other))
|
42
69
|
end
|
43
70
|
|
71
|
+
# Performs floor division.
|
72
|
+
#
|
73
|
+
# @return [Expr]
|
74
|
+
def floordiv(other)
|
75
|
+
wrap_expr(_rbexpr.floordiv(_to_rbexpr(other)))
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns the modulo.
|
79
|
+
#
|
80
|
+
# @return [Expr]
|
44
81
|
def %(other)
|
45
82
|
wrap_expr(_rbexpr % _to_rbexpr(other))
|
46
83
|
end
|
47
84
|
|
85
|
+
# Raises to the power of exponent.
|
86
|
+
#
|
87
|
+
# @return [Expr]
|
48
88
|
def **(power)
|
49
89
|
pow(power)
|
50
90
|
end
|
51
91
|
|
92
|
+
# Greater than or equal.
|
93
|
+
#
|
94
|
+
# @return [Expr]
|
52
95
|
def >=(other)
|
53
96
|
wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
|
54
97
|
end
|
55
98
|
|
99
|
+
# Less than or equal.
|
100
|
+
#
|
101
|
+
# @return [Expr]
|
56
102
|
def <=(other)
|
57
103
|
wrap_expr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
|
58
104
|
end
|
59
105
|
|
106
|
+
# Equal.
|
107
|
+
#
|
108
|
+
# @return [Expr]
|
60
109
|
def ==(other)
|
61
110
|
wrap_expr(_rbexpr.eq(_to_expr(other)._rbexpr))
|
62
111
|
end
|
63
112
|
|
113
|
+
# Not equal.
|
114
|
+
#
|
115
|
+
# @return [Expr]
|
64
116
|
def !=(other)
|
65
117
|
wrap_expr(_rbexpr.neq(_to_expr(other)._rbexpr))
|
66
118
|
end
|
67
119
|
|
120
|
+
# Less than.
|
121
|
+
#
|
122
|
+
# @return [Expr]
|
68
123
|
def <(other)
|
69
124
|
wrap_expr(_rbexpr.lt(_to_expr(other)._rbexpr))
|
70
125
|
end
|
71
126
|
|
127
|
+
# Greater than.
|
128
|
+
#
|
129
|
+
# @return [Expr]
|
72
130
|
def >(other)
|
73
131
|
wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
|
74
132
|
end
|
75
133
|
|
134
|
+
# Performs negation.
|
135
|
+
#
|
136
|
+
# @return [Expr]
|
76
137
|
def -@
|
77
138
|
Utils.lit(0) - self
|
78
139
|
end
|
@@ -80,22 +141,119 @@ module Polars
|
|
80
141
|
# def to_physical
|
81
142
|
# end
|
82
143
|
|
144
|
+
# Check if any boolean value in a Boolean column is `true`.
|
145
|
+
#
|
146
|
+
# @return [Expr]
|
147
|
+
#
|
148
|
+
# @example
|
149
|
+
# df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
|
150
|
+
# df.select(Polars.all.any)
|
151
|
+
# # =>
|
152
|
+
# # shape: (1, 2)
|
153
|
+
# # ┌──────┬───────┐
|
154
|
+
# # │ TF ┆ FF │
|
155
|
+
# # │ --- ┆ --- │
|
156
|
+
# # │ bool ┆ bool │
|
157
|
+
# # ╞══════╪═══════╡
|
158
|
+
# # │ true ┆ false │
|
159
|
+
# # └──────┴───────┘
|
83
160
|
def any
|
84
161
|
wrap_expr(_rbexpr.any)
|
85
162
|
end
|
86
163
|
|
164
|
+
# Check if all boolean values in a Boolean column are `true`.
|
165
|
+
#
|
166
|
+
# This method is an expression - not to be confused with
|
167
|
+
# `Polars.all` which is a function to select all columns.
|
168
|
+
#
|
169
|
+
# @return [Expr]
|
170
|
+
#
|
171
|
+
# @example
|
172
|
+
# df = Polars::DataFrame.new(
|
173
|
+
# {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
|
174
|
+
# )
|
175
|
+
# df.select(Polars.col("*").all)
|
176
|
+
# # =>
|
177
|
+
# # shape: (1, 3)
|
178
|
+
# # ┌──────┬───────┬───────┐
|
179
|
+
# # │ TT ┆ TF ┆ FF │
|
180
|
+
# # │ --- ┆ --- ┆ --- │
|
181
|
+
# # │ bool ┆ bool ┆ bool │
|
182
|
+
# # ╞══════╪═══════╪═══════╡
|
183
|
+
# # │ true ┆ false ┆ false │
|
184
|
+
# # └──────┴───────┴───────┘
|
87
185
|
def all
|
88
186
|
wrap_expr(_rbexpr.all)
|
89
187
|
end
|
90
188
|
|
189
|
+
# Compute the square root of the elements.
|
190
|
+
#
|
191
|
+
# @return [Expr]
|
192
|
+
#
|
193
|
+
# @example
|
194
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
195
|
+
# df.select(Polars.col("values").sqrt)
|
196
|
+
# # =>
|
197
|
+
# # shape: (3, 1)
|
198
|
+
# # ┌──────────┐
|
199
|
+
# # │ values │
|
200
|
+
# # │ --- │
|
201
|
+
# # │ f64 │
|
202
|
+
# # ╞══════════╡
|
203
|
+
# # │ 1.0 │
|
204
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
205
|
+
# # │ 1.414214 │
|
206
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
207
|
+
# # │ 2.0 │
|
208
|
+
# # └──────────┘
|
91
209
|
def sqrt
|
92
|
-
self
|
93
|
-
end
|
94
|
-
|
210
|
+
self**0.5
|
211
|
+
end
|
212
|
+
|
213
|
+
# Compute the base 10 logarithm of the input array, element-wise.
|
214
|
+
#
|
215
|
+
# @return [Expr]
|
216
|
+
#
|
217
|
+
# @example
|
218
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
219
|
+
# df.select(Polars.col("values").log10)
|
220
|
+
# # =>
|
221
|
+
# # shape: (3, 1)
|
222
|
+
# # ┌─────────┐
|
223
|
+
# # │ values │
|
224
|
+
# # │ --- │
|
225
|
+
# # │ f64 │
|
226
|
+
# # ╞═════════╡
|
227
|
+
# # │ 0.0 │
|
228
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
229
|
+
# # │ 0.30103 │
|
230
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
231
|
+
# # │ 0.60206 │
|
232
|
+
# # └─────────┘
|
95
233
|
def log10
|
96
234
|
log(10)
|
97
235
|
end
|
98
236
|
|
237
|
+
# Compute the exponential, element-wise.
|
238
|
+
#
|
239
|
+
# @return [Expr]
|
240
|
+
#
|
241
|
+
# @example
|
242
|
+
# df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
|
243
|
+
# df.select(Polars.col("values").exp)
|
244
|
+
# # =>
|
245
|
+
# # shape: (3, 1)
|
246
|
+
# # ┌──────────┐
|
247
|
+
# # │ values │
|
248
|
+
# # │ --- │
|
249
|
+
# # │ f64 │
|
250
|
+
# # ╞══════════╡
|
251
|
+
# # │ 2.718282 │
|
252
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
253
|
+
# # │ 7.389056 │
|
254
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
255
|
+
# # │ 54.59815 │
|
256
|
+
# # └──────────┘
|
99
257
|
def exp
|
100
258
|
wrap_expr(_rbexpr.exp)
|
101
259
|
end
|
@@ -104,7 +262,9 @@ module Polars
|
|
104
262
|
wrap_expr(_rbexpr._alias(name))
|
105
263
|
end
|
106
264
|
|
107
|
-
# TODO support symbols
|
265
|
+
# TODO support symbols for exclude
|
266
|
+
|
267
|
+
#
|
108
268
|
def exclude(columns)
|
109
269
|
if columns.is_a?(String)
|
110
270
|
columns = [columns]
|
@@ -140,22 +300,166 @@ module Polars
|
|
140
300
|
# def map_alias
|
141
301
|
# end
|
142
302
|
|
303
|
+
# Negate a boolean expression.
|
304
|
+
#
|
305
|
+
# @return [Expr]
|
306
|
+
#
|
307
|
+
# @example
|
308
|
+
# df = Polars::DataFrame.new(
|
309
|
+
# {
|
310
|
+
# "a" => [true, false, false],
|
311
|
+
# "b" => ["a", "b", nil]
|
312
|
+
# }
|
313
|
+
# )
|
314
|
+
# # =>
|
315
|
+
# # shape: (3, 2)
|
316
|
+
# # ┌───────┬──────┐
|
317
|
+
# # │ a ┆ b │
|
318
|
+
# # │ --- ┆ --- │
|
319
|
+
# # │ bool ┆ str │
|
320
|
+
# # ╞═══════╪══════╡
|
321
|
+
# # │ true ┆ a │
|
322
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
323
|
+
# # │ false ┆ b │
|
324
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
325
|
+
# # │ false ┆ null │
|
326
|
+
# # └───────┴──────┘
|
327
|
+
#
|
328
|
+
# @example
|
329
|
+
# df.select(Polars.col("a").is_not)
|
330
|
+
# # =>
|
331
|
+
# # shape: (3, 1)
|
332
|
+
# # ┌───────┐
|
333
|
+
# # │ a │
|
334
|
+
# # │ --- │
|
335
|
+
# # │ bool │
|
336
|
+
# # ╞═══════╡
|
337
|
+
# # │ false │
|
338
|
+
# # ├╌╌╌╌╌╌╌┤
|
339
|
+
# # │ true │
|
340
|
+
# # ├╌╌╌╌╌╌╌┤
|
341
|
+
# # │ true │
|
342
|
+
# # └───────┘
|
143
343
|
def is_not
|
144
344
|
wrap_expr(_rbexpr.is_not)
|
145
345
|
end
|
146
346
|
|
347
|
+
# Returns a boolean Series indicating which values are null.
|
348
|
+
#
|
349
|
+
# @return [Expr]
|
350
|
+
#
|
351
|
+
# @example
|
352
|
+
# df = Polars::DataFrame.new(
|
353
|
+
# {
|
354
|
+
# "a" => [1, 2, nil, 1, 5],
|
355
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
356
|
+
# }
|
357
|
+
# )
|
358
|
+
# df.with_column(Polars.all.is_null.suffix("_isnull"))
|
359
|
+
# # =>
|
360
|
+
# # shape: (5, 4)
|
361
|
+
# # ┌──────┬─────┬──────────┬──────────┐
|
362
|
+
# # │ a ┆ b ┆ a_isnull ┆ b_isnull │
|
363
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
364
|
+
# # │ i64 ┆ f64 ┆ bool ┆ bool │
|
365
|
+
# # ╞══════╪═════╪══════════╪══════════╡
|
366
|
+
# # │ 1 ┆ 1.0 ┆ false ┆ false │
|
367
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
368
|
+
# # │ 2 ┆ 2.0 ┆ false ┆ false │
|
369
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
370
|
+
# # │ null ┆ NaN ┆ true ┆ false │
|
371
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
372
|
+
# # │ 1 ┆ 1.0 ┆ false ┆ false │
|
373
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
374
|
+
# # │ 5 ┆ 5.0 ┆ false ┆ false │
|
375
|
+
# # └──────┴─────┴──────────┴──────────┘
|
147
376
|
def is_null
|
148
377
|
wrap_expr(_rbexpr.is_null)
|
149
378
|
end
|
150
379
|
|
380
|
+
# Returns a boolean Series indicating which values are not null.
|
381
|
+
#
|
382
|
+
# @return [Expr]
|
383
|
+
#
|
384
|
+
# @example
|
385
|
+
# df = Polars::DataFrame.new(
|
386
|
+
# {
|
387
|
+
# "a" => [1, 2, nil, 1, 5],
|
388
|
+
# "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
|
389
|
+
# }
|
390
|
+
# )
|
391
|
+
# df.with_column(Polars.all.is_not_null.suffix("_not_null"))
|
392
|
+
# # =>
|
393
|
+
# # shape: (5, 4)
|
394
|
+
# # ┌──────┬─────┬────────────┬────────────┐
|
395
|
+
# # │ a ┆ b ┆ a_not_null ┆ b_not_null │
|
396
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
397
|
+
# # │ i64 ┆ f64 ┆ bool ┆ bool │
|
398
|
+
# # ╞══════╪═════╪════════════╪════════════╡
|
399
|
+
# # │ 1 ┆ 1.0 ┆ true ┆ true │
|
400
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
401
|
+
# # │ 2 ┆ 2.0 ┆ true ┆ true │
|
402
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
403
|
+
# # │ null ┆ NaN ┆ false ┆ true │
|
404
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
405
|
+
# # │ 1 ┆ 1.0 ┆ true ┆ true │
|
406
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
407
|
+
# # │ 5 ┆ 5.0 ┆ true ┆ true │
|
408
|
+
# # └──────┴─────┴────────────┴────────────┘
|
151
409
|
def is_not_null
|
152
410
|
wrap_expr(_rbexpr.is_not_null)
|
153
411
|
end
|
154
412
|
|
413
|
+
# Returns a boolean Series indicating which values are finite.
|
414
|
+
#
|
415
|
+
# @return [Expr]
|
416
|
+
#
|
417
|
+
# @example
|
418
|
+
# df = Polars::DataFrame.new(
|
419
|
+
# {
|
420
|
+
# "A" => [1.0, 2],
|
421
|
+
# "B" => [3.0, Float::INFINITY]
|
422
|
+
# }
|
423
|
+
# )
|
424
|
+
# df.select(Polars.all.is_finite)
|
425
|
+
# # =>
|
426
|
+
# # shape: (2, 2)
|
427
|
+
# # ┌──────┬───────┐
|
428
|
+
# # │ A ┆ B │
|
429
|
+
# # │ --- ┆ --- │
|
430
|
+
# # │ bool ┆ bool │
|
431
|
+
# # ╞══════╪═══════╡
|
432
|
+
# # │ true ┆ true │
|
433
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
434
|
+
# # │ true ┆ false │
|
435
|
+
# # └──────┴───────┘
|
155
436
|
def is_finite
|
156
437
|
wrap_expr(_rbexpr.is_finite)
|
157
438
|
end
|
158
439
|
|
440
|
+
# Returns a boolean Series indicating which values are infinite.
|
441
|
+
#
|
442
|
+
# @return [Expr]
|
443
|
+
#
|
444
|
+
# @example
|
445
|
+
# df = Polars::DataFrame.new(
|
446
|
+
# {
|
447
|
+
# "A" => [1.0, 2],
|
448
|
+
# "B" => [3.0, Float::INFINITY]
|
449
|
+
# }
|
450
|
+
# )
|
451
|
+
# df.select(Polars.all.is_infinite)
|
452
|
+
# # =>
|
453
|
+
# # shape: (2, 2)
|
454
|
+
# # ┌───────┬───────┐
|
455
|
+
# # │ A ┆ B │
|
456
|
+
# # │ --- ┆ --- │
|
457
|
+
# # │ bool ┆ bool │
|
458
|
+
# # ╞═══════╪═══════╡
|
459
|
+
# # │ false ┆ false │
|
460
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
461
|
+
# # │ false ┆ true │
|
462
|
+
# # └───────┴───────┘
|
159
463
|
def is_infinite
|
160
464
|
wrap_expr(_rbexpr.is_infinite)
|
161
465
|
end
|
@@ -172,14 +476,77 @@ module Polars
|
|
172
476
|
wrap_expr(_rbexpr.agg_groups)
|
173
477
|
end
|
174
478
|
|
479
|
+
# Count the number of values in this expression.
|
480
|
+
#
|
481
|
+
# @return [Expr]
|
482
|
+
#
|
483
|
+
# @example
|
484
|
+
# df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
|
485
|
+
# df.select(Polars.all.count)
|
486
|
+
# # =>
|
487
|
+
# # shape: (1, 2)
|
488
|
+
# # ┌─────┬─────┐
|
489
|
+
# # │ a ┆ b │
|
490
|
+
# # │ --- ┆ --- │
|
491
|
+
# # │ u32 ┆ u32 │
|
492
|
+
# # ╞═════╪═════╡
|
493
|
+
# # │ 3 ┆ 3 │
|
494
|
+
# # └─────┴─────┘
|
175
495
|
def count
|
176
496
|
wrap_expr(_rbexpr.count)
|
177
497
|
end
|
178
498
|
|
499
|
+
# Count the number of values in this expression.
|
500
|
+
#
|
501
|
+
# Alias for {#count}.
|
502
|
+
#
|
503
|
+
# @return [Expr]
|
504
|
+
#
|
505
|
+
# @example
|
506
|
+
# df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
|
507
|
+
# df.select(Polars.all.len)
|
508
|
+
# # =>
|
509
|
+
# # shape: (1, 2)
|
510
|
+
# # ┌─────┬─────┐
|
511
|
+
# # │ a ┆ b │
|
512
|
+
# # │ --- ┆ --- │
|
513
|
+
# # │ u32 ┆ u32 │
|
514
|
+
# # ╞═════╪═════╡
|
515
|
+
# # │ 3 ┆ 3 │
|
516
|
+
# # └─────┴─────┘
|
179
517
|
def len
|
180
518
|
count
|
181
519
|
end
|
182
520
|
|
521
|
+
# Get a slice of this expression.
|
522
|
+
#
|
523
|
+
# @param offset [Integer]
|
524
|
+
# Start index. Negative indexing is supported.
|
525
|
+
# @param length [Integer, nil]
|
526
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
527
|
+
# will be selected.
|
528
|
+
#
|
529
|
+
# @return [Expr]
|
530
|
+
#
|
531
|
+
# @example
|
532
|
+
# df = Polars::DataFrame.new(
|
533
|
+
# {
|
534
|
+
# "a" => [8, 9, 10, 11],
|
535
|
+
# "b" => [nil, 4, 4, 4]
|
536
|
+
# }
|
537
|
+
# )
|
538
|
+
# df.select(Polars.all.slice(1, 2))
|
539
|
+
# # =>
|
540
|
+
# # shape: (2, 2)
|
541
|
+
# # ┌─────┬─────┐
|
542
|
+
# # │ a ┆ b │
|
543
|
+
# # │ --- ┆ --- │
|
544
|
+
# # │ i64 ┆ i64 │
|
545
|
+
# # ╞═════╪═════╡
|
546
|
+
# # │ 9 ┆ 4 │
|
547
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
548
|
+
# # │ 10 ┆ 4 │
|
549
|
+
# # └─────┴─────┘
|
183
550
|
def slice(offset, length = nil)
|
184
551
|
if !offset.is_a?(Expr)
|
185
552
|
offset = Polars.lit(offset)
|
@@ -195,14 +562,90 @@ module Polars
|
|
195
562
|
wrap_expr(_rbexpr.append(other._rbexpr, upcast))
|
196
563
|
end
|
197
564
|
|
565
|
+
# Create a single chunk of memory for this Series.
|
566
|
+
#
|
567
|
+
# @return [Expr]
|
568
|
+
#
|
569
|
+
# @example Create a Series with 3 nulls, append column a then rechunk
|
570
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
571
|
+
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
572
|
+
# # =>
|
573
|
+
# # shape: (6, 1)
|
574
|
+
# # ┌─────────┐
|
575
|
+
# # │ literal │
|
576
|
+
# # │ --- │
|
577
|
+
# # │ i64 │
|
578
|
+
# # ╞═════════╡
|
579
|
+
# # │ null │
|
580
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
581
|
+
# # │ null │
|
582
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
583
|
+
# # │ null │
|
584
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
585
|
+
# # │ 1 │
|
586
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
587
|
+
# # │ 1 │
|
588
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
589
|
+
# # │ 2 │
|
590
|
+
# # └─────────┘
|
198
591
|
def rechunk
|
199
592
|
wrap_expr(_rbexpr.rechunk)
|
200
593
|
end
|
201
594
|
|
595
|
+
# Drop null values.
|
596
|
+
#
|
597
|
+
# @return [Expr]
|
598
|
+
#
|
599
|
+
# @example
|
600
|
+
# df = Polars::DataFrame.new(
|
601
|
+
# {
|
602
|
+
# "a" => [8, 9, 10, 11],
|
603
|
+
# "b" => [nil, 4.0, 4.0, Float::NAN]
|
604
|
+
# }
|
605
|
+
# )
|
606
|
+
# df.select(Polars.col("b").drop_nulls)
|
607
|
+
# # =>
|
608
|
+
# # shape: (3, 1)
|
609
|
+
# # ┌─────┐
|
610
|
+
# # │ b │
|
611
|
+
# # │ --- │
|
612
|
+
# # │ f64 │
|
613
|
+
# # ╞═════╡
|
614
|
+
# # │ 4.0 │
|
615
|
+
# # ├╌╌╌╌╌┤
|
616
|
+
# # │ 4.0 │
|
617
|
+
# # ├╌╌╌╌╌┤
|
618
|
+
# # │ NaN │
|
619
|
+
# # └─────┘
|
202
620
|
def drop_nulls
|
203
621
|
wrap_expr(_rbexpr.drop_nulls)
|
204
622
|
end
|
205
623
|
|
624
|
+
# Drop floating point NaN values.
|
625
|
+
#
|
626
|
+
# @return [Expr]
|
627
|
+
#
|
628
|
+
# @example
|
629
|
+
# df = Polars::DataFrame.new(
|
630
|
+
# {
|
631
|
+
# "a" => [8, 9, 10, 11],
|
632
|
+
# "b" => [nil, 4.0, 4.0, Float::NAN]
|
633
|
+
# }
|
634
|
+
# )
|
635
|
+
# df.select(Polars.col("b").drop_nans)
|
636
|
+
# # =>
|
637
|
+
# # shape: (3, 1)
|
638
|
+
# # ┌──────┐
|
639
|
+
# # │ b │
|
640
|
+
# # │ --- │
|
641
|
+
# # │ f64 │
|
642
|
+
# # ╞══════╡
|
643
|
+
# # │ null │
|
644
|
+
# # ├╌╌╌╌╌╌┤
|
645
|
+
# # │ 4.0 │
|
646
|
+
# # ├╌╌╌╌╌╌┤
|
647
|
+
# # │ 4.0 │
|
648
|
+
# # └──────┘
|
206
649
|
def drop_nans
|
207
650
|
wrap_expr(_rbexpr.drop_nans)
|
208
651
|
end
|
@@ -227,14 +670,87 @@ module Polars
|
|
227
670
|
wrap_expr(_rbexpr.cumcount(reverse))
|
228
671
|
end
|
229
672
|
|
673
|
+
# Rounds down to the nearest integer value.
|
674
|
+
#
|
675
|
+
# Only works on floating point Series.
|
676
|
+
#
|
677
|
+
# @return [Expr]
|
678
|
+
#
|
679
|
+
# @example
|
680
|
+
# df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
|
681
|
+
# df.select(Polars.col("a").floor)
|
682
|
+
# # =>
|
683
|
+
# # shape: (4, 1)
|
684
|
+
# # ┌─────┐
|
685
|
+
# # │ a │
|
686
|
+
# # │ --- │
|
687
|
+
# # │ f64 │
|
688
|
+
# # ╞═════╡
|
689
|
+
# # │ 0.0 │
|
690
|
+
# # ├╌╌╌╌╌┤
|
691
|
+
# # │ 0.0 │
|
692
|
+
# # ├╌╌╌╌╌┤
|
693
|
+
# # │ 1.0 │
|
694
|
+
# # ├╌╌╌╌╌┤
|
695
|
+
# # │ 1.0 │
|
696
|
+
# # └─────┘
|
230
697
|
def floor
|
231
698
|
wrap_expr(_rbexpr.floor)
|
232
699
|
end
|
233
700
|
|
701
|
+
# Rounds up to the nearest integer value.
|
702
|
+
#
|
703
|
+
# Only works on floating point Series.
|
704
|
+
#
|
705
|
+
# @return [Expr]
|
706
|
+
#
|
707
|
+
# @example
|
708
|
+
# df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
|
709
|
+
# df.select(Polars.col("a").ceil)
|
710
|
+
# # =>
|
711
|
+
# # shape: (4, 1)
|
712
|
+
# # ┌─────┐
|
713
|
+
# # │ a │
|
714
|
+
# # │ --- │
|
715
|
+
# # │ f64 │
|
716
|
+
# # ╞═════╡
|
717
|
+
# # │ 1.0 │
|
718
|
+
# # ├╌╌╌╌╌┤
|
719
|
+
# # │ 1.0 │
|
720
|
+
# # ├╌╌╌╌╌┤
|
721
|
+
# # │ 1.0 │
|
722
|
+
# # ├╌╌╌╌╌┤
|
723
|
+
# # │ 2.0 │
|
724
|
+
# # └─────┘
|
234
725
|
def ceil
|
235
726
|
wrap_expr(_rbexpr.ceil)
|
236
727
|
end
|
237
728
|
|
729
|
+
# Round underlying floating point data by `decimals` digits.
|
730
|
+
#
|
731
|
+
# @param decimals [Integer]
|
732
|
+
# Number of decimals to round by.
|
733
|
+
#
|
734
|
+
# @return [Expr]
|
735
|
+
#
|
736
|
+
# @example
|
737
|
+
# df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
|
738
|
+
# df.select(Polars.col("a").round(1))
|
739
|
+
# # =>
|
740
|
+
# # shape: (4, 1)
|
741
|
+
# # ┌─────┐
|
742
|
+
# # │ a │
|
743
|
+
# # │ --- │
|
744
|
+
# # │ f64 │
|
745
|
+
# # ╞═════╡
|
746
|
+
# # │ 0.3 │
|
747
|
+
# # ├╌╌╌╌╌┤
|
748
|
+
# # │ 0.5 │
|
749
|
+
# # ├╌╌╌╌╌┤
|
750
|
+
# # │ 1.0 │
|
751
|
+
# # ├╌╌╌╌╌┤
|
752
|
+
# # │ 1.2 │
|
753
|
+
# # └─────┘
|
238
754
|
def round(decimals = 0)
|
239
755
|
wrap_expr(_rbexpr.round(decimals))
|
240
756
|
end
|
@@ -244,6 +760,31 @@ module Polars
|
|
244
760
|
wrap_expr(_rbexpr.dot(other._rbexpr))
|
245
761
|
end
|
246
762
|
|
763
|
+
# Compute the most occurring value(s).
|
764
|
+
#
|
765
|
+
# Can return multiple values.
|
766
|
+
#
|
767
|
+
# @return [Expr]
|
768
|
+
#
|
769
|
+
# @example
|
770
|
+
# df = Polars::DataFrame.new(
|
771
|
+
# {
|
772
|
+
# "a" => [1, 1, 2, 3],
|
773
|
+
# "b" => [1, 1, 2, 2]
|
774
|
+
# }
|
775
|
+
# )
|
776
|
+
# df.select(Polars.all.mode)
|
777
|
+
# # =>
|
778
|
+
# # shape: (2, 2)
|
779
|
+
# # ┌─────┬─────┐
|
780
|
+
# # │ a ┆ b │
|
781
|
+
# # │ --- ┆ --- │
|
782
|
+
# # │ i64 ┆ i64 │
|
783
|
+
# # ╞═════╪═════╡
|
784
|
+
# # │ 1 ┆ 1 │
|
785
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
786
|
+
# # │ 1 ┆ 2 │
|
787
|
+
# # └─────┴─────┘
|
247
788
|
def mode
|
248
789
|
wrap_expr(_rbexpr.mode)
|
249
790
|
end
|
@@ -265,10 +806,50 @@ module Polars
|
|
265
806
|
wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
|
266
807
|
end
|
267
808
|
|
809
|
+
# Get the index of the maximal value.
|
810
|
+
#
|
811
|
+
# @return [Expr]
|
812
|
+
#
|
813
|
+
# @example
|
814
|
+
# df = Polars::DataFrame.new(
|
815
|
+
# {
|
816
|
+
# "a" => [20, 10, 30]
|
817
|
+
# }
|
818
|
+
# )
|
819
|
+
# df.select(Polars.col("a").arg_max)
|
820
|
+
# # =>
|
821
|
+
# # shape: (1, 1)
|
822
|
+
# # ┌─────┐
|
823
|
+
# # │ a │
|
824
|
+
# # │ --- │
|
825
|
+
# # │ u32 │
|
826
|
+
# # ╞═════╡
|
827
|
+
# # │ 2 │
|
828
|
+
# # └─────┘
|
268
829
|
def arg_max
|
269
830
|
wrap_expr(_rbexpr.arg_max)
|
270
831
|
end
|
271
832
|
|
833
|
+
# Get the index of the minimal value.
|
834
|
+
#
|
835
|
+
# @return [Expr]
|
836
|
+
#
|
837
|
+
# @example
|
838
|
+
# df = Polars::DataFrame.new(
|
839
|
+
# {
|
840
|
+
# "a" => [20, 10, 30]
|
841
|
+
# }
|
842
|
+
# )
|
843
|
+
# df.select(Polars.col("a").arg_min)
|
844
|
+
# # =>
|
845
|
+
# # shape: (1, 1)
|
846
|
+
# # ┌─────┐
|
847
|
+
# # │ a │
|
848
|
+
# # │ --- │
|
849
|
+
# # │ u32 │
|
850
|
+
# # ╞═════╡
|
851
|
+
# # │ 1 │
|
852
|
+
# # └─────┘
|
272
853
|
def arg_min
|
273
854
|
wrap_expr(_rbexpr.arg_min)
|
274
855
|
end
|
@@ -290,10 +871,41 @@ module Polars
|
|
290
871
|
wrap_expr(_rbexpr.sort_by(by, reverse))
|
291
872
|
end
|
292
873
|
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
874
|
+
def take(indices)
|
875
|
+
if indices.is_a?(Array)
|
876
|
+
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
877
|
+
else
|
878
|
+
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
879
|
+
end
|
880
|
+
wrap_expr(_rbexpr.take(indices_lit._rbexpr))
|
881
|
+
end
|
882
|
+
|
883
|
+
# Shift the values by a given period.
|
884
|
+
#
|
885
|
+
# @param periods [Integer]
|
886
|
+
# Number of places to shift (may be negative).
|
887
|
+
#
|
888
|
+
# @return [Expr]
|
889
|
+
#
|
890
|
+
# @example
|
891
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
|
892
|
+
# df.select(Polars.col("foo").shift(1))
|
893
|
+
# # =>
|
894
|
+
# # shape: (4, 1)
|
895
|
+
# # ┌──────┐
|
896
|
+
# # │ foo │
|
897
|
+
# # │ --- │
|
898
|
+
# # │ i64 │
|
899
|
+
# # ╞══════╡
|
900
|
+
# # │ null │
|
901
|
+
# # ├╌╌╌╌╌╌┤
|
902
|
+
# # │ 1 │
|
903
|
+
# # ├╌╌╌╌╌╌┤
|
904
|
+
# # │ 2 │
|
905
|
+
# # ├╌╌╌╌╌╌┤
|
906
|
+
# # │ 3 │
|
907
|
+
# # └──────┘
|
908
|
+
def shift(periods = 1)
|
297
909
|
wrap_expr(_rbexpr.shift(periods))
|
298
910
|
end
|
299
911
|
|
@@ -439,6 +1051,7 @@ module Polars
|
|
439
1051
|
# def apply
|
440
1052
|
# end
|
441
1053
|
|
1054
|
+
#
|
442
1055
|
def flatten
|
443
1056
|
wrap_expr(_rbexpr.explode)
|
444
1057
|
end
|
@@ -471,6 +1084,7 @@ module Polars
|
|
471
1084
|
# def is_in
|
472
1085
|
# end
|
473
1086
|
|
1087
|
+
#
|
474
1088
|
def repeat_by(by)
|
475
1089
|
by = Utils.expr_to_lit_or_expr(by, false)
|
476
1090
|
wrap_expr(_rbexpr.repeat_by(by._rbexpr))
|
@@ -482,6 +1096,7 @@ module Polars
|
|
482
1096
|
# def _hash
|
483
1097
|
# end
|
484
1098
|
|
1099
|
+
#
|
485
1100
|
def reinterpret(signed: false)
|
486
1101
|
wrap_expr(_rbexpr.reinterpret(signed))
|
487
1102
|
end
|
@@ -489,6 +1104,7 @@ module Polars
|
|
489
1104
|
# def _inspect
|
490
1105
|
# end
|
491
1106
|
|
1107
|
+
#
|
492
1108
|
def interpolate
|
493
1109
|
wrap_expr(_rbexpr.interpolate)
|
494
1110
|
end
|
@@ -520,6 +1136,7 @@ module Polars
|
|
520
1136
|
# def rolling_apply
|
521
1137
|
# end
|
522
1138
|
|
1139
|
+
#
|
523
1140
|
def rolling_skew(window_size, bias: true)
|
524
1141
|
wrap_expr(_rbexpr.rolling_skew(window_size, bias))
|
525
1142
|
end
|
@@ -647,8 +1264,10 @@ module Polars
|
|
647
1264
|
# def ewm_var
|
648
1265
|
# end
|
649
1266
|
|
650
|
-
#
|
651
|
-
|
1267
|
+
#
|
1268
|
+
def extend_constant(value, n)
|
1269
|
+
wrap_expr(_rbexpr.extend_constant(value, n))
|
1270
|
+
end
|
652
1271
|
|
653
1272
|
def value_counts(multithreaded: false, sort: false)
|
654
1273
|
wrap_expr(_rbexpr.value_counts(multithreaded, sort))
|
@@ -659,7 +1278,7 @@ module Polars
|
|
659
1278
|
end
|
660
1279
|
|
661
1280
|
def log(base = Math::E)
|
662
|
-
wrap_expr(
|
1281
|
+
wrap_expr(_rbexpr.log(base))
|
663
1282
|
end
|
664
1283
|
|
665
1284
|
def entropy(base: 2, normalize: false)
|
@@ -672,6 +1291,7 @@ module Polars
|
|
672
1291
|
# def set_sorted
|
673
1292
|
# end
|
674
1293
|
|
1294
|
+
#
|
675
1295
|
def list
|
676
1296
|
wrap_expr(_rbexpr.list)
|
677
1297
|
end
|