polars-df 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +139 -6
- data/ext/polars/src/dataframe.rs +360 -15
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +135 -3
- data/ext/polars/src/lazy/dsl.rs +97 -2
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +227 -12
- data/ext/polars/src/series.rs +190 -38
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2813 -100
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +631 -11
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +763 -4
- data/lib/polars/lazy_frame.rb +1415 -67
- data/lib/polars/lazy_functions.rb +430 -9
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2244 -192
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +76 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +8 -2
- metadata +12 -2
@@ -1,14 +1,87 @@
|
|
1
1
|
module Polars
|
2
2
|
module LazyFunctions
|
3
|
+
# Return an expression representing a column in a DataFrame.
|
4
|
+
#
|
5
|
+
# @return [Expr]
|
3
6
|
def col(name)
  # A Series of names is treated exactly like an Array of names.
  names = name.is_a?(Series) ? name.to_a : name

  unless names.is_a?(Array)
    # Single column: a Symbol is converted to its String form first.
    single = names.is_a?(Symbol) ? names.to_s : names
    return Utils.wrap_expr(RbExpr.col(single))
  end

  head = names[0]
  if names.empty? || head.is_a?(String) || head.is_a?(Symbol)
    # Multiple columns by name; Symbols are normalised to Strings.
    as_strings = names.map { |entry| entry.is_a?(Symbol) ? entry.to_s : entry }
    Utils.wrap_expr(RbExpr.cols(as_strings))
  elsif Utils.is_polars_dtype(head)
    # Selecting columns by dtype is not implemented yet.
    raise Todo
    # Utils.wrap_expr(_dtype_cols(name))
  else
    raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
  end
end
|
7
26
|
|
27
|
+
# Alias for an element being evaluated in an `eval` expression.
|
28
|
+
#
|
29
|
+
# @return [Expr]
|
30
|
+
#
|
31
|
+
# @example A horizontal rank computation by taking the elements of a list
|
32
|
+
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
33
|
+
# df.with_column(
|
34
|
+
# Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
|
35
|
+
# )
|
36
|
+
# # =>
|
37
|
+
# # shape: (3, 3)
|
38
|
+
# # ┌─────┬─────┬────────────┐
|
39
|
+
# # │ a ┆ b ┆ rank │
|
40
|
+
# # │ --- ┆ --- ┆ --- │
|
41
|
+
# # │ i64 ┆ i64 ┆ list[f32] │
|
42
|
+
# # ╞═════╪═════╪════════════╡
|
43
|
+
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
44
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
45
|
+
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
46
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
47
|
+
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
48
|
+
# # └─────┴─────┴────────────┘
|
8
49
|
def element
  # The element placeholder is expressed as the column with an empty name.
  placeholder_name = ""
  col(placeholder_name)
end
|
11
52
|
|
53
|
+
# Count the number of values in this column/context.
|
54
|
+
#
|
55
|
+
# @param column [String, Series, nil]
|
56
|
+
# If dtype is:
|
57
|
+
#
|
58
|
+
# * `Series` : count the values in the series.
|
59
|
+
# * `String` : count the values in this column.
|
60
|
+
# * `nil` : count the number of values in this context.
|
61
|
+
#
|
62
|
+
# @return [Expr, Integer]
|
63
|
+
def count(column = nil)
  # No argument: build the bare count expression.
  return Utils.wrap_expr(RbExpr.count) if column.nil?

  # A Series is counted eagerly via its length; anything else is treated
  # as a column selector and counted lazily.
  column.is_a?(Series) ? column.len : col(column).count
end
|
74
|
+
|
75
|
+
# Aggregate to list.
|
76
|
+
#
|
77
|
+
# @return [Expr]
|
78
|
+
def to_list(name)
  # Build the column expression first, then aggregate it to a list.
  column_expr = col(name)
  column_expr.list
end
|
81
|
+
|
82
|
+
# Get the standard deviation.
|
83
|
+
#
|
84
|
+
# @return [Object]
|
12
85
|
def std(column, ddof: 1)
|
13
86
|
if column.is_a?(Series)
|
14
87
|
column.std(ddof: ddof)
|
@@ -17,6 +90,9 @@ module Polars
|
|
17
90
|
end
|
18
91
|
end
|
19
92
|
|
93
|
+
# Get the variance.
|
94
|
+
#
|
95
|
+
# @return [Object]
|
20
96
|
def var(column, ddof: 1)
|
21
97
|
if column.is_a?(Series)
|
22
98
|
column.var(ddof: ddof)
|
@@ -25,6 +101,16 @@ module Polars
|
|
25
101
|
end
|
26
102
|
end
|
27
103
|
|
104
|
+
# Get the maximum value.
|
105
|
+
#
|
106
|
+
# @param column [Object]
|
107
|
+
# Column(s) to be used in aggregation. Will lead to different behavior based on
|
108
|
+
# the input:
|
109
|
+
#
|
110
|
+
# - [String, Series] -> aggregate the maximum value of that column.
|
111
|
+
# - [Array<Expr>] -> aggregate the maximum value horizontally.
|
112
|
+
#
|
113
|
+
# @return [Expr, Object]
|
28
114
|
def max(column)
|
29
115
|
if column.is_a?(Series)
|
30
116
|
column.max
|
@@ -37,6 +123,16 @@ module Polars
|
|
37
123
|
end
|
38
124
|
end
|
39
125
|
|
126
|
+
# Get the minimum value.
|
127
|
+
#
|
128
|
+
# @param column [Object]
|
129
|
+
# Column(s) to be used in aggregation. Will lead to different behavior based on
|
130
|
+
# the input:
|
131
|
+
#
|
132
|
+
# - [String, Series] -> aggregate the minimum value of that column.
|
133
|
+
# - [Array<Expr>] -> aggregate the minimum value horizontally.
|
134
|
+
#
|
135
|
+
# @return [Expr, Object]
|
40
136
|
def min(column)
|
41
137
|
if column.is_a?(Series)
|
42
138
|
column.min
|
@@ -49,6 +145,9 @@ module Polars
|
|
49
145
|
end
|
50
146
|
end
|
51
147
|
|
148
|
+
# Sum values in a column/Series, or horizontally across list of columns/expressions.
|
149
|
+
#
|
150
|
+
# @return [Object]
|
52
151
|
def sum(column)
|
53
152
|
if column.is_a?(Series)
|
54
153
|
column.sum
|
@@ -59,10 +158,13 @@ module Polars
|
|
59
158
|
# TODO
|
60
159
|
Utils.wrap_expr(_sum_exprs(exprs))
|
61
160
|
else
|
62
|
-
raise
|
161
|
+
raise Todo
|
63
162
|
end
|
64
163
|
end
|
65
164
|
|
165
|
+
# Get the mean value.
|
166
|
+
#
|
167
|
+
# @return [Expr, Float]
|
66
168
|
def mean(column)
|
67
169
|
if column.is_a?(Series)
|
68
170
|
column.mean
|
@@ -71,10 +173,16 @@ module Polars
|
|
71
173
|
end
|
72
174
|
end
|
73
175
|
|
176
|
+
# Get the mean value.
|
177
|
+
#
|
178
|
+
# @return [Expr, Float]
|
74
179
|
def avg(column)
  # Pure alias: everything is delegated to `mean`.
  averaged = mean(column)
  averaged
end
|
77
182
|
|
183
|
+
# Get the median value.
|
184
|
+
#
|
185
|
+
# @return [Object]
|
78
186
|
def median(column)
|
79
187
|
if column.is_a?(Series)
|
80
188
|
column.median
|
@@ -83,10 +191,160 @@ module Polars
|
|
83
191
|
end
|
84
192
|
end
|
85
193
|
|
194
|
+
# def n_unique
|
195
|
+
# end
|
196
|
+
|
197
|
+
# Get the first value.
|
198
|
+
#
|
199
|
+
# @return [Object]
|
200
|
+
def first(column = nil)
  # No argument: build the bare "first" expression.
  return Utils.wrap_expr(RbExpr.first) if column.nil?

  # Anything that is not a Series is treated as a column selector.
  return col(column).first unless column.is_a?(Series)

  # Eager path: an empty Series has no first value to hand back.
  unless column.len > 0
    raise IndexError, "The series is empty, so no first value can be returned."
  end

  column[0]
end
|
215
|
+
|
216
|
+
# def last
|
217
|
+
# end
|
218
|
+
|
219
|
+
# def head
|
220
|
+
# end
|
221
|
+
|
222
|
+
# def tail
|
223
|
+
# end
|
224
|
+
|
225
|
+
# Return an expression representing a literal value.
|
226
|
+
#
|
227
|
+
# @return [Expr]
|
86
228
|
def lit(value)
  # Plain values are wrapped directly.
  return Utils.wrap_expr(RbExpr.lit(value)) unless value.is_a?(Polars::Series)

  # A Series literal keeps its name as an alias, unless that name is empty.
  series_name = value.name
  literal = Utils.wrap_expr(RbExpr.lit(value._s))
  series_name == "" ? literal : literal.alias(series_name)
end
|
89
241
|
|
242
|
+
# def cumsum
|
243
|
+
# end
|
244
|
+
|
245
|
+
# def spearman_rank_corr
|
246
|
+
# end
|
247
|
+
|
248
|
+
# def pearson_corr
|
249
|
+
# end
|
250
|
+
|
251
|
+
# def cov
|
252
|
+
# end
|
253
|
+
|
254
|
+
# def map
|
255
|
+
# end
|
256
|
+
|
257
|
+
# def apply
|
258
|
+
# end
|
259
|
+
|
260
|
+
# Accumulate over multiple columns horizontally/ row wise with a left fold.
|
261
|
+
#
|
262
|
+
# @return [Expr]
|
263
|
+
def fold(acc, f, exprs)
  # The accumulator may be a literal (strings included) or an expression.
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)

  # A lone Expr is folded as a one-element selection.
  selection = exprs.is_a?(Expr) ? [exprs] : exprs
  rbexprs = Utils.selection_to_rbexpr_list(selection)

  Utils.wrap_expr(RbExpr.fold(initial._rbexpr, f, rbexprs))
end
|
272
|
+
|
273
|
+
# def reduce
|
274
|
+
# end
|
275
|
+
|
276
|
+
# def cumfold
|
277
|
+
# end
|
278
|
+
|
279
|
+
# def cumreduce
|
280
|
+
# end
|
281
|
+
|
282
|
+
# def any
|
283
|
+
# end
|
284
|
+
|
285
|
+
# def exclude
|
286
|
+
# end
|
287
|
+
|
288
|
+
# Do one of two things.
|
289
|
+
#
|
290
|
+
# * function can do a columnwise or elementwise AND operation
|
291
|
+
# * a wildcard column selection
|
292
|
+
#
|
293
|
+
# @param name [Object]
|
294
|
+
# If given this function will apply a bitwise & on the columns.
|
295
|
+
#
|
296
|
+
# @return [Expr]
|
297
|
+
#
|
298
|
+
# @example Sum all columns
|
299
|
+
# df = Polars::DataFrame.new(
|
300
|
+
# {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
|
301
|
+
# )
|
302
|
+
# df.select(Polars.all.sum)
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 3)
|
305
|
+
# # ┌─────┬──────┬─────┐
|
306
|
+
# # │ a ┆ b ┆ c │
|
307
|
+
# # │ --- ┆ --- ┆ --- │
|
308
|
+
# # │ i64 ┆ str ┆ i64 │
|
309
|
+
# # ╞═════╪══════╪═════╡
|
310
|
+
# # │ 6 ┆ null ┆ 3 │
|
311
|
+
# # └─────┴──────┴─────┘
|
312
|
+
def all(name = nil)
  # Without a name this is the wildcard column selection.
  return col("*") if name.nil?

  # Only a single String/Symbol column name is supported so far.
  raise Todo unless name.is_a?(String) || name.is_a?(Symbol)

  col(name).all
end
|
321
|
+
|
322
|
+
# def groups
|
323
|
+
# end
|
324
|
+
|
325
|
+
# def quantile
|
326
|
+
# end
|
327
|
+
|
328
|
+
# Create a range expression (or Series).
|
329
|
+
#
|
330
|
+
# This can be used in a `select`, `with_column`, etc. Be sure that the resulting
|
331
|
+
# range size is equal to the length of the DataFrame you are collecting.
|
332
|
+
#
|
333
|
+
# @param low [Integer, Expr, Series]
|
334
|
+
# Lower bound of range.
|
335
|
+
# @param high [Integer, Expr, Series]
|
336
|
+
# Upper bound of range.
|
337
|
+
# @param step [Integer]
|
338
|
+
# Step size of the range.
|
339
|
+
# @param eager [Boolean]
|
340
|
+
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
341
|
+
# @param dtype [Symbol]
|
342
|
+
# Apply an explicit integer dtype to the resulting expression (default is Int64).
|
343
|
+
#
|
344
|
+
# @return [Expr, Series]
|
345
|
+
#
|
346
|
+
# @example
|
347
|
+
# df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
|
90
348
|
def arange(low, high, step: 1, eager: false, dtype: nil)
|
91
349
|
low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
|
92
350
|
high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
|
@@ -106,16 +364,179 @@ module Polars
|
|
106
364
|
end
|
107
365
|
end
|
108
366
|
|
109
|
-
def
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
367
|
+
# def argsort_by
|
368
|
+
# end
|
369
|
+
|
370
|
+
# def duration
|
371
|
+
# end
|
372
|
+
|
373
|
+
# def format
|
374
|
+
# end
|
375
|
+
|
376
|
+
# Concat the arrays in a Series dtype List in linear time.
|
377
|
+
#
|
378
|
+
# @return [Expr]
|
379
|
+
def concat_list(exprs)
  # Normalise the selection, concatenate natively, then wrap the result.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  concatenated = RbExpr.concat_lst(rbexprs)
  Utils.wrap_expr(concatenated)
end
|
383
|
+
|
384
|
+
# def collect_all
|
385
|
+
# end
|
386
|
+
|
387
|
+
# Run polars expressions without a context.
|
388
|
+
#
|
389
|
+
# @return [DataFrame]
|
390
|
+
def select(exprs)
  # The expressions are evaluated against a frame built from an empty array.
  empty_frame = DataFrame.new([])
  empty_frame.select(exprs)
end
|
393
|
+
|
394
|
+
# Collect several columns into a Series of dtype Struct.
|
395
|
+
#
|
396
|
+
# @param exprs [Object]
|
397
|
+
# Columns/Expressions to collect into a Struct
|
398
|
+
# @param eager [Boolean]
|
399
|
+
# Evaluate immediately
|
400
|
+
#
|
401
|
+
# @return [Object]
|
402
|
+
#
|
403
|
+
# @example
|
404
|
+
# Polars::DataFrame.new(
|
405
|
+
# {
|
406
|
+
# "int" => [1, 2],
|
407
|
+
# "str" => ["a", "b"],
|
408
|
+
# "bool" => [true, nil],
|
409
|
+
# "list" => [[1, 2], [3]],
|
410
|
+
# }
|
411
|
+
# ).select([Polars.struct(Polars.all()).alias("my_struct")])
|
412
|
+
# # =>
|
413
|
+
# # shape: (2, 1)
|
414
|
+
# # ┌─────────────────────┐
|
415
|
+
# # │ my_struct │
|
416
|
+
# # │ --- │
|
417
|
+
# # │ struct[4] │
|
418
|
+
# # ╞═════════════════════╡
|
419
|
+
# # │ {1,"a",true,[1, 2]} │
|
420
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
421
|
+
# # │ {2,"b",null,[3]} │
|
422
|
+
# # └─────────────────────┘
|
423
|
+
#
|
424
|
+
# @example Only collect specific columns as a struct:
|
425
|
+
# df = Polars::DataFrame.new(
|
426
|
+
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
|
427
|
+
# )
|
428
|
+
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
|
429
|
+
# # =>
|
430
|
+
# # shape: (4, 4)
|
431
|
+
# # ┌─────┬───────┬─────┬─────────────┐
|
432
|
+
# # │ a ┆ b ┆ c ┆ a_and_b │
|
433
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
434
|
+
# # │ i64 ┆ str ┆ i64 ┆ struct[2] │
|
435
|
+
# # ╞═════╪═══════╪═════╪═════════════╡
|
436
|
+
# # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
|
437
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
438
|
+
# # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
|
439
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
440
|
+
# # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
|
441
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
442
|
+
# # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
|
443
|
+
# # └─────┴───────┴─────┴─────────────┘
|
444
|
+
def struct(exprs, eager: false)
  # Eager mode: build the lazy struct expression, evaluate it immediately via
  # `Polars.select`, and hand back the resulting Series.
  #
  # The explicit `return` is essential: without it the eager result is
  # computed and then discarded, and the method falls through to return an
  # Expr even though `eager: true` was requested.
  if eager
    return Polars.select(struct(exprs, eager: false)).to_series
  end

  # Lazy mode: normalise the selection and wrap the native struct expression.
  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(_as_struct(exprs))
end
|
451
|
+
|
452
|
+
# Repeat a single value n times.
|
453
|
+
#
|
454
|
+
# @param value [Object]
|
455
|
+
# Value to repeat.
|
456
|
+
# @param n [Integer]
|
457
|
+
# Repeat `n` times.
|
458
|
+
# @param eager [Boolean]
|
459
|
+
# Run eagerly and collect into a `Series`.
|
460
|
+
# @param name [String]
|
461
|
+
# Only used in `eager` mode. As expression, use `alias`.
|
462
|
+
#
|
463
|
+
# @return [Expr]
|
464
|
+
def repeat(value, n, eager: false, name: nil)
  if eager
    # NOTE(review): the eager path previously computed its dtype with
    # `py_type_to_dtype(type(value))` before calling `Series._repeat`. Both
    # names appear to be untranslated leftovers from the Python
    # implementation (Ruby has no `type` method), so that path could only
    # fail with a NoMethodError. Until a Ruby dtype inference helper is wired
    # in, fail with the same not-implemented marker used elsewhere in this
    # module.
    # TODO: infer the dtype from `value` and call
    #   Series._repeat(name || "", value, n, dtype)
    raise Todo
  end

  # Lazy path: an Integer count is promoted to a literal expression.
  n = lit(n) if n.is_a?(Integer)
  Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
end
|
118
478
|
|
479
|
+
# Return indices where `condition` evaluates `true`.
|
480
|
+
#
|
481
|
+
# @param condition [Expr]
|
482
|
+
# Boolean expression to evaluate
|
483
|
+
# @param eager [Boolean]
|
484
|
+
# Whether to apply this function eagerly (as opposed to lazily).
|
485
|
+
#
|
486
|
+
# @return [Expr, Series]
|
487
|
+
#
|
488
|
+
# @example
|
489
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
490
|
+
# df.select(
|
491
|
+
# [
|
492
|
+
# Polars.arg_where(Polars.col("a") % 2 == 0)
|
493
|
+
# ]
|
494
|
+
# ).to_series
|
495
|
+
# # =>
|
496
|
+
# # shape: (2,)
|
497
|
+
# # Series: 'a' [u32]
|
498
|
+
# # [
|
499
|
+
# # 1
|
500
|
+
# # 3
|
501
|
+
# # ]
|
502
|
+
def arg_where(condition, eager: false)
  # Lazy path: coerce the condition to an expression and wrap the native call.
  unless eager
    predicate = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    return Utils.wrap_expr(_arg_where(predicate._rbexpr))
  end

  # Eager path only accepts a Series.
  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
  end

  # Evaluate the lazy form against a one-column frame built from the Series.
  frame = condition.to_frame
  lazy_indices = arg_where(Polars.col(condition.name))
  frame.select(lazy_indices).to_series
end
|
513
|
+
|
514
|
+
# def coalesce
|
515
|
+
# end
|
516
|
+
|
517
|
+
# def from_epoch
|
518
|
+
# end
|
519
|
+
|
520
|
+
# Start a "when, then, otherwise" expression.
|
521
|
+
#
|
522
|
+
# @return [When]
|
523
|
+
#
|
524
|
+
# @example
|
525
|
+
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
526
|
+
# df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
527
|
+
# # =>
|
528
|
+
# # shape: (3, 3)
|
529
|
+
# # ┌─────┬─────┬─────────┐
|
530
|
+
# # │ foo ┆ bar ┆ literal │
|
531
|
+
# # │ --- ┆ --- ┆ --- │
|
532
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
533
|
+
# # ╞═════╪═════╪═════════╡
|
534
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
535
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
536
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
537
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
538
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
539
|
+
# # └─────┴─────┴─────────┘
|
119
540
|
def when(expr)
|
120
541
|
expr = Utils.expr_to_lit_or_expr(expr)
|
121
542
|
pw = RbExpr.when(expr._rbexpr)
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -1,13 +1,92 @@
|
|
1
1
|
module Polars
|
2
2
|
class LazyGroupBy
|
3
|
+
# @private
|
3
4
|
def initialize(lgb, lazyframe_class)
  # Keep the native group-by handle and the frame class used to wrap results.
  @lgb, @lazyframe_class = lgb, lazyframe_class
end
|
7
8
|
|
9
|
+
# Describe the aggregations that need to be done on a group.
|
10
|
+
#
|
11
|
+
# @return [LazyFrame]
|
8
12
|
def agg(aggs)
  # Normalise the selection, aggregate natively, then wrap the lazy frame.
  aggregated = @lgb.agg(Utils.selection_to_rbexpr_list(aggs))
  @lazyframe_class._from_rbldf(aggregated)
end
|
16
|
+
|
17
|
+
# Get the first `n` rows of each group.
|
18
|
+
#
|
19
|
+
# @param n [Integer]
|
20
|
+
# Number of rows to return.
|
21
|
+
#
|
22
|
+
# @return [LazyFrame]
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
# df = Polars::DataFrame.new(
|
26
|
+
# {
|
27
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
28
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
29
|
+
# }
|
30
|
+
# )
|
31
|
+
# df.groupby("letters").head(2).sort("letters")
|
32
|
+
# # =>
|
33
|
+
# # shape: (5, 2)
|
34
|
+
# # ┌─────────┬─────┐
|
35
|
+
# # │ letters ┆ nrs │
|
36
|
+
# # │ --- ┆ --- │
|
37
|
+
# # │ str ┆ i64 │
|
38
|
+
# # ╞═════════╪═════╡
|
39
|
+
# # │ a ┆ 3 │
|
40
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
41
|
+
# # │ a ┆ 5 │
|
42
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
43
|
+
# # │ b ┆ 6 │
|
44
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
45
|
+
# # │ c ┆ 1 │
|
46
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
47
|
+
# # │ c ┆ 2 │
|
48
|
+
# # └─────────┴─────┘
|
49
|
+
def head(n = 5)
  # Ask the native group-by for the leading rows, then wrap the result.
  leading_rows = @lgb.head(n)
  @lazyframe_class._from_rbldf(leading_rows)
end
|
52
|
+
|
53
|
+
# Get the last `n` rows of each group.
|
54
|
+
#
|
55
|
+
# @param n [Integer]
|
56
|
+
# Number of rows to return.
|
57
|
+
#
|
58
|
+
# @return [LazyFrame]
|
59
|
+
#
|
60
|
+
# @example
|
61
|
+
# df = Polars::DataFrame.new(
|
62
|
+
# {
|
63
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
64
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
65
|
+
# }
|
66
|
+
# )
|
67
|
+
# df.groupby("letters").tail(2).sort("letters")
|
68
|
+
# # =>
|
69
|
+
# # shape: (5, 2)
|
70
|
+
# # ┌─────────┬─────┐
|
71
|
+
# # │ letters ┆ nrs │
|
72
|
+
# # │ --- ┆ --- │
|
73
|
+
# # │ str ┆ i64 │
|
74
|
+
# # ╞═════════╪═════╡
|
75
|
+
# # │ a ┆ 3 │
|
76
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
77
|
+
# # │ a ┆ 5 │
|
78
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
79
|
+
# # │ b ┆ 6 │
|
80
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
81
|
+
# # │ c ┆ 2 │
|
82
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
83
|
+
# # │ c ┆ 4 │
|
84
|
+
# # └─────────┴─────┘
|
85
|
+
def tail(n = 5)
  # Ask the native group-by for the trailing rows, then wrap the result.
  trailing_rows = @lgb.tail(n)
  @lazyframe_class._from_rbldf(trailing_rows)
end
|
88
|
+
|
89
|
+
# def apply
|
90
|
+
# end
|
12
91
|
end
|
13
92
|
end
|
data/lib/polars/list_expr.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for list related expressions.
|
2
3
|
class ListExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
  # Share the native expression handle of the wrapped expression.
  native_handle = expr._rbexpr
  self._rbexpr = native_handle
end
|
@@ -41,6 +44,7 @@ module Polars
|
|
41
44
|
# def concat
|
42
45
|
# end
|
43
46
|
|
47
|
+
#
|
44
48
|
def get(index)
|
45
49
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
46
50
|
Utils.wrap_expr(_rbexpr.lst_get(index))
|
@@ -101,6 +105,7 @@ module Polars
|
|
101
105
|
# Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator))
|
102
106
|
# end
|
103
107
|
|
108
|
+
#
|
104
109
|
def eval(expr, parallel: false)
  # Hand the inner expression to the native list evaluation, then wrap it.
  evaluated = _rbexpr.lst_eval(expr._rbexpr, parallel)
  Utils.wrap_expr(evaluated)
end
|
data/lib/polars/meta_expr.rb
CHANGED
@@ -1,31 +1,52 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for expressions on a meta level.
|
2
3
|
class MetaExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
  # Share the native expression handle of the wrapped expression.
  native_handle = expr._rbexpr
  self._rbexpr = native_handle
end
|
8
11
|
|
12
|
+
# Equal.
|
13
|
+
#
|
14
|
+
# @return [Boolean]
|
9
15
|
def ==(other)
  # Equality is decided by the native expressions, not object identity.
  other_handle = other._rbexpr
  _rbexpr.meta_eq(other_handle)
end
|
12
18
|
|
19
|
+
# Not equal.
|
20
|
+
#
|
21
|
+
# @return [Boolean]
|
13
22
|
def !=(other)
  # Defined as the negation of `==` so the two can never disagree.
  same = (self == other)
  !same
end
|
16
25
|
|
26
|
+
# Pop the latest expression and return the input(s) of the popped expression.
|
27
|
+
#
|
28
|
+
# @return [Array]
|
17
29
|
def pop
  # Each native expression returned by the pop is wrapped before returning.
  popped = _rbexpr.meta_pop
  popped.map do |native_expr|
    Utils.wrap_expr(native_expr)
  end
end
|
20
32
|
|
33
|
+
# Get a list with the root column name.
|
34
|
+
#
|
35
|
+
# @return [Array]
|
21
36
|
def root_names
  # Returned exactly as the native layer reports them.
  roots = _rbexpr.meta_roots
  roots
end
|
24
39
|
|
40
|
+
# Get the column name that this expression would produce.
|
41
|
+
#
|
42
|
+
# @return [String]
|
25
43
|
def output_name
  # Returned exactly as the native layer reports it.
  produced_name = _rbexpr.meta_output_name
  produced_name
end
|
28
46
|
|
47
|
+
# Undo any renaming operation like `alias` or `keep_name`.
|
48
|
+
#
|
49
|
+
# @return [Expr]
|
29
50
|
def undo_aliases
  # The native call yields a new expression, which is wrapped before returning.
  unaliased = _rbexpr.meta_undo_aliases
  Utils.wrap_expr(unaliased)
end
|