polars-df 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
@@ -1,5 +1,8 @@
|
|
1
1
|
module Polars
|
2
2
|
module LazyFunctions
|
3
|
+
# Return an expression representing a column in a DataFrame.
|
4
|
+
#
|
5
|
+
# @return [Expr]
|
3
6
|
def col(name)
|
4
7
|
if name.is_a?(Series)
|
5
8
|
name = name.to_a
|
@@ -21,10 +24,42 @@ module Polars
|
|
21
24
|
end
|
22
25
|
end
|
23
26
|
|
27
|
+
# Alias for an element being evaluated in an `eval` expression.
|
28
|
+
#
|
29
|
+
# @return [Expr]
|
30
|
+
#
|
31
|
+
# @example A horizontal rank computation by taking the elements of a list
|
32
|
+
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
33
|
+
# df.with_column(
|
34
|
+
# Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
|
35
|
+
# )
|
36
|
+
# # =>
|
37
|
+
# # shape: (3, 3)
|
38
|
+
# # ┌─────┬─────┬────────────┐
|
39
|
+
# # │ a ┆ b ┆ rank │
|
40
|
+
# # │ --- ┆ --- ┆ --- │
|
41
|
+
# # │ i64 ┆ i64 ┆ list[f32] │
|
42
|
+
# # ╞═════╪═════╪════════════╡
|
43
|
+
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
44
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
45
|
+
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
46
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
47
|
+
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
48
|
+
# # └─────┴─────┴────────────┘
|
24
49
|
def element
|
25
50
|
col("")
|
26
51
|
end
|
27
52
|
|
53
|
+
# Count the number of values in this column/context.
|
54
|
+
#
|
55
|
+
# @param column [String, Series, nil]
|
56
|
+
# Behavior depends on the type of `column`:
|
57
|
+
#
|
58
|
+
# * `Series` : count the values in the series.
|
59
|
+
# * `String` : count the values in this column.
|
60
|
+
# * `nil` : count the number of values in this context.
|
61
|
+
#
|
62
|
+
# @return [Expr, Integer]
|
28
63
|
def count(column = nil)
|
29
64
|
if column.nil?
|
30
65
|
return Utils.wrap_expr(RbExpr.count)
|
@@ -37,9 +72,16 @@ module Polars
|
|
37
72
|
end
|
38
73
|
end
|
39
74
|
|
40
|
-
#
|
41
|
-
#
|
75
|
+
# Aggregate to list.
|
76
|
+
#
|
77
|
+
# @return [Expr]
|
78
|
+
def to_list(name)
|
79
|
+
col(name).list
|
80
|
+
end
|
42
81
|
|
82
|
+
# Get the standard deviation.
|
83
|
+
#
|
84
|
+
# @return [Object]
|
43
85
|
def std(column, ddof: 1)
|
44
86
|
if column.is_a?(Series)
|
45
87
|
column.std(ddof: ddof)
|
@@ -48,6 +90,9 @@ module Polars
|
|
48
90
|
end
|
49
91
|
end
|
50
92
|
|
93
|
+
# Get the variance.
|
94
|
+
#
|
95
|
+
# @return [Object]
|
51
96
|
def var(column, ddof: 1)
|
52
97
|
if column.is_a?(Series)
|
53
98
|
column.var(ddof: ddof)
|
@@ -56,6 +101,16 @@ module Polars
|
|
56
101
|
end
|
57
102
|
end
|
58
103
|
|
104
|
+
# Get the maximum value.
|
105
|
+
#
|
106
|
+
# @param column [Object]
|
107
|
+
# Column(s) to be used in aggregation. Will lead to different behavior based on
|
108
|
+
# the input:
|
109
|
+
#
|
110
|
+
# - [String, Series] -> aggregate the maximum value of that column.
|
111
|
+
# - [Array<Expr>] -> aggregate the maximum value horizontally.
|
112
|
+
#
|
113
|
+
# @return [Expr, Object]
|
59
114
|
def max(column)
|
60
115
|
if column.is_a?(Series)
|
61
116
|
column.max
|
@@ -68,6 +123,16 @@ module Polars
|
|
68
123
|
end
|
69
124
|
end
|
70
125
|
|
126
|
+
# Get the minimum value.
|
127
|
+
#
|
128
|
+
# @param column [Object]
|
129
|
+
# Column(s) to be used in aggregation. Will lead to different behavior based on
|
130
|
+
# the input:
|
131
|
+
#
|
132
|
+
# - [String, Series] -> aggregate the minimum value of that column.
|
133
|
+
# - [Array<Expr>] -> aggregate the minimum value horizontally.
|
134
|
+
#
|
135
|
+
# @return [Expr, Object]
|
71
136
|
def min(column)
|
72
137
|
if column.is_a?(Series)
|
73
138
|
column.min
|
@@ -80,6 +145,9 @@ module Polars
|
|
80
145
|
end
|
81
146
|
end
|
82
147
|
|
148
|
+
# Sum values in a column/Series, or horizontally across list of columns/expressions.
|
149
|
+
#
|
150
|
+
# @return [Object]
|
83
151
|
def sum(column)
|
84
152
|
if column.is_a?(Series)
|
85
153
|
column.sum
|
@@ -94,6 +162,9 @@ module Polars
|
|
94
162
|
end
|
95
163
|
end
|
96
164
|
|
165
|
+
# Get the mean value.
|
166
|
+
#
|
167
|
+
# @return [Expr, Float]
|
97
168
|
def mean(column)
|
98
169
|
if column.is_a?(Series)
|
99
170
|
column.mean
|
@@ -102,10 +173,16 @@ module Polars
|
|
102
173
|
end
|
103
174
|
end
|
104
175
|
|
176
|
+
# Get the mean value.
|
177
|
+
#
|
178
|
+
# @return [Expr, Float]
|
105
179
|
def avg(column)
|
106
180
|
mean(column)
|
107
181
|
end
|
108
182
|
|
183
|
+
# Get the median value.
|
184
|
+
#
|
185
|
+
# @return [Object]
|
109
186
|
def median(column)
|
110
187
|
if column.is_a?(Series)
|
111
188
|
column.median
|
@@ -117,6 +194,9 @@ module Polars
|
|
117
194
|
# def n_unique
|
118
195
|
# end
|
119
196
|
|
197
|
+
# Get the first value.
|
198
|
+
#
|
199
|
+
# @return [Object]
|
120
200
|
def first(column = nil)
|
121
201
|
if column.nil?
|
122
202
|
return Utils.wrap_expr(RbExpr.first)
|
@@ -142,7 +222,20 @@ module Polars
|
|
142
222
|
# def tail
|
143
223
|
# end
|
144
224
|
|
225
|
+
# Return an expression representing a literal value.
|
226
|
+
#
|
227
|
+
# @return [Expr]
|
145
228
|
def lit(value)
|
229
|
+
if value.is_a?(Polars::Series)
|
230
|
+
name = value.name
|
231
|
+
value = value._s
|
232
|
+
e = Utils.wrap_expr(RbExpr.lit(value))
|
233
|
+
if name == ""
|
234
|
+
return e
|
235
|
+
end
|
236
|
+
return e.alias(name)
|
237
|
+
end
|
238
|
+
|
146
239
|
Utils.wrap_expr(RbExpr.lit(value))
|
147
240
|
end
|
148
241
|
|
@@ -164,6 +257,9 @@ module Polars
|
|
164
257
|
# def apply
|
165
258
|
# end
|
166
259
|
|
260
|
+
# Accumulate over multiple columns horizontally (row-wise) with a left fold.
|
261
|
+
#
|
262
|
+
# @return [Expr]
|
167
263
|
def fold(acc, f, exprs)
|
168
264
|
acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
|
169
265
|
if exprs.is_a?(Expr)
|
@@ -189,6 +285,30 @@ module Polars
|
|
189
285
|
# def exclude
|
190
286
|
# end
|
191
287
|
|
288
|
+
# Do one of two things.
|
289
|
+
#
|
290
|
+
# * function can do a columnwise or elementwise AND operation
|
291
|
+
# * a wildcard column selection
|
292
|
+
#
|
293
|
+
# @param name [Object]
|
294
|
+
# If given, this function will apply a bitwise & on the columns.
|
295
|
+
#
|
296
|
+
# @return [Expr]
|
297
|
+
#
|
298
|
+
# @example Sum all columns
|
299
|
+
# df = Polars::DataFrame.new(
|
300
|
+
# {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
|
301
|
+
# )
|
302
|
+
# df.select(Polars.all.sum)
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 3)
|
305
|
+
# # ┌─────┬──────┬─────┐
|
306
|
+
# # │ a ┆ b ┆ c │
|
307
|
+
# # │ --- ┆ --- ┆ --- │
|
308
|
+
# # │ i64 ┆ str ┆ i64 │
|
309
|
+
# # ╞═════╪══════╪═════╡
|
310
|
+
# # │ 6 ┆ null ┆ 3 │
|
311
|
+
# # └─────┴──────┴─────┘
|
192
312
|
def all(name = nil)
|
193
313
|
if name.nil?
|
194
314
|
col("*")
|
@@ -205,6 +325,26 @@ module Polars
|
|
205
325
|
# def quantile
|
206
326
|
# end
|
207
327
|
|
328
|
+
# Create a range expression (or Series).
|
329
|
+
#
|
330
|
+
# This can be used in a `select`, `with_column`, etc. Be sure that the resulting
|
331
|
+
# range size is equal to the length of the DataFrame you are collecting.
|
332
|
+
#
|
333
|
+
# @param low [Integer, Expr, Series]
|
334
|
+
# Lower bound of range.
|
335
|
+
# @param high [Integer, Expr, Series]
|
336
|
+
# Upper bound of range.
|
337
|
+
# @param step [Integer]
|
338
|
+
# Step size of the range.
|
339
|
+
# @param eager [Boolean]
|
340
|
+
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
341
|
+
# @param dtype [Symbol]
|
342
|
+
# Apply an explicit integer dtype to the resulting expression (default is Int64).
|
343
|
+
#
|
344
|
+
# @return [Expr, Series]
|
345
|
+
#
|
346
|
+
# @example
|
347
|
+
# df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
|
208
348
|
def arange(low, high, step: 1, eager: false, dtype: nil)
|
209
349
|
low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
|
210
350
|
high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
|
@@ -233,6 +373,9 @@ module Polars
|
|
233
373
|
# def format
|
234
374
|
# end
|
235
375
|
|
376
|
+
# Concatenate the arrays in a Series of dtype List in linear time.
|
377
|
+
#
|
378
|
+
# @return [Expr]
|
236
379
|
def concat_list(exprs)
|
237
380
|
exprs = Utils.selection_to_rbexpr_list(exprs)
|
238
381
|
Utils.wrap_expr(RbExpr.concat_lst(exprs))
|
@@ -241,17 +384,132 @@ module Polars
|
|
241
384
|
# def collect_all
|
242
385
|
# end
|
243
386
|
|
244
|
-
#
|
245
|
-
#
|
387
|
+
# Run polars expressions without a context.
|
388
|
+
#
|
389
|
+
# @return [DataFrame]
|
390
|
+
def select(exprs)
|
391
|
+
DataFrame.new([]).select(exprs)
|
392
|
+
end
|
246
393
|
|
247
|
-
#
|
248
|
-
#
|
394
|
+
# Collect several columns into a Series of dtype Struct.
|
395
|
+
#
|
396
|
+
# @param exprs [Object]
|
397
|
+
# Columns/Expressions to collect into a Struct
|
398
|
+
# @param eager [Boolean]
|
399
|
+
# Evaluate immediately
|
400
|
+
#
|
401
|
+
# @return [Object]
|
402
|
+
#
|
403
|
+
# @example
|
404
|
+
# Polars::DataFrame.new(
|
405
|
+
# {
|
406
|
+
# "int" => [1, 2],
|
407
|
+
# "str" => ["a", "b"],
|
408
|
+
# "bool" => [true, nil],
|
409
|
+
# "list" => [[1, 2], [3]],
|
410
|
+
# }
|
411
|
+
# ).select([Polars.struct(Polars.all()).alias("my_struct")])
|
412
|
+
# # =>
|
413
|
+
# # shape: (2, 1)
|
414
|
+
# # ┌─────────────────────┐
|
415
|
+
# # │ my_struct │
|
416
|
+
# # │ --- │
|
417
|
+
# # │ struct[4] │
|
418
|
+
# # ╞═════════════════════╡
|
419
|
+
# # │ {1,"a",true,[1, 2]} │
|
420
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
421
|
+
# # │ {2,"b",null,[3]} │
|
422
|
+
# # └─────────────────────┘
|
423
|
+
#
|
424
|
+
# @example Only collect specific columns as a struct:
|
425
|
+
# df = Polars::DataFrame.new(
|
426
|
+
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
|
427
|
+
# )
|
428
|
+
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
|
429
|
+
# # =>
|
430
|
+
# # shape: (4, 4)
|
431
|
+
# # ┌─────┬───────┬─────┬─────────────┐
|
432
|
+
# # │ a ┆ b ┆ c ┆ a_and_b │
|
433
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
434
|
+
# # │ i64 ┆ str ┆ i64 ┆ struct[2] │
|
435
|
+
# # ╞═════╪═══════╪═════╪═════════════╡
|
436
|
+
# # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
|
437
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
438
|
+
# # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
|
439
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
440
|
+
# # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
|
441
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
442
|
+
# # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
|
443
|
+
# # └─────┴───────┴─────┴─────────────┘
|
444
|
+
def struct(exprs, eager: false)
|
445
|
+
if eager
|
446
|
+
Polars.select(struct(exprs, eager: false)).to_series
|
447
|
+
end
|
448
|
+
exprs = Utils.selection_to_rbexpr_list(exprs)
|
449
|
+
Utils.wrap_expr(_as_struct(exprs))
|
450
|
+
end
|
249
451
|
|
250
|
-
#
|
251
|
-
#
|
452
|
+
# Repeat a single value n times.
|
453
|
+
#
|
454
|
+
# @param value [Object]
|
455
|
+
# Value to repeat.
|
456
|
+
# @param n [Integer]
|
457
|
+
# Repeat `n` times.
|
458
|
+
# @param eager [Boolean]
|
459
|
+
# Run eagerly and collect into a `Series`.
|
460
|
+
# @param name [String]
|
461
|
+
# Only used in `eager` mode. As expression, use `alias`.
|
462
|
+
#
|
463
|
+
# @return [Expr]
|
464
|
+
def repeat(value, n, eager: false, name: nil)
|
465
|
+
if eager
|
466
|
+
if name.nil?
|
467
|
+
name = ""
|
468
|
+
end
|
469
|
+
dtype = py_type_to_dtype(type(value))
|
470
|
+
Series._repeat(name, value, n, dtype)
|
471
|
+
else
|
472
|
+
if n.is_a?(Integer)
|
473
|
+
n = lit(n)
|
474
|
+
end
|
475
|
+
Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
|
476
|
+
end
|
477
|
+
end
|
252
478
|
|
253
|
-
#
|
254
|
-
#
|
479
|
+
# Return indices where `condition` evaluates `true`.
|
480
|
+
#
|
481
|
+
# @param condition [Expr]
|
482
|
+
# Boolean expression to evaluate
|
483
|
+
# @param eager [Boolean]
|
484
|
+
# Whether to apply this function eagerly (as opposed to lazily).
|
485
|
+
#
|
486
|
+
# @return [Expr, Series]
|
487
|
+
#
|
488
|
+
# @example
|
489
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
|
490
|
+
# df.select(
|
491
|
+
# [
|
492
|
+
# Polars.arg_where(Polars.col("a") % 2 == 0)
|
493
|
+
# ]
|
494
|
+
# ).to_series
|
495
|
+
# # =>
|
496
|
+
# # shape: (2,)
|
497
|
+
# # Series: 'a' [u32]
|
498
|
+
# # [
|
499
|
+
# # 1
|
500
|
+
# # 3
|
501
|
+
# # ]
|
502
|
+
def arg_where(condition, eager: false)
|
503
|
+
if eager
|
504
|
+
if !condition.is_a?(Series)
|
505
|
+
raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
|
506
|
+
end
|
507
|
+
condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
|
508
|
+
else
|
509
|
+
condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
|
510
|
+
Utils.wrap_expr(_arg_where(condition._rbexpr))
|
511
|
+
end
|
512
|
+
end
|
255
513
|
|
256
514
|
# def coalesce
|
257
515
|
# end
|
@@ -259,6 +517,26 @@ module Polars
|
|
259
517
|
# def from_epoch
|
260
518
|
# end
|
261
519
|
|
520
|
+
# Start a "when, then, otherwise" expression.
|
521
|
+
#
|
522
|
+
# @return [When]
|
523
|
+
#
|
524
|
+
# @example
|
525
|
+
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
526
|
+
# df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
527
|
+
# # =>
|
528
|
+
# # shape: (3, 3)
|
529
|
+
# # ┌─────┬─────┬─────────┐
|
530
|
+
# # │ foo ┆ bar ┆ literal │
|
531
|
+
# # │ --- ┆ --- ┆ --- │
|
532
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
533
|
+
# # ╞═════╪═════╪═════════╡
|
534
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
535
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
536
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
537
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
538
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
539
|
+
# # └─────┴─────┴─────────┘
|
262
540
|
def when(expr)
|
263
541
|
expr = Utils.expr_to_lit_or_expr(expr)
|
264
542
|
pw = RbExpr.when(expr._rbexpr)
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -1,13 +1,92 @@
|
|
1
1
|
module Polars
|
2
2
|
class LazyGroupBy
|
3
|
+
# @private
|
3
4
|
def initialize(lgb, lazyframe_class)
|
4
5
|
@lgb = lgb
|
5
6
|
@lazyframe_class = lazyframe_class
|
6
7
|
end
|
7
8
|
|
9
|
+
# Describe the aggregations that need to be done on a group.
|
10
|
+
#
|
11
|
+
# @return [LazyFrame]
|
8
12
|
def agg(aggs)
|
9
13
|
rbexprs = Utils.selection_to_rbexpr_list(aggs)
|
10
14
|
@lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
|
11
15
|
end
|
16
|
+
|
17
|
+
# Get the first `n` rows of each group.
|
18
|
+
#
|
19
|
+
# @param n [Integer]
|
20
|
+
# Number of rows to return.
|
21
|
+
#
|
22
|
+
# @return [LazyFrame]
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
# df = Polars::DataFrame.new(
|
26
|
+
# {
|
27
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
28
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
29
|
+
# }
|
30
|
+
# )
|
31
|
+
# df.groupby("letters").head(2).sort("letters")
|
32
|
+
# # =>
|
33
|
+
# # shape: (5, 2)
|
34
|
+
# # ┌─────────┬─────┐
|
35
|
+
# # │ letters ┆ nrs │
|
36
|
+
# # │ --- ┆ --- │
|
37
|
+
# # │ str ┆ i64 │
|
38
|
+
# # ╞═════════╪═════╡
|
39
|
+
# # │ a ┆ 3 │
|
40
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
41
|
+
# # │ a ┆ 5 │
|
42
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
43
|
+
# # │ b ┆ 6 │
|
44
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
45
|
+
# # │ c ┆ 1 │
|
46
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
47
|
+
# # │ c ┆ 2 │
|
48
|
+
# # └─────────┴─────┘
|
49
|
+
def head(n = 5)
|
50
|
+
@lazyframe_class._from_rbldf(@lgb.head(n))
|
51
|
+
end
|
52
|
+
|
53
|
+
# Get the last `n` rows of each group.
|
54
|
+
#
|
55
|
+
# @param n [Integer]
|
56
|
+
# Number of rows to return.
|
57
|
+
#
|
58
|
+
# @return [LazyFrame]
|
59
|
+
#
|
60
|
+
# @example
|
61
|
+
# df = Polars::DataFrame.new(
|
62
|
+
# {
|
63
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
64
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
65
|
+
# }
|
66
|
+
# )
|
67
|
+
# df.groupby("letters").tail(2).sort("letters")
|
68
|
+
# # =>
|
69
|
+
# # shape: (5, 2)
|
70
|
+
# # ┌─────────┬─────┐
|
71
|
+
# # │ letters ┆ nrs │
|
72
|
+
# # │ --- ┆ --- │
|
73
|
+
# # │ str ┆ i64 │
|
74
|
+
# # ╞═════════╪═════╡
|
75
|
+
# # │ a ┆ 3 │
|
76
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
77
|
+
# # │ a ┆ 5 │
|
78
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
79
|
+
# # │ b ┆ 6 │
|
80
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
81
|
+
# # │ c ┆ 2 │
|
82
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
83
|
+
# # │ c ┆ 4 │
|
84
|
+
# # └─────────┴─────┘
|
85
|
+
def tail(n = 5)
|
86
|
+
@lazyframe_class._from_rbldf(@lgb.tail(n))
|
87
|
+
end
|
88
|
+
|
89
|
+
# def apply
|
90
|
+
# end
|
12
91
|
end
|
13
92
|
end
|
data/lib/polars/list_expr.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for list related expressions.
|
2
3
|
class ListExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
|
6
9
|
self._rbexpr = expr._rbexpr
|
7
10
|
end
|
@@ -41,6 +44,7 @@ module Polars
|
|
41
44
|
# def concat
|
42
45
|
# end
|
43
46
|
|
47
|
+
#
|
44
48
|
def get(index)
|
45
49
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
46
50
|
Utils.wrap_expr(_rbexpr.lst_get(index))
|
@@ -101,6 +105,7 @@ module Polars
|
|
101
105
|
# Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator))
|
102
106
|
# end
|
103
107
|
|
108
|
+
#
|
104
109
|
def eval(expr, parallel: false)
|
105
110
|
Utils.wrap_expr(_rbexpr.lst_eval(expr._rbexpr, parallel))
|
106
111
|
end
|
data/lib/polars/meta_expr.rb
CHANGED
@@ -1,31 +1,52 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for expressions on a meta level.
|
2
3
|
class MetaExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
|
6
9
|
self._rbexpr = expr._rbexpr
|
7
10
|
end
|
8
11
|
|
12
|
+
# Equal.
|
13
|
+
#
|
14
|
+
# @return [Boolean]
|
9
15
|
def ==(other)
|
10
16
|
_rbexpr.meta_eq(other._rbexpr)
|
11
17
|
end
|
12
18
|
|
19
|
+
# Not equal.
|
20
|
+
#
|
21
|
+
# @return [Boolean]
|
13
22
|
def !=(other)
|
14
23
|
!(self == other)
|
15
24
|
end
|
16
25
|
|
26
|
+
# Pop the latest expression and return the input(s) of the popped expression.
|
27
|
+
#
|
28
|
+
# @return [Array]
|
17
29
|
def pop
|
18
30
|
_rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
|
19
31
|
end
|
20
32
|
|
33
|
+
# Get a list with the root column name.
|
34
|
+
#
|
35
|
+
# @return [Array]
|
21
36
|
def root_names
|
22
37
|
_rbexpr.meta_roots
|
23
38
|
end
|
24
39
|
|
40
|
+
# Get the column name that this expression would produce.
|
41
|
+
#
|
42
|
+
# @return [String]
|
25
43
|
def output_name
|
26
44
|
_rbexpr.meta_output_name
|
27
45
|
end
|
28
46
|
|
47
|
+
# Undo any renaming operation like `alias` or `keep_name`.
|
48
|
+
#
|
49
|
+
# @return [Expr]
|
29
50
|
def undo_aliases
|
30
51
|
Utils.wrap_expr(_rbexpr.meta_undo_aliases)
|
31
52
|
end
|