polars-df 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -1,5 +1,8 @@
1
1
  module Polars
2
2
  module LazyFunctions
3
+ # Return an expression representing a column in a DataFrame.
4
+ #
5
+ # @return [Expr]
3
6
  def col(name)
4
7
  if name.is_a?(Series)
5
8
  name = name.to_a
@@ -21,10 +24,42 @@ module Polars
21
24
  end
22
25
  end
23
26
 
27
+ # Alias for an element being evaluated in an `eval` expression.
28
+ #
29
+ # @return [Expr]
30
+ #
31
+ # @example A horizontal rank computation by taking the elements of a list
32
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
33
+ # df.with_column(
34
+ # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
35
+ # )
36
+ # # =>
37
+ # # shape: (3, 3)
38
+ # # ┌─────┬─────┬────────────┐
39
+ # # │ a ┆ b ┆ rank │
40
+ # # │ --- ┆ --- ┆ --- │
41
+ # # │ i64 ┆ i64 ┆ list[f32] │
42
+ # # ╞═════╪═════╪════════════╡
43
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
44
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
45
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
46
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
47
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
48
+ # # └─────┴─────┴────────────┘
24
49
  def element
25
50
  col("")
26
51
  end
27
52
 
53
+ # Count the number of values in this column/context.
54
+ #
55
+ # @param column [String, Series, nil]
56
+ # If `column` is:
57
+ #
58
+ # * `Series` : count the values in the series.
59
+ # * `String` : count the values in this column.
60
+ # * `nil` : count the number of values in this context.
61
+ #
62
+ # @return [Expr, Integer]
28
63
  def count(column = nil)
29
64
  if column.nil?
30
65
  return Utils.wrap_expr(RbExpr.count)
@@ -37,9 +72,16 @@ module Polars
37
72
  end
38
73
  end
39
74
 
40
- # def to_list
41
- # end
75
# Aggregate a column to a list.
#
# @param name [Object]
#   Column selector, handed unchanged to `col`.
#
# @return [Expr]
def to_list(name)
  column_expr = col(name)
  column_expr.list
end
42
81
 
82
+ # Get the standard deviation.
83
+ #
84
+ # @return [Object]
43
85
  def std(column, ddof: 1)
44
86
  if column.is_a?(Series)
45
87
  column.std(ddof: ddof)
@@ -48,6 +90,9 @@ module Polars
48
90
  end
49
91
  end
50
92
 
93
+ # Get the variance.
94
+ #
95
+ # @return [Object]
51
96
  def var(column, ddof: 1)
52
97
  if column.is_a?(Series)
53
98
  column.var(ddof: ddof)
@@ -56,6 +101,16 @@ module Polars
56
101
  end
57
102
  end
58
103
 
104
+ # Get the maximum value.
105
+ #
106
+ # @param column [Object]
107
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
108
+ # the input:
109
+ #
110
+ # - [String, Series] -> aggregate the maximum value of that column.
111
+ # - [Array<Expr>] -> aggregate the maximum value horizontally.
112
+ #
113
+ # @return [Expr, Object]
59
114
  def max(column)
60
115
  if column.is_a?(Series)
61
116
  column.max
@@ -68,6 +123,16 @@ module Polars
68
123
  end
69
124
  end
70
125
 
126
+ # Get the minimum value.
127
+ #
128
+ # @param column [Object]
129
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
130
+ # the input:
131
+ #
132
+ # - [String, Series] -> aggregate the minimum value of that column.
133
+ # - [Array<Expr>] -> aggregate the minimum value horizontally.
134
+ #
135
+ # @return [Expr, Object]
71
136
  def min(column)
72
137
  if column.is_a?(Series)
73
138
  column.min
@@ -80,6 +145,9 @@ module Polars
80
145
  end
81
146
  end
82
147
 
148
+ # Sum values in a column/Series, or horizontally across list of columns/expressions.
149
+ #
150
+ # @return [Object]
83
151
  def sum(column)
84
152
  if column.is_a?(Series)
85
153
  column.sum
@@ -94,6 +162,9 @@ module Polars
94
162
  end
95
163
  end
96
164
 
165
+ # Get the mean value.
166
+ #
167
+ # @return [Expr, Float]
97
168
  def mean(column)
98
169
  if column.is_a?(Series)
99
170
  column.mean
@@ -102,10 +173,16 @@ module Polars
102
173
  end
103
174
  end
104
175
 
176
+ # Get the mean value. Alias that forwards `column` unchanged to `mean`.
177
+ #
178
+ # @return [Expr, Float]
105
179
  def avg(column)
106
180
  mean(column)
107
181
  end
108
182
 
183
+ # Get the median value.
184
+ #
185
+ # @return [Object]
109
186
  def median(column)
110
187
  if column.is_a?(Series)
111
188
  column.median
@@ -114,9 +191,20 @@ module Polars
114
191
  end
115
192
  end
116
193
 
117
- # def n_unique
118
- # end
194
# Count unique values.
#
# @param column [Object]
#   A `Series` (its own `n_unique` is called directly) or a column selector
#   that is handed to `col`.
#
# @return [Object]
def n_unique(column)
  return column.n_unique if column.is_a?(Series)

  col(column).n_unique
end
119
204
 
205
+ # Get the first value.
206
+ #
207
+ # @return [Object]
120
208
  def first(column = nil)
121
209
  if column.nil?
122
210
  return Utils.wrap_expr(RbExpr.first)
@@ -133,30 +221,145 @@ module Polars
133
221
  end
134
222
  end
135
223
 
136
- # def last
137
- # end
224
# Get the last value.
#
# Depending on the input type this function does different things:
#
# - nil -> expression to take last column of a context.
# - String -> syntactic sugar for `Polars.col(..).last`
# - Series -> Take last value in `Series`
#
# @param column [String, Series, nil]
#   Column name, Series, or nil.
#
# @return [Object]
#
# @raise [IndexError]
#   If `column` is a `Series` with no elements.
def last(column = nil)
  # NOTE(review): `_last` is not defined in the code visible here, while the
  # sibling `first` calls `RbExpr.first` - confirm `_last` resolves at runtime.
  return Utils.wrap_expr(_last) if column.nil?
  return col(column).last unless column.is_a?(Series)

  # An empty Series has no last element to hand back.
  raise IndexError, "The series is empty, so no last value can be returned" unless column.len > 0

  column[-1]
end
141
247
 
142
- # def tail
143
- # end
248
# Get the first `n` rows.
#
# @param column [Object]
#   Column name or Series.
# @param n [Integer]
#   Number of rows to return.
#
# @return [Object]
def head(column, n = 10)
  # A Series answers directly; anything else is resolved through `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.head(n)
end
144
263
 
264
# Get the last `n` rows.
#
# @param column [Object]
#   Column name or Series.
# @param n [Integer]
#   Number of rows to return.
#
# @return [Object]
def tail(column, n = 10)
  # A Series answers directly; anything else is resolved through `col`.
  target = column.is_a?(Series) ? column : col(column)
  target.tail(n)
end
279
+
280
# Return an expression representing a literal value.
#
# A `Polars::Series` is unwrapped to its `_s` handle first, and the resulting
# expression is aliased to the Series name unless that name is empty.
#
# @param value [Object]
#   Value (or `Polars::Series`) to turn into a literal expression.
#
# @return [Expr]
def lit(value)
  return Utils.wrap_expr(RbExpr.lit(value)) unless value.is_a?(Polars::Series)

  series_name = value.name
  literal = Utils.wrap_expr(RbExpr.lit(value._s))
  series_name == "" ? literal : literal.alias(series_name)
end
148
296
 
149
297
  # def cumsum
150
298
  # end
151
299
 
152
- # def spearman_rank_corr
153
- # end
300
# Compute the Spearman rank correlation between two columns.
#
# Missing data will be excluded from the computation.
#
# @param a [Object]
#   Column name or Expression.
# @param b [Object]
#   Column name or Expression.
# @param ddof [Integer]
#   Delta degrees of freedom.
# @param propagate_nans [Boolean]
#   If `true`, any `NaN` encountered will lead to `NaN` in the output.
#   Defaults to `false`, where `NaN` values are regarded as larger than any
#   finite number and thus lead to the highest rank.
#
# @return [Expr]
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Strings are treated as column names; everything else is used as given.
  lhs = a.is_a?(String) ? col(a) : a
  rhs = b.is_a?(String) ? col(b) : b
  Utils.wrap_expr(RbExpr.spearman_rank_corr(lhs._rbexpr, rhs._rbexpr, ddof, propagate_nans))
end
154
325
 
155
- # def pearson_corr
156
- # end
326
# Compute Pearson's correlation between two columns.
#
# @param a [Object]
#   Column name or Expression.
# @param b [Object]
#   Column name or Expression.
# @param ddof [Integer]
#   Delta degrees of freedom.
#
# @return [Expr]
def pearson_corr(a, b, ddof: 1)
  # Strings are treated as column names; everything else is used as given.
  lhs = a.is_a?(String) ? col(a) : a
  rhs = b.is_a?(String) ? col(b) : b
  Utils.wrap_expr(RbExpr.pearson_corr(lhs._rbexpr, rhs._rbexpr, ddof))
end
157
345
 
158
- # def cov
159
- # end
346
# Compute the covariance between two columns/expressions.
#
# @param a [Object]
#   Column name or Expression.
# @param b [Object]
#   Column name or Expression.
#
# @return [Expr]
def cov(a, b)
  # Strings are treated as column names; everything else is used as given.
  lhs = a.is_a?(String) ? col(a) : a
  rhs = b.is_a?(String) ? col(b) : b
  Utils.wrap_expr(RbExpr.cov(lhs._rbexpr, rhs._rbexpr))
end
160
363
 
161
364
  # def map
162
365
  # end
@@ -164,6 +367,9 @@ module Polars
164
367
  # def apply
165
368
  # end
166
369
 
370
+ # Accumulate over multiple columns horizontally/row-wise with a left fold.
371
+ #
372
+ # @return [Expr]
167
373
  def fold(acc, f, exprs)
168
374
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
169
375
  if exprs.is_a?(Expr)
@@ -189,6 +395,30 @@ module Polars
189
395
  # def exclude
190
396
  # end
191
397
 
398
+ # Do one of two things.
399
+ #
400
+ # * function can do a columnwise or elementwise AND operation
401
+ # * a wildcard column selection
402
+ #
403
+ # @param name [Object]
404
+ # If given this function will apply a bitwise & on the columns.
405
+ #
406
+ # @return [Expr]
407
+ #
408
+ # @example Sum all columns
409
+ # df = Polars::DataFrame.new(
410
+ # {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
411
+ # )
412
+ # df.select(Polars.all.sum)
413
+ # # =>
414
+ # # shape: (1, 3)
415
+ # # ┌─────┬──────┬─────┐
416
+ # # │ a ┆ b ┆ c │
417
+ # # │ --- ┆ --- ┆ --- │
418
+ # # │ i64 ┆ str ┆ i64 │
419
+ # # ╞═════╪══════╪═════╡
420
+ # # │ 6 ┆ null ┆ 3 │
421
+ # # └─────┴──────┴─────┘
192
422
  def all(name = nil)
193
423
  if name.nil?
194
424
  col("*")
@@ -205,6 +435,26 @@ module Polars
205
435
  # def quantile
206
436
  # end
207
437
 
438
+ # Create a range expression (or Series).
439
+ #
440
+ # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
441
+ # range size is equal to the length of the DataFrame you are collecting.
442
+ #
443
+ # @param low [Integer, Expr, Series]
444
+ # Lower bound of range.
445
+ # @param high [Integer, Expr, Series]
446
+ # Upper bound of range.
447
+ # @param step [Integer]
448
+ # Step size of the range.
449
+ # @param eager [Boolean]
450
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
451
+ # @param dtype [Symbol]
452
+ # Apply an explicit integer dtype to the resulting expression (default is Int64).
453
+ #
454
+ # @return [Expr, Series]
455
+ #
456
+ # @example
457
+ # df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
208
458
  def arange(low, high, step: 1, eager: false, dtype: nil)
209
459
  low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
210
460
  high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
@@ -233,6 +483,9 @@ module Polars
233
483
  # def format
234
484
  # end
235
485
 
486
+ # Concatenate the arrays in a Series of dtype List in linear time.
487
+ #
488
+ # @return [Expr]
236
489
  def concat_list(exprs)
237
490
  exprs = Utils.selection_to_rbexpr_list(exprs)
238
491
  Utils.wrap_expr(RbExpr.concat_lst(exprs))
@@ -241,17 +494,132 @@ module Polars
241
494
  # def collect_all
242
495
  # end
243
496
 
244
- # def select
245
- # end
497
# Run polars expressions without a context.
#
# The expressions are evaluated against a `DataFrame` built from an empty array.
#
# @param exprs [Object]
#   Expression(s) to evaluate, handed unchanged to `DataFrame#select`.
#
# @return [DataFrame]
def select(exprs)
  empty_frame = DataFrame.new([])
  empty_frame.select(exprs)
end
246
503
 
247
- # def struct
248
- # end
504
+ # Collect several columns into a Series of dtype Struct.
505
+ #
506
+ # @param exprs [Object]
507
+ # Columns/Expressions to collect into a Struct
508
+ # @param eager [Boolean]
509
+ # Evaluate immediately
510
+ #
511
+ # @return [Object]
512
+ #
513
+ # @example
514
+ # Polars::DataFrame.new(
515
+ # {
516
+ # "int" => [1, 2],
517
+ # "str" => ["a", "b"],
518
+ # "bool" => [true, nil],
519
+ # "list" => [[1, 2], [3]],
520
+ # }
521
+ # ).select([Polars.struct(Polars.all).alias("my_struct")])
522
+ # # =>
523
+ # # shape: (2, 1)
524
+ # # ┌─────────────────────┐
525
+ # # │ my_struct │
526
+ # # │ --- │
527
+ # # │ struct[4] │
528
+ # # ╞═════════════════════╡
529
+ # # │ {1,"a",true,[1, 2]} │
530
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
531
+ # # │ {2,"b",null,[3]} │
532
+ # # └─────────────────────┘
533
+ #
534
+ # @example Only collect specific columns as a struct:
535
+ # df = Polars::DataFrame.new(
536
+ # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
537
+ # )
538
+ # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
539
+ # # =>
540
+ # # shape: (4, 4)
541
+ # # ┌─────┬───────┬─────┬─────────────┐
542
+ # # │ a ┆ b ┆ c ┆ a_and_b │
543
+ # # │ --- ┆ --- ┆ --- ┆ --- │
544
+ # # │ i64 ┆ str ┆ i64 ┆ struct[2] │
545
+ # # ╞═════╪═══════╪═════╪═════════════╡
546
+ # # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
547
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
548
+ # # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
549
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
550
+ # # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
551
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
552
+ # # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
553
+ # # └─────┴───────┴─────┴─────────────┘
554
def struct(exprs, eager: false)
  # Eager mode builds the lazy struct expression, evaluates it immediately and
  # hands back the result of `to_series`. The early `return` matters: without
  # it the evaluated result is discarded and the lazy expression is returned.
  return Polars.select(struct(exprs, eager: false)).to_series if eager

  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(_as_struct(rbexprs))
end
249
561
 
250
- # def repeat
251
- # end
562
# Repeat a single value n times.
#
# @param value [Object]
#   Value to repeat.
# @param n [Integer]
#   Repeat `n` times.
# @param eager [Boolean]
#   Run eagerly and collect into a `Series`.
# @param name [String]
#   Only used in `eager` mode. As expression, use `alias`.
#
# @return [Expr, Series]
def repeat(value, n, eager: false, name: nil)
  # An Integer count is wrapped as a literal expression; anything else is
  # expected to already respond to `_rbexpr`.
  n = lit(n) if n.is_a?(Integer)
  expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
  return expr unless eager

  # Eager mode evaluates the same expression immediately instead of going
  # through `py_type_to_dtype(type(value))`, which is unported Python and
  # raises NoMethodError in Ruby. The result is named `name` ("" when nil).
  Polars.select(expr.alias(name.nil? ? "" : name)).to_series
end
252
588
 
253
- # def arg_where
254
- # end
589
+ # Return indices where `condition` evaluates `true`.
590
+ #
591
+ # @param condition [Expr]
592
+ # Boolean expression to evaluate
593
+ # @param eager [Boolean]
594
+ # Whether to apply this function eagerly (as opposed to lazily).
595
+ #
596
+ # @return [Expr, Series]
597
+ #
598
+ # @example
599
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
600
+ # df.select(
601
+ # [
602
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
603
+ # ]
604
+ # ).to_series
605
+ # # =>
606
+ # # shape: (2,)
607
+ # # Series: 'a' [u32]
608
+ # # [
609
+ # # 1
610
+ # # 3
611
+ # # ]
612
def arg_where(condition, eager: false)
  # Lazy mode: coerce the condition to an expression and wrap the native call.
  unless eager
    predicate = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    return Utils.wrap_expr(_arg_where(predicate._rbexpr))
  end

  # Eager mode only works on a Series, which is evaluated via its own frame.
  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
  end

  condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
end
255
623
 
256
624
  # def coalesce
257
625
  # end
@@ -259,6 +627,26 @@ module Polars
259
627
  # def from_epoch
260
628
  # end
261
629
 
630
+ # Start a "when, then, otherwise" expression.
631
+ #
632
+ # @return [When]
633
+ #
634
+ # @example
635
+ # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
636
+ # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
637
+ # # =>
638
+ # # shape: (3, 3)
639
+ # # ┌─────┬─────┬─────────┐
640
+ # # │ foo ┆ bar ┆ literal │
641
+ # # │ --- ┆ --- ┆ --- │
642
+ # # │ i64 ┆ i64 ┆ i32 │
643
+ # # ╞═════╪═════╪═════════╡
644
+ # # │ 1 ┆ 3 ┆ -1 │
645
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
646
+ # # │ 3 ┆ 4 ┆ 1 │
647
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
648
+ # # │ 4 ┆ 0 ┆ 1 │
649
+ # # └─────┴─────┴─────────┘
262
650
  def when(expr)
263
651
  expr = Utils.expr_to_lit_or_expr(expr)
264
652
  pw = RbExpr.when(expr._rbexpr)
@@ -1,13 +1,93 @@
1
1
  module Polars
2
# Created by `df.lazy.groupby("foo")`.
class LazyGroupBy
  # @private
  def initialize(lgb, lazyframe_class)
    @lgb = lgb
    @lazyframe_class = lazyframe_class
  end

  # Describe the aggregations that need to be done on a group.
  #
  # @param aggs [Object]
  #   Aggregation selection; converted with `Utils.selection_to_rbexpr_list`.
  #
  # @return [LazyFrame]
  def agg(aggs)
    aggregated = @lgb.agg(Utils.selection_to_rbexpr_list(aggs))
    @lazyframe_class._from_rbldf(aggregated)
  end

  # Get the first `n` rows of each group.
  #
  # @param n [Integer]
  #   Number of rows to return.
  #
  # @return [LazyFrame]
  #
  # @example
  #   df = Polars::DataFrame.new(
  #     {
  #       "letters" => ["c", "c", "a", "c", "a", "b"],
  #       "nrs" => [1, 2, 3, 4, 5, 6]
  #     }
  #   )
  #   df.groupby("letters").head(2).sort("letters")
  #   # =>
  #   # shape: (5, 2)
  #   # ┌─────────┬─────┐
  #   # │ letters ┆ nrs │
  #   # │ ---     ┆ --- │
  #   # │ str     ┆ i64 │
  #   # ╞═════════╪═════╡
  #   # │ a       ┆ 3   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ a       ┆ 5   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ b       ┆ 6   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ c       ┆ 1   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ c       ┆ 2   │
  #   # └─────────┴─────┘
  def head(n = 5)
    leading_rows = @lgb.head(n)
    @lazyframe_class._from_rbldf(leading_rows)
  end

  # Get the last `n` rows of each group.
  #
  # @param n [Integer]
  #   Number of rows to return.
  #
  # @return [LazyFrame]
  #
  # @example
  #   df = Polars::DataFrame.new(
  #     {
  #       "letters" => ["c", "c", "a", "c", "a", "b"],
  #       "nrs" => [1, 2, 3, 4, 5, 6]
  #     }
  #   )
  #   df.groupby("letters").tail(2).sort("letters")
  #   # =>
  #   # shape: (5, 2)
  #   # ┌─────────┬─────┐
  #   # │ letters ┆ nrs │
  #   # │ ---     ┆ --- │
  #   # │ str     ┆ i64 │
  #   # ╞═════════╪═════╡
  #   # │ a       ┆ 3   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ a       ┆ 5   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ b       ┆ 6   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ c       ┆ 2   │
  #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
  #   # │ c       ┆ 4   │
  #   # └─────────┴─────┘
  def tail(n = 5)
    trailing_rows = @lgb.tail(n)
    @lazyframe_class._from_rbldf(trailing_rows)
  end

  # def apply
  # end
end
13
93
  end