polars-df 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -1,5 +1,8 @@
1
1
  module Polars
2
2
  module LazyFunctions
3
+ # Return an expression representing a column in a DataFrame.
4
+ #
5
+ # @return [Expr]
3
6
  def col(name)
4
7
  if name.is_a?(Series)
5
8
  name = name.to_a
@@ -21,10 +24,42 @@ module Polars
21
24
  end
22
25
  end
23
26
 
27
+ # Alias for an element being evaluated in an `eval` expression.
28
+ #
29
+ # @return [Expr]
30
+ #
31
+ # @example A horizontal rank computation by taking the elements of a list
32
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
33
+ # df.with_column(
34
+ # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
35
+ # )
36
+ # # =>
37
+ # # shape: (3, 3)
38
+ # # ┌─────┬─────┬────────────┐
39
+ # # │ a ┆ b ┆ rank │
40
+ # # │ --- ┆ --- ┆ --- │
41
+ # # │ i64 ┆ i64 ┆ list[f32] │
42
+ # # ╞═════╪═════╪════════════╡
43
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
44
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
45
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
46
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
47
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
48
+ # # └─────┴─────┴────────────┘
24
49
  def element
25
50
  col("")
26
51
  end
27
52
 
53
+ # Count the number of values in this column/context.
54
+ #
55
+ # @param column [String, Series, nil]
56
+ # Depending on the type of `column`:
57
+ #
58
+ # * `Series` : count the values in the series.
59
+ # * `String` : count the values in this column.
60
+ # * `nil` : count the number of values in this context.
61
+ #
62
+ # @return [Expr, Integer]
28
63
  def count(column = nil)
29
64
  if column.nil?
30
65
  return Utils.wrap_expr(RbExpr.count)
@@ -37,9 +72,16 @@ module Polars
37
72
  end
38
73
  end
39
74
 
40
- # def to_list
41
- # end
75
+ # Aggregate to list.
76
+ #
77
+ # @return [Expr]
78
+ def to_list(name)
79
+ col(name).list
80
+ end
42
81
 
82
+ # Get the standard deviation.
83
+ #
84
+ # @return [Object]
43
85
  def std(column, ddof: 1)
44
86
  if column.is_a?(Series)
45
87
  column.std(ddof: ddof)
@@ -48,6 +90,9 @@ module Polars
48
90
  end
49
91
  end
50
92
 
93
+ # Get the variance.
94
+ #
95
+ # @return [Object]
51
96
  def var(column, ddof: 1)
52
97
  if column.is_a?(Series)
53
98
  column.var(ddof: ddof)
@@ -56,6 +101,16 @@ module Polars
56
101
  end
57
102
  end
58
103
 
104
+ # Get the maximum value.
105
+ #
106
+ # @param column [Object]
107
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
108
+ # the input:
109
+ #
110
+ # - [String, Series] -> aggregate the maximum value of that column.
111
+ # - [Array<Expr>] -> aggregate the maximum value horizontally.
112
+ #
113
+ # @return [Expr, Object]
59
114
  def max(column)
60
115
  if column.is_a?(Series)
61
116
  column.max
@@ -68,6 +123,16 @@ module Polars
68
123
  end
69
124
  end
70
125
 
126
+ # Get the minimum value.
127
+ #
128
+ # @param column [Object]
129
+ # Column(s) to be used in aggregation. Will lead to different behavior based on
130
+ # the input:
131
+ #
132
+ # - [String, Series] -> aggregate the minimum value of that column.
133
+ # - [Array<Expr>] -> aggregate the minimum value horizontally.
134
+ #
135
+ # @return [Expr, Object]
71
136
  def min(column)
72
137
  if column.is_a?(Series)
73
138
  column.min
@@ -80,6 +145,9 @@ module Polars
80
145
  end
81
146
  end
82
147
 
148
+ # Sum values in a column/Series, or horizontally across list of columns/expressions.
149
+ #
150
+ # @return [Object]
83
151
  def sum(column)
84
152
  if column.is_a?(Series)
85
153
  column.sum
@@ -94,6 +162,9 @@ module Polars
94
162
  end
95
163
  end
96
164
 
165
+ # Get the mean value.
166
+ #
167
+ # @return [Expr, Float]
97
168
  def mean(column)
98
169
  if column.is_a?(Series)
99
170
  column.mean
@@ -102,10 +173,16 @@ module Polars
102
173
  end
103
174
  end
104
175
 
176
+ # Get the mean value.
177
+ #
178
+ # @return [Expr, Float]
105
179
  def avg(column)
106
180
  mean(column)
107
181
  end
108
182
 
183
+ # Get the median value.
184
+ #
185
+ # @return [Object]
109
186
  def median(column)
110
187
  if column.is_a?(Series)
111
188
  column.median
@@ -114,9 +191,20 @@ module Polars
114
191
  end
115
192
  end
116
193
 
117
- # def n_unique
118
- # end
194
+ # Count unique values.
195
+ #
196
+ # @return [Object]
197
+ def n_unique(column)
198
+ if column.is_a?(Series)
199
+ column.n_unique
200
+ else
201
+ col(column).n_unique
202
+ end
203
+ end
119
204
 
205
+ # Get the first value.
206
+ #
207
+ # @return [Object]
120
208
  def first(column = nil)
121
209
  if column.nil?
122
210
  return Utils.wrap_expr(RbExpr.first)
@@ -133,30 +221,145 @@ module Polars
133
221
  end
134
222
  end
135
223
 
136
- # def last
137
- # end
224
+ # Get the last value.
225
+ #
226
+ # Depending on the input type this function does different things:
227
+ #
228
+ # - nil -> expression to take last column of a context.
229
+ # - String -> syntactic sugar for `Polars.col(..).last`
230
+ # - Series -> Take last value in `Series`
231
+ #
232
+ # @return [Object]
233
+ def last(column = nil)
234
+ if column.nil?
235
+ return Utils.wrap_expr(_last)
236
+ end
138
237
 
139
- # def head
140
- # end
238
+ if column.is_a?(Series)
239
+ if column.len > 0
240
+ return column[-1]
241
+ else
242
+ raise IndexError, "The series is empty, so no last value can be returned"
243
+ end
244
+ end
245
+ col(column).last
246
+ end
141
247
 
142
- # def tail
143
- # end
248
+ # Get the first `n` rows.
249
+ #
250
+ # @param column [Object]
251
+ # Column name or Series.
252
+ # @param n [Integer]
253
+ # Number of rows to return.
254
+ #
255
+ # @return [Object]
256
+ def head(column, n = 10)
257
+ if column.is_a?(Series)
258
+ column.head(n)
259
+ else
260
+ col(column).head(n)
261
+ end
262
+ end
144
263
 
264
+ # Get the last `n` rows.
265
+ #
266
+ # @param column [Object]
267
+ # Column name or Series.
268
+ # @param n [Integer]
269
+ # Number of rows to return.
270
+ #
271
+ # @return [Object]
272
+ def tail(column, n = 10)
273
+ if column.is_a?(Series)
274
+ column.tail(n)
275
+ else
276
+ col(column).tail(n)
277
+ end
278
+ end
279
+
280
+ # Return an expression representing a literal value.
281
+ #
282
+ # @return [Expr]
145
283
  def lit(value)
284
+ if value.is_a?(Polars::Series)
285
+ name = value.name
286
+ value = value._s
287
+ e = Utils.wrap_expr(RbExpr.lit(value))
288
+ if name == ""
289
+ return e
290
+ end
291
+ return e.alias(name)
292
+ end
293
+
146
294
  Utils.wrap_expr(RbExpr.lit(value))
147
295
  end
148
296
 
149
297
  # def cumsum
150
298
  # end
151
299
 
152
- # def spearman_rank_corr
153
- # end
300
+ # Compute the Spearman rank correlation between two columns.
301
+ #
302
+ # Missing data will be excluded from the computation.
303
+ #
304
+ # @param a [Object]
305
+ # Column name or Expression.
306
+ # @param b [Object]
307
+ # Column name or Expression.
308
+ # @param ddof [Integer]
309
+ # Delta degrees of freedom
310
+ # @param propagate_nans [Boolean]
311
+ # If `true`, any `NaN` encountered will lead to `NaN` in the output.
312
+ # Defaults to `false`, where `NaN` values are regarded as larger than any finite number
313
+ # and thus lead to the highest rank.
314
+ #
315
+ # @return [Expr]
316
+ def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
317
+ if a.is_a?(String)
318
+ a = col(a)
319
+ end
320
+ if b.is_a?(String)
321
+ b = col(b)
322
+ end
323
+ Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
324
+ end
154
325
 
155
- # def pearson_corr
156
- # end
326
+ # Compute the Pearson correlation between two columns.
327
+ #
328
+ # @param a [Object]
329
+ # Column name or Expression.
330
+ # @param b [Object]
331
+ # Column name or Expression.
332
+ # @param ddof [Integer]
333
+ # Delta degrees of freedom
334
+ #
335
+ # @return [Expr]
336
+ def pearson_corr(a, b, ddof: 1)
337
+ if a.is_a?(String)
338
+ a = col(a)
339
+ end
340
+ if b.is_a?(String)
341
+ b = col(b)
342
+ end
343
+ Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
344
+ end
157
345
 
158
- # def cov
159
- # end
346
+ # Compute the covariance between two columns/expressions.
347
+ #
348
+ # @param a [Object]
349
+ # Column name or Expression.
350
+ # @param b [Object]
351
+ # Column name or Expression.
352
+ #
353
+ # @return [Expr]
354
+ def cov(a, b)
355
+ if a.is_a?(String)
356
+ a = col(a)
357
+ end
358
+ if b.is_a?(String)
359
+ b = col(b)
360
+ end
361
+ Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
362
+ end
160
363
 
161
364
  # def map
162
365
  # end
@@ -164,6 +367,9 @@ module Polars
164
367
  # def apply
165
368
  # end
166
369
 
370
+ # Accumulate over multiple columns horizontally/row-wise with a left fold.
371
+ #
372
+ # @return [Expr]
167
373
  def fold(acc, f, exprs)
168
374
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
169
375
  if exprs.is_a?(Expr)
@@ -189,6 +395,30 @@ module Polars
189
395
  # def exclude
190
396
  # end
191
397
 
398
+ # Do one of two things.
399
+ #
400
+ # * function can do a columnwise or elementwise AND operation
401
+ # * a wildcard column selection
402
+ #
403
+ # @param name [Object]
404
+ # If given this function will apply a bitwise & on the columns.
405
+ #
406
+ # @return [Expr]
407
+ #
408
+ # @example Sum all columns
409
+ # df = Polars::DataFrame.new(
410
+ # {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
411
+ # )
412
+ # df.select(Polars.all.sum)
413
+ # # =>
414
+ # # shape: (1, 3)
415
+ # # ┌─────┬──────┬─────┐
416
+ # # │ a ┆ b ┆ c │
417
+ # # │ --- ┆ --- ┆ --- │
418
+ # # │ i64 ┆ str ┆ i64 │
419
+ # # ╞═════╪══════╪═════╡
420
+ # # │ 6 ┆ null ┆ 3 │
421
+ # # └─────┴──────┴─────┘
192
422
  def all(name = nil)
193
423
  if name.nil?
194
424
  col("*")
@@ -205,6 +435,26 @@ module Polars
205
435
  # def quantile
206
436
  # end
207
437
 
438
+ # Create a range expression (or Series).
439
+ #
440
+ # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
441
+ # range size is equal to the length of the DataFrame you are collecting.
442
+ #
443
+ # @param low [Integer, Expr, Series]
444
+ # Lower bound of range.
445
+ # @param high [Integer, Expr, Series]
446
+ # Upper bound of range.
447
+ # @param step [Integer]
448
+ # Step size of the range.
449
+ # @param eager [Boolean]
450
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
451
+ # @param dtype [Symbol]
452
+ # Apply an explicit integer dtype to the resulting expression (default is Int64).
453
+ #
454
+ # @return [Expr, Series]
455
+ #
456
+ # @example
457
+ # df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
208
458
  def arange(low, high, step: 1, eager: false, dtype: nil)
209
459
  low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
210
460
  high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
@@ -233,6 +483,9 @@ module Polars
233
483
  # def format
234
484
  # end
235
485
 
486
+ # Concat the arrays in a Series dtype List in linear time.
487
+ #
488
+ # @return [Expr]
236
489
  def concat_list(exprs)
237
490
  exprs = Utils.selection_to_rbexpr_list(exprs)
238
491
  Utils.wrap_expr(RbExpr.concat_lst(exprs))
@@ -241,17 +494,132 @@ module Polars
241
494
  # def collect_all
242
495
  # end
243
496
 
244
- # def select
245
- # end
497
+ # Run polars expressions without a context.
498
+ #
499
+ # @return [DataFrame]
500
+ def select(exprs)
501
+ DataFrame.new([]).select(exprs)
502
+ end
246
503
 
247
- # def struct
248
- # end
504
+ # Collect several columns into a Series of dtype Struct.
505
+ #
506
+ # @param exprs [Object]
507
+ # Columns/Expressions to collect into a Struct
508
+ # @param eager [Boolean]
509
+ # Evaluate immediately
510
+ #
511
+ # @return [Object]
512
+ #
513
+ # @example
514
+ # Polars::DataFrame.new(
515
+ # {
516
+ # "int" => [1, 2],
517
+ # "str" => ["a", "b"],
518
+ # "bool" => [true, nil],
519
+ # "list" => [[1, 2], [3]],
520
+ # }
521
+ # ).select([Polars.struct(Polars.all).alias("my_struct")])
522
+ # # =>
523
+ # # shape: (2, 1)
524
+ # # ┌─────────────────────┐
525
+ # # │ my_struct │
526
+ # # │ --- │
527
+ # # │ struct[4] │
528
+ # # ╞═════════════════════╡
529
+ # # │ {1,"a",true,[1, 2]} │
530
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
531
+ # # │ {2,"b",null,[3]} │
532
+ # # └─────────────────────┘
533
+ #
534
+ # @example Only collect specific columns as a struct:
535
+ # df = Polars::DataFrame.new(
536
+ # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
537
+ # )
538
+ # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
539
+ # # =>
540
+ # # shape: (4, 4)
541
+ # # ┌─────┬───────┬─────┬─────────────┐
542
+ # # │ a ┆ b ┆ c ┆ a_and_b │
543
+ # # │ --- ┆ --- ┆ --- ┆ --- │
544
+ # # │ i64 ┆ str ┆ i64 ┆ struct[2] │
545
+ # # ╞═════╪═══════╪═════╪═════════════╡
546
+ # # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
547
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
548
+ # # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
549
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
550
+ # # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
551
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
552
+ # # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
553
+ # # └─────┴───────┴─────┴─────────────┘
554
+ def struct(exprs, eager: false)
555
+ if eager
556
+ Polars.select(struct(exprs, eager: false)).to_series
557
+ end
558
+ exprs = Utils.selection_to_rbexpr_list(exprs)
559
+ Utils.wrap_expr(_as_struct(exprs))
560
+ end
249
561
 
250
- # def repeat
251
- # end
562
+ # Repeat a single value n times.
563
+ #
564
+ # @param value [Object]
565
+ # Value to repeat.
566
+ # @param n [Integer]
567
+ # Repeat `n` times.
568
+ # @param eager [Boolean]
569
+ # Run eagerly and collect into a `Series`.
570
+ # @param name [String]
571
+ # Only used in `eager` mode. As expression, use `alias`.
572
+ #
573
+ # @return [Expr, Series]
574
+ def repeat(value, n, eager: false, name: nil)
575
+ if eager
576
+ if name.nil?
577
+ name = ""
578
+ end
579
+ dtype = py_type_to_dtype(type(value))
580
+ Series._repeat(name, value, n, dtype)
581
+ else
582
+ if n.is_a?(Integer)
583
+ n = lit(n)
584
+ end
585
+ Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
586
+ end
587
+ end
252
588
 
253
- # def arg_where
254
- # end
589
+ # Return indices where `condition` evaluates `true`.
590
+ #
591
+ # @param condition [Expr]
592
+ # Boolean expression to evaluate
593
+ # @param eager [Boolean]
594
+ # Whether to apply this function eagerly (as opposed to lazily).
595
+ #
596
+ # @return [Expr, Series]
597
+ #
598
+ # @example
599
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
600
+ # df.select(
601
+ # [
602
+ # Polars.arg_where(Polars.col("a") % 2 == 0)
603
+ # ]
604
+ # ).to_series
605
+ # # =>
606
+ # # shape: (2,)
607
+ # # Series: 'a' [u32]
608
+ # # [
609
+ # # 1
610
+ # # 3
611
+ # # ]
612
+ def arg_where(condition, eager: false)
613
+ if eager
614
+ if !condition.is_a?(Series)
615
+ raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
616
+ end
617
+ condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
618
+ else
619
+ condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
620
+ Utils.wrap_expr(_arg_where(condition._rbexpr))
621
+ end
622
+ end
255
623
 
256
624
  # def coalesce
257
625
  # end
@@ -259,6 +627,26 @@ module Polars
259
627
  # def from_epoch
260
628
  # end
261
629
 
630
+ # Start a "when, then, otherwise" expression.
631
+ #
632
+ # @return [When]
633
+ #
634
+ # @example
635
+ # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
636
+ # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
637
+ # # =>
638
+ # # shape: (3, 3)
639
+ # # ┌─────┬─────┬─────────┐
640
+ # # │ foo ┆ bar ┆ literal │
641
+ # # │ --- ┆ --- ┆ --- │
642
+ # # │ i64 ┆ i64 ┆ i32 │
643
+ # # ╞═════╪═════╪═════════╡
644
+ # # │ 1 ┆ 3 ┆ -1 │
645
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
646
+ # # │ 3 ┆ 4 ┆ 1 │
647
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
648
+ # # │ 4 ┆ 0 ┆ 1 │
649
+ # # └─────┴─────┴─────────┘
262
650
  def when(expr)
263
651
  expr = Utils.expr_to_lit_or_expr(expr)
264
652
  pw = RbExpr.when(expr._rbexpr)
@@ -1,13 +1,93 @@
1
1
  module Polars
2
+ # Created by `df.lazy.groupby("foo")`.
2
3
  class LazyGroupBy
4
+ # @private
3
5
  def initialize(lgb, lazyframe_class)
4
6
  @lgb = lgb
5
7
  @lazyframe_class = lazyframe_class
6
8
  end
7
9
 
10
+ # Describe the aggregations that need to be done on a group.
11
+ #
12
+ # @return [LazyFrame]
8
13
  def agg(aggs)
9
14
  rbexprs = Utils.selection_to_rbexpr_list(aggs)
10
15
  @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
11
16
  end
17
+
18
+ # Get the first `n` rows of each group.
19
+ #
20
+ # @param n [Integer]
21
+ # Number of rows to return.
22
+ #
23
+ # @return [LazyFrame]
24
+ #
25
+ # @example
26
+ # df = Polars::DataFrame.new(
27
+ # {
28
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
29
+ # "nrs" => [1, 2, 3, 4, 5, 6]
30
+ # }
31
+ # )
32
+ # df.lazy.groupby("letters").head(2).collect.sort("letters")
33
+ # # =>
34
+ # # shape: (5, 2)
35
+ # # ┌─────────┬─────┐
36
+ # # │ letters ┆ nrs │
37
+ # # │ --- ┆ --- │
38
+ # # │ str ┆ i64 │
39
+ # # ╞═════════╪═════╡
40
+ # # │ a ┆ 3 │
41
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
42
+ # # │ a ┆ 5 │
43
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
44
+ # # │ b ┆ 6 │
45
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
46
+ # # │ c ┆ 1 │
47
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
48
+ # # │ c ┆ 2 │
49
+ # # └─────────┴─────┘
50
+ def head(n = 5)
51
+ @lazyframe_class._from_rbldf(@lgb.head(n))
52
+ end
53
+
54
+ # Get the last `n` rows of each group.
55
+ #
56
+ # @param n [Integer]
57
+ # Number of rows to return.
58
+ #
59
+ # @return [LazyFrame]
60
+ #
61
+ # @example
62
+ # df = Polars::DataFrame.new(
63
+ # {
64
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
65
+ # "nrs" => [1, 2, 3, 4, 5, 6]
66
+ # }
67
+ # )
68
+ # df.lazy.groupby("letters").tail(2).collect.sort("letters")
69
+ # # =>
70
+ # # shape: (5, 2)
71
+ # # ┌─────────┬─────┐
72
+ # # │ letters ┆ nrs │
73
+ # # │ --- ┆ --- │
74
+ # # │ str ┆ i64 │
75
+ # # ╞═════════╪═════╡
76
+ # # │ a ┆ 3 │
77
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
78
+ # # │ a ┆ 5 │
79
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
80
+ # # │ b ┆ 6 │
81
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
82
+ # # │ c ┆ 2 │
83
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
84
+ # # │ c ┆ 4 │
85
+ # # └─────────┴─────┘
86
+ def tail(n = 5)
87
+ @lazyframe_class._from_rbldf(@lgb.tail(n))
88
+ end
89
+
90
+ # def apply
91
+ # end
12
92
  end
13
93
  end