polars-df 0.2.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,558 @@
1
+ module Polars
2
+ # Starts a new GroupBy operation.
3
+ class GroupBy
4
+ # @private
5
+ attr_accessor :_df, :_dataframe_class, :by, :maintain_order
6
+
7
+ # @private
8
def initialize(df, by, dataframe_class, maintain_order: false)
  # Store the state directly in the instance variables that back the
  # attr_accessor declarations for this class.
  @_df = df
  @_dataframe_class = dataframe_class
  @by = by
  @maintain_order = maintain_order
end
14
+
15
+ # Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
16
+ #
17
+ # Implementing logic using a Ruby function is almost always _significantly_
18
+ # slower and more memory intensive than implementing the same logic using
19
+ # the native expression API because:
20
+ #
21
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
22
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
23
+ # - Polars-native expressions can be parallelised (UDFs cannot).
24
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
25
+ #
26
+ # Wherever possible you should strongly prefer the native expression API
27
+ # to achieve the best performance.
28
+ #
29
+ # @return [DataFrame]
30
+ #
31
+ # @example
32
+ # df = Polars::DataFrame.new(
33
+ # {
34
+ # "id" => [0, 1, 2, 3, 4],
35
+ # "color" => ["red", "green", "green", "red", "red"],
36
+ # "shape" => ["square", "triangle", "square", "triangle", "square"]
37
+ # }
38
+ # )
39
+ # df.groupby("color").apply { |group_df| group_df.sample(2) }
40
+ # # =>
41
+ # # shape: (4, 3)
42
+ # # ┌─────┬───────┬──────────┐
43
+ # # │ id ┆ color ┆ shape │
44
+ # # │ --- ┆ --- ┆ --- │
45
+ # # │ i64 ┆ str ┆ str │
46
+ # # ╞═════╪═══════╪══════════╡
47
+ # # │ 1 ┆ green ┆ triangle │
48
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
49
+ # # │ 2 ┆ green ┆ square │
50
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
51
+ # # │ 4 ┆ red ┆ square │
52
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
53
+ # # │ 3 ┆ red ┆ triangle │
54
+ # # └─────┴───────┴──────────┘
55
+ # def apply(&f)
56
+ # _dataframe_class._from_rbdf(_df.groupby_apply(by, f))
57
+ # end
58
+
59
+ # Use multiple aggregations on columns.
60
+ #
61
+ # This can be combined with the complete lazy API and is considered idiomatic Polars.
62
+ #
63
+ # @param aggs [Object]
64
+ # Single / multiple aggregation expression(s).
65
+ #
66
+ # @return [DataFrame]
67
+ #
68
+ # @example
69
+ # df = Polars::DataFrame.new(
70
+ # {"foo" => ["one", "two", "two", "one", "two"], "bar" => [5, 3, 2, 4, 1]}
71
+ # )
72
+ # df.groupby("foo", maintain_order: true).agg(
73
+ # [
74
+ # Polars.sum("bar").suffix("_sum"),
75
+ # Polars.col("bar").sort.tail(2).sum.suffix("_tail_sum")
76
+ # ]
77
+ # )
78
+ # # =>
79
+ # # shape: (2, 3)
80
+ # # ┌─────┬─────────┬──────────────┐
81
+ # # │ foo ┆ bar_sum ┆ bar_tail_sum │
82
+ # # │ --- ┆ --- ┆ --- │
83
+ # # │ str ┆ i64 ┆ i64 │
84
+ # # ╞═════╪═════════╪══════════════╡
85
+ # # │ one ┆ 9 ┆ 9 │
86
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
87
+ # # │ two ┆ 6 ┆ 5 │
88
+ # # └─────┴─────────┴──────────────┘
89
def agg(aggs)
  # Wrap the underlying frame, switch to the lazy API and group it by the
  # stored keys, forwarding the maintain_order setting.
  grouped = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)

  # Apply the requested aggregation(s) and collect the result.
  collected = grouped.agg(aggs).collect(no_optimization: true, string_cache: false)

  # Rebuild the result through the stored DataFrame class.
  _dataframe_class._from_rbdf(collected._df)
end
97
+
98
+ # Get the first `n` rows of each group.
99
+ #
100
+ # @param n [Integer]
101
+ # Number of rows to return.
102
+ #
103
+ # @return [DataFrame]
104
+ #
105
+ # @example
106
+ # df = Polars::DataFrame.new(
107
+ # {
108
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
109
+ # "nrs" => [1, 2, 3, 4, 5, 6]
110
+ # }
111
+ # )
112
+ # # =>
113
+ # # shape: (6, 2)
114
+ # # ┌─────────┬─────┐
115
+ # # │ letters ┆ nrs │
116
+ # # │ --- ┆ --- │
117
+ # # │ str ┆ i64 │
118
+ # # ╞═════════╪═════╡
119
+ # # │ c ┆ 1 │
120
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
121
+ # # │ c ┆ 2 │
122
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
123
+ # # │ a ┆ 3 │
124
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
125
+ # # │ c ┆ 4 │
126
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
127
+ # # │ a ┆ 5 │
128
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
129
+ # # │ b ┆ 6 │
130
+ # # └─────────┴─────┘
131
+ #
132
+ # @example
133
+ # df.groupby("letters").head(2).sort("letters")
134
+ # # =>
135
+ # # shape: (5, 2)
136
+ # # ┌─────────┬─────┐
137
+ # # │ letters ┆ nrs │
138
+ # # │ --- ┆ --- │
139
+ # # │ str ┆ i64 │
140
+ # # ╞═════════╪═════╡
141
+ # # │ a ┆ 3 │
142
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
143
+ # # │ a ┆ 5 │
144
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
145
+ # # │ b ┆ 6 │
146
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
147
+ # # │ c ┆ 1 │
148
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
149
+ # # │ c ┆ 2 │
150
+ # # └─────────┴─────┘
151
def head(n = 5)
  # Group lazily by the stored keys, forwarding the maintain_order setting.
  grouped = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)

  # Take `n` rows from the head of each group and collect the result.
  collected = grouped.head(n).collect(no_optimization: true, string_cache: false)

  # Rebuild the result through the stored DataFrame class.
  _dataframe_class._from_rbdf(collected._df)
end
161
+
162
+ # Get the last `n` rows of each group.
163
+ #
164
+ # @param n [Integer]
165
+ # Number of rows to return.
166
+ #
167
+ # @return [DataFrame]
168
+ #
169
+ # @example
170
+ # df = Polars::DataFrame.new(
171
+ # {
172
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
173
+ # "nrs" => [1, 2, 3, 4, 5, 6]
174
+ # }
175
+ # )
176
+ # # =>
177
+ # # shape: (6, 2)
178
+ # # ┌─────────┬─────┐
179
+ # # │ letters ┆ nrs │
180
+ # # │ --- ┆ --- │
181
+ # # │ str ┆ i64 │
182
+ # # ╞═════════╪═════╡
183
+ # # │ c ┆ 1 │
184
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
185
+ # # │ c ┆ 2 │
186
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
187
+ # # │ a ┆ 3 │
188
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
189
+ # # │ c ┆ 4 │
190
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
191
+ # # │ a ┆ 5 │
192
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
193
+ # # │ b ┆ 6 │
194
+ # # └─────────┴─────┘
195
+ #
196
+ # @example
197
+ # df.groupby("letters").tail(2).sort("letters")
198
+ # # =>
199
+ # # shape: (5, 2)
200
+ # # ┌─────────┬─────┐
201
+ # # │ letters ┆ nrs │
202
+ # # │ --- ┆ --- │
203
+ # # │ str ┆ i64 │
204
+ # # ╞═════════╪═════╡
205
+ # # │ a ┆ 3 │
206
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
207
+ # # │ a ┆ 5 │
208
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
209
+ # # │ b ┆ 6 │
210
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
211
+ # # │ c ┆ 2 │
212
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
213
+ # # │ c ┆ 4 │
214
+ # # └─────────┴─────┘
215
def tail(n = 5)
  # Group lazily by the stored keys, forwarding the maintain_order setting.
  grouped = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)

  # Take `n` rows from the tail of each group and collect the result.
  collected = grouped.tail(n).collect(no_optimization: true, string_cache: false)

  # Rebuild the result through the stored DataFrame class.
  _dataframe_class._from_rbdf(collected._df)
end
225
+
226
+ # pivot is deprecated
227
+
228
+ # Aggregate the first values in the group.
229
+ #
230
+ # @return [DataFrame]
231
+ #
232
+ # @example
233
+ # df = Polars::DataFrame.new(
234
+ # {
235
+ # "a" => [1, 2, 2, 3, 4, 5],
236
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
237
+ # "c" => [true, true, true, false, false, true],
238
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
239
+ # }
240
+ # )
241
+ # df.groupby("d", maintain_order: true).first
242
+ # # =>
243
+ # # shape: (3, 4)
244
+ # # ┌────────┬─────┬──────┬───────┐
245
+ # # │ d ┆ a ┆ b ┆ c │
246
+ # # │ --- ┆ --- ┆ --- ┆ --- │
247
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
248
+ # # ╞════════╪═════╪══════╪═══════╡
249
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ true │
250
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
251
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
252
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
253
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
254
+ # # └────────┴─────┴──────┴───────┘
255
def first
  # Build the `all.first` expression, then delegate to #agg.
  first_expr = Polars.all.first
  agg(first_expr)
end
258
+
259
+ # Aggregate the last values in the group.
260
+ #
261
+ # @return [DataFrame]
262
+ #
263
+ # @example
264
+ # df = Polars::DataFrame.new(
265
+ # {
266
+ # "a" => [1, 2, 2, 3, 4, 5],
267
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
268
+ # "c" => [true, true, true, false, false, true],
269
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
270
+ # }
271
+ # )
272
+ # df.groupby("d", maintain_order: true).last
273
+ # # =>
274
+ # # shape: (3, 4)
275
+ # # ┌────────┬─────┬──────┬───────┐
276
+ # # │ d ┆ a ┆ b ┆ c │
277
+ # # │ --- ┆ --- ┆ --- ┆ --- │
278
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
279
+ # # ╞════════╪═════╪══════╪═══════╡
280
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ false │
281
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
282
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
283
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
284
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
285
+ # # └────────┴─────┴──────┴───────┘
286
def last
  # Build the `all.last` expression, then delegate to #agg.
  last_expr = Polars.all.last
  agg(last_expr)
end
289
+
290
+ # Reduce the groups to the sum.
291
+ #
292
+ # @return [DataFrame]
293
+ #
294
+ # @example
295
+ # df = Polars::DataFrame.new(
296
+ # {
297
+ # "a" => [1, 2, 2, 3, 4, 5],
298
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
299
+ # "c" => [true, true, true, false, false, true],
300
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
301
+ # }
302
+ # )
303
+ # df.groupby("d", maintain_order: true).sum
304
+ # # =>
305
+ # # shape: (3, 4)
306
+ # # ┌────────┬─────┬──────┬─────┐
307
+ # # │ d ┆ a ┆ b ┆ c │
308
+ # # │ --- ┆ --- ┆ --- ┆ --- │
309
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
310
+ # # ╞════════╪═════╪══════╪═════╡
311
+ # # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
312
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
313
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
314
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
315
+ # # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
316
+ # # └────────┴─────┴──────┴─────┘
317
def sum
  # Build the `all.sum` expression, then delegate to #agg.
  sum_expr = Polars.all.sum
  agg(sum_expr)
end
320
+
321
+ # Reduce the groups to the minimal value.
322
+ #
323
+ # @return [DataFrame]
324
+ #
325
+ # @example
326
+ # df = Polars::DataFrame.new(
327
+ # {
328
+ # "a" => [1, 2, 2, 3, 4, 5],
329
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
330
+ # "c" => [true, true, true, false, false, true],
331
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
332
+ # }
333
+ # )
334
+ # df.groupby("d", maintain_order: true).min
335
+ # # =>
336
+ # # shape: (3, 4)
337
+ # # ┌────────┬─────┬──────┬───────┐
338
+ # # │ d ┆ a ┆ b ┆ c │
339
+ # # │ --- ┆ --- ┆ --- ┆ --- │
340
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
341
+ # # ╞════════╪═════╪══════╪═══════╡
342
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ false │
343
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
344
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
345
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
346
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
347
+ # # └────────┴─────┴──────┴───────┘
348
def min
  # Build the `all.min` expression, then delegate to #agg.
  min_expr = Polars.all.min
  agg(min_expr)
end
351
+
352
+ # Reduce the groups to the maximal value.
353
+ #
354
+ # @return [DataFrame]
355
+ #
356
+ # @example
357
+ # df = Polars::DataFrame.new(
358
+ # {
359
+ # "a" => [1, 2, 2, 3, 4, 5],
360
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
361
+ # "c" => [true, true, true, false, false, true],
362
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
363
+ # }
364
+ # )
365
+ # df.groupby("d", maintain_order: true).max
366
+ # # =>
367
+ # # shape: (3, 4)
368
+ # # ┌────────┬─────┬──────┬──────┐
369
+ # # │ d ┆ a ┆ b ┆ c │
370
+ # # │ --- ┆ --- ┆ --- ┆ --- │
371
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
372
+ # # ╞════════╪═════╪══════╪══════╡
373
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ true │
374
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
375
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
376
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
377
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
378
+ # # └────────┴─────┴──────┴──────┘
379
def max
  # Build the `all.max` expression, then delegate to #agg.
  max_expr = Polars.all.max
  agg(max_expr)
end
382
+
383
+ # Count the number of values in each group.
384
+ #
385
+ # @return [DataFrame]
386
+ #
387
+ # @example
388
+ # df = Polars::DataFrame.new(
389
+ # {
390
+ # "a" => [1, 2, 2, 3, 4, 5],
391
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
392
+ # "c" => [true, true, true, false, false, true],
393
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
394
+ # }
395
+ # )
396
+ # df.groupby("d", maintain_order: true).count
397
+ # # =>
398
+ # # shape: (3, 2)
399
+ # # ┌────────┬───────┐
400
+ # # │ d ┆ count │
401
+ # # │ --- ┆ --- │
402
+ # # │ str ┆ u32 │
403
+ # # ╞════════╪═══════╡
404
+ # # │ Apple ┆ 3 │
405
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
406
+ # # │ Orange ┆ 1 │
407
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
408
+ # # │ Banana ┆ 2 │
409
+ # # └────────┴───────┘
410
def count
  # Build the `Polars.count` expression, then delegate to #agg.
  count_expr = Polars.count
  agg(count_expr)
end
413
+
414
+ # Reduce the groups to the mean values.
415
+ #
416
+ # @return [DataFrame]
417
+ #
418
+ # @example
419
+ # df = Polars::DataFrame.new(
420
+ # {
421
+ # "a" => [1, 2, 2, 3, 4, 5],
422
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
423
+ # "c" => [true, true, true, false, false, true],
424
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
425
+ # }
426
+ # )
427
+ # df.groupby("d", maintain_order: true).mean
428
+ # # =>
429
+ # # shape: (3, 4)
430
+ # # ┌────────┬─────┬──────────┬──────────┐
431
+ # # │ d ┆ a ┆ b ┆ c │
432
+ # # │ --- ┆ --- ┆ --- ┆ --- │
433
+ # # │ str ┆ f64 ┆ f64 ┆ f64 │
434
+ # # ╞════════╪═════╪══════════╪══════════╡
435
+ # # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
436
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
437
+ # # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
438
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
439
+ # # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
440
+ # # └────────┴─────┴──────────┴──────────┘
441
def mean
  # Build the `all.mean` expression, then delegate to #agg.
  mean_expr = Polars.all.mean
  agg(mean_expr)
end
444
+
445
+ # Count the unique values per group.
446
+ #
447
+ # @return [DataFrame]
448
+ #
449
+ # @example
450
+ # df = Polars::DataFrame.new(
451
+ # {
452
+ # "a" => [1, 2, 1, 3, 4, 5],
453
+ # "b" => [0.5, 0.5, 0.5, 10, 13, 14],
454
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
455
+ # }
456
+ # )
457
+ # df.groupby("d", maintain_order: true).n_unique
458
+ # # =>
459
+ # # shape: (2, 3)
460
+ # # ┌────────┬─────┬─────┐
461
+ # # │ d ┆ a ┆ b │
462
+ # # │ --- ┆ --- ┆ --- │
463
+ # # │ str ┆ u32 ┆ u32 │
464
+ # # ╞════════╪═════╪═════╡
465
+ # # │ Apple ┆ 2 ┆ 2 │
466
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
467
+ # # │ Banana ┆ 3 ┆ 3 │
468
+ # # └────────┴─────┴─────┘
469
def n_unique
  # Build the `all.n_unique` expression, then delegate to #agg.
  unique_count_expr = Polars.all.n_unique
  agg(unique_count_expr)
end
472
+
473
+ # Compute the quantile per group.
474
+ #
475
+ # @param quantile [Float]
476
+ # Quantile between 0.0 and 1.0.
477
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
478
+ # Interpolation method.
479
+ #
480
+ # @return [DataFrame]
481
+ #
482
+ # @example
483
+ # df = Polars::DataFrame.new(
484
+ # {
485
+ # "a" => [1, 2, 2, 3, 4, 5],
486
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
487
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
488
+ # }
489
+ # )
490
+ # df.groupby("d", maintain_order: true).quantile(1)
491
+ # # =>
492
+ # # shape: (3, 3)
493
+ # # ┌────────┬─────┬──────┐
494
+ # # │ d ┆ a ┆ b │
495
+ # # │ --- ┆ --- ┆ --- │
496
+ # # │ str ┆ f64 ┆ f64 │
497
+ # # ╞════════╪═════╪══════╡
498
+ # # │ Apple ┆ 3.0 ┆ 10.0 │
499
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
500
+ # # │ Orange ┆ 2.0 ┆ 0.5 │
501
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
502
+ # # │ Banana ┆ 5.0 ┆ 14.0 │
503
+ # # └────────┴─────┴──────┘
504
def quantile(quantile, interpolation: "nearest")
  # Build the `all.quantile` expression with the requested quantile and
  # interpolation method, then delegate to #agg.
  quantile_expr = Polars.all.quantile(quantile, interpolation: interpolation)
  agg(quantile_expr)
end
507
+
508
+ # Return the median per group.
509
+ #
510
+ # @return [DataFrame]
511
+ #
512
+ # @example
513
+ # df = Polars::DataFrame.new(
514
+ # {
515
+ # "a" => [1, 2, 2, 3, 4, 5],
516
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
517
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
518
+ # }
519
+ # )
520
+ # df.groupby("d", maintain_order: true).median
521
+ # # =>
522
+ # # shape: (2, 3)
523
+ # # ┌────────┬─────┬──────┐
524
+ # # │ d ┆ a ┆ b │
525
+ # # │ --- ┆ --- ┆ --- │
526
+ # # │ str ┆ f64 ┆ f64 │
527
+ # # ╞════════╪═════╪══════╡
528
+ # # │ Apple ┆ 2.0 ┆ 4.0 │
529
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
530
+ # # │ Banana ┆ 4.0 ┆ 13.0 │
531
+ # # └────────┴─────┴──────┘
532
def median
  # Build the `all.median` expression, then delegate to #agg.
  median_expr = Polars.all.median
  agg(median_expr)
end
535
+
536
+ # Aggregate the groups into Series.
537
+ #
538
+ # @return [DataFrame]
539
+ #
540
+ # @example
541
+ # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
542
+ # df.groupby("a", maintain_order: true).agg_list
543
+ # # =>
544
+ # # shape: (2, 2)
545
+ # # ┌─────┬───────────┐
546
+ # # │ a ┆ b │
547
+ # # │ --- ┆ --- │
548
+ # # │ str ┆ list[i64] │
549
+ # # ╞═════╪═══════════╡
550
+ # # │ one ┆ [1, 3] │
551
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
552
+ # # │ two ┆ [2, 4] │
553
+ # # └─────┴───────────┘
554
def agg_list
  # Build the `all.list` expression, then delegate to #agg.
  list_expr = Polars.all.list
  agg(list_expr)
end
557
+ end
558
+ end