polars-df 0.2.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,558 @@
1
+ module Polars
2
+ # Starts a new GroupBy operation.
3
+ class GroupBy
4
+ # @private
5
attr_accessor :_df,
              :_dataframe_class,
              :by,
              :maintain_order
6
+
7
+ # @private
8
def initialize(df, by, dataframe_class, maintain_order: false)
  # The frame handed to Utils.wrap_df by the aggregation methods, and the
  # class whose _from_rbdf builds their return value.
  self._df, self._dataframe_class = df, dataframe_class
  # Both values are forwarded unchanged to the lazy groupby call.
  self.by, self.maintain_order = by, maintain_order
end
14
+
15
+ # Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
16
+ #
17
+ # Implementing logic using a Ruby function is almost always _significantly_
18
+ # slower and more memory intensive than implementing the same logic using
19
+ # the native expression API because:
20
+ #
21
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
22
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
23
+ # - Polars-native expressions can be parallelised (UDFs cannot).
24
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
25
+ #
26
+ # Wherever possible you should strongly prefer the native expression API
27
+ # to achieve the best performance.
28
+ #
29
+ # @return [DataFrame]
30
+ #
31
+ # @example
32
+ # df = Polars::DataFrame.new(
33
+ # {
34
+ # "id" => [0, 1, 2, 3, 4],
35
+ # "color" => ["red", "green", "green", "red", "red"],
36
+ # "shape" => ["square", "triangle", "square", "triangle", "square"]
37
+ # }
38
+ # )
39
+ # df.groupby("color").apply { |group_df| group_df.sample(2) }
40
+ # # =>
41
+ # # shape: (4, 3)
42
+ # # ┌─────┬───────┬──────────┐
43
+ # # │ id ┆ color ┆ shape │
44
+ # # │ --- ┆ --- ┆ --- │
45
+ # # │ i64 ┆ str ┆ str │
46
+ # # ╞═════╪═══════╪══════════╡
47
+ # # │ 1 ┆ green ┆ triangle │
48
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
49
+ # # │ 2 ┆ green ┆ square │
50
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
51
+ # # │ 4 ┆ red ┆ square │
52
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
53
+ # # │ 3 ┆ red ┆ triangle │
54
+ # # └─────┴───────┴──────────┘
55
+ # def apply(&f)
56
+ # _dataframe_class._from_rbdf(_df.groupby_apply(by, f))
57
+ # end
58
+
59
+ # Use multiple aggregations on columns.
60
+ #
61
+ # This can be combined with the complete lazy API and is considered idiomatic Polars.
62
+ #
63
+ # @param aggs [Object]
64
+ # Single / multiple aggregation expression(s).
65
+ #
66
+ # @return [DataFrame]
67
+ #
68
+ # @example
69
+ # df = Polars::DataFrame.new(
70
+ # {"foo" => ["one", "two", "two", "one", "two"], "bar" => [5, 3, 2, 4, 1]}
71
+ # )
72
+ # df.groupby("foo", maintain_order: true).agg(
73
+ # [
74
+ # Polars.sum("bar").suffix("_sum"),
75
+ # Polars.col("bar").sort.tail(2).sum.suffix("_tail_sum")
76
+ # ]
77
+ # )
78
+ # # =>
79
+ # # shape: (2, 3)
80
+ # # ┌─────┬─────────┬──────────────┐
81
+ # # │ foo ┆ bar_sum ┆ bar_tail_sum │
82
+ # # │ --- ┆ --- ┆ --- │
83
+ # # │ str ┆ i64 ┆ i64 │
84
+ # # ╞═════╪═════════╪══════════════╡
85
+ # # │ one ┆ 9 ┆ 9 │
86
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
87
+ # # │ two ┆ 6 ┆ 5 │
88
+ # # └─────┴─────────┴──────────────┘
89
def agg(aggs)
  # Route the aggregation through the lazy API, then rebuild the result with
  # the configured DataFrame class.
  lazy_groups = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)
  collected = lazy_groups.agg(aggs).collect(no_optimization: true, string_cache: false)
  _dataframe_class._from_rbdf(collected._df)
end
97
+
98
+ # Get the first `n` rows of each group.
99
+ #
100
+ # @param n [Integer]
101
+ # Number of rows to return.
102
+ #
103
+ # @return [DataFrame]
104
+ #
105
+ # @example
106
+ # df = Polars::DataFrame.new(
107
+ # {
108
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
109
+ # "nrs" => [1, 2, 3, 4, 5, 6]
110
+ # }
111
+ # )
112
+ # # =>
113
+ # # shape: (6, 2)
114
+ # # ┌─────────┬─────┐
115
+ # # │ letters ┆ nrs │
116
+ # # │ --- ┆ --- │
117
+ # # │ str ┆ i64 │
118
+ # # ╞═════════╪═════╡
119
+ # # │ c ┆ 1 │
120
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
121
+ # # │ c ┆ 2 │
122
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
123
+ # # │ a ┆ 3 │
124
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
125
+ # # │ c ┆ 4 │
126
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
127
+ # # │ a ┆ 5 │
128
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
129
+ # # │ b ┆ 6 │
130
+ # # └─────────┴─────┘
131
+ #
132
+ # @example
133
+ # df.groupby("letters").head(2).sort("letters")
134
+ # # =>
135
+ # # shape: (5, 2)
136
+ # # ┌─────────┬─────┐
137
+ # # │ letters ┆ nrs │
138
+ # # │ --- ┆ --- │
139
+ # # │ str ┆ i64 │
140
+ # # ╞═════════╪═════╡
141
+ # # │ a ┆ 3 │
142
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
143
+ # # │ a ┆ 5 │
144
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
145
+ # # │ b ┆ 6 │
146
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
147
+ # # │ c ┆ 1 │
148
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
149
+ # # │ c ┆ 2 │
150
+ # # └─────────┴─────┘
151
def head(n = 5)
  # Same lazy round trip as #agg, but calling `head(n)` on the lazy groupby.
  lazy_groups = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)
  collected = lazy_groups.head(n).collect(no_optimization: true, string_cache: false)
  _dataframe_class._from_rbdf(collected._df)
end
161
+
162
+ # Get the last `n` rows of each group.
163
+ #
164
+ # @param n [Integer]
165
+ # Number of rows to return.
166
+ #
167
+ # @return [DataFrame]
168
+ #
169
+ # @example
170
+ # df = Polars::DataFrame.new(
171
+ # {
172
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
173
+ # "nrs" => [1, 2, 3, 4, 5, 6]
174
+ # }
175
+ # )
176
+ # # =>
177
+ # # shape: (6, 2)
178
+ # # ┌─────────┬─────┐
179
+ # # │ letters ┆ nrs │
180
+ # # │ --- ┆ --- │
181
+ # # │ str ┆ i64 │
182
+ # # ╞═════════╪═════╡
183
+ # # │ c ┆ 1 │
184
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
185
+ # # │ c ┆ 2 │
186
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
187
+ # # │ a ┆ 3 │
188
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
189
+ # # │ c ┆ 4 │
190
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
191
+ # # │ a ┆ 5 │
192
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
193
+ # # │ b ┆ 6 │
194
+ # # └─────────┴─────┘
195
+ #
196
+ # @example
197
+ # df.groupby("letters").tail(2).sort("letters")
198
+ # # =>
199
+ # # shape: (5, 2)
200
+ # # ┌─────────┬─────┐
201
+ # # │ letters ┆ nrs │
202
+ # # │ --- ┆ --- │
203
+ # # │ str ┆ i64 │
204
+ # # ╞═════════╪═════╡
205
+ # # │ a ┆ 3 │
206
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
207
+ # # │ a ┆ 5 │
208
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
209
+ # # │ b ┆ 6 │
210
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
211
+ # # │ c ┆ 2 │
212
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
213
+ # # │ c ┆ 4 │
214
+ # # └─────────┴─────┘
215
def tail(n = 5)
  # Same lazy round trip as #agg, but calling `tail(n)` on the lazy groupby.
  lazy_groups = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)
  collected = lazy_groups.tail(n).collect(no_optimization: true, string_cache: false)
  _dataframe_class._from_rbdf(collected._df)
end
225
+
226
+ # pivot is deprecated
227
+
228
+ # Aggregate the first values in the group.
229
+ #
230
+ # @return [DataFrame]
231
+ #
232
+ # @example
233
+ # df = Polars::DataFrame.new(
234
+ # {
235
+ # "a" => [1, 2, 2, 3, 4, 5],
236
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
237
+ # "c" => [true, true, true, false, false, true],
238
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
239
+ # }
240
+ # )
241
+ # df.groupby("d", maintain_order: true).first
242
+ # # =>
243
+ # # shape: (3, 4)
244
+ # # ┌────────┬─────┬──────┬───────┐
245
+ # # │ d ┆ a ┆ b ┆ c │
246
+ # # │ --- ┆ --- ┆ --- ┆ --- │
247
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
248
+ # # ╞════════╪═════╪══════╪═══════╡
249
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ true │
250
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
251
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
252
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
253
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
254
+ # # └────────┴─────┴──────┴───────┘
255
def first
  # Build the `Polars.all.first` expression and hand it to #agg.
  first_expr = Polars.all.first
  agg(first_expr)
end
258
+
259
+ # Aggregate the last values in the group.
260
+ #
261
+ # @return [DataFrame]
262
+ #
263
+ # @example
264
+ # df = Polars::DataFrame.new(
265
+ # {
266
+ # "a" => [1, 2, 2, 3, 4, 5],
267
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
268
+ # "c" => [true, true, true, false, false, true],
269
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
270
+ # }
271
+ # )
272
+ # df.groupby("d", maintain_order: true).last
273
+ # # =>
274
+ # # shape: (3, 4)
275
+ # # ┌────────┬─────┬──────┬───────┐
276
+ # # │ d ┆ a ┆ b ┆ c │
277
+ # # │ --- ┆ --- ┆ --- ┆ --- │
278
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
279
+ # # ╞════════╪═════╪══════╪═══════╡
280
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ false │
281
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
282
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
283
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
284
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
285
+ # # └────────┴─────┴──────┴───────┘
286
def last
  # Build the `Polars.all.last` expression and hand it to #agg.
  last_expr = Polars.all.last
  agg(last_expr)
end
289
+
290
+ # Reduce the groups to the sum.
291
+ #
292
+ # @return [DataFrame]
293
+ #
294
+ # @example
295
+ # df = Polars::DataFrame.new(
296
+ # {
297
+ # "a" => [1, 2, 2, 3, 4, 5],
298
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
299
+ # "c" => [true, true, true, false, false, true],
300
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
301
+ # }
302
+ # )
303
+ # df.groupby("d", maintain_order: true).sum
304
+ # # =>
305
+ # # shape: (3, 4)
306
+ # # ┌────────┬─────┬──────┬─────┐
307
+ # # │ d ┆ a ┆ b ┆ c │
308
+ # # │ --- ┆ --- ┆ --- ┆ --- │
309
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
310
+ # # ╞════════╪═════╪══════╪═════╡
311
+ # # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
312
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
313
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
314
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
315
+ # # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
316
+ # # └────────┴─────┴──────┴─────┘
317
def sum
  # Build the `Polars.all.sum` expression and hand it to #agg.
  sum_expr = Polars.all.sum
  agg(sum_expr)
end
320
+
321
+ # Reduce the groups to the minimal value.
322
+ #
323
+ # @return [DataFrame]
324
+ #
325
+ # @example
326
+ # df = Polars::DataFrame.new(
327
+ # {
328
+ # "a" => [1, 2, 2, 3, 4, 5],
329
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
330
+ # "c" => [true, true, true, false, false, true],
331
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
332
+ # }
333
+ # )
334
+ # df.groupby("d", maintain_order: true).min
335
+ # # =>
336
+ # # shape: (3, 4)
337
+ # # ┌────────┬─────┬──────┬───────┐
338
+ # # │ d ┆ a ┆ b ┆ c │
339
+ # # │ --- ┆ --- ┆ --- ┆ --- │
340
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
341
+ # # ╞════════╪═════╪══════╪═══════╡
342
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ false │
343
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
344
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
345
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
346
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
347
+ # # └────────┴─────┴──────┴───────┘
348
def min
  # Build the `Polars.all.min` expression and hand it to #agg.
  min_expr = Polars.all.min
  agg(min_expr)
end
351
+
352
+ # Reduce the groups to the maximal value.
353
+ #
354
+ # @return [DataFrame]
355
+ #
356
+ # @example
357
+ # df = Polars::DataFrame.new(
358
+ # {
359
+ # "a" => [1, 2, 2, 3, 4, 5],
360
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
361
+ # "c" => [true, true, true, false, false, true],
362
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
363
+ # }
364
+ # )
365
+ # df.groupby("d", maintain_order: true).max
366
+ # # =>
367
+ # # shape: (3, 4)
368
+ # # ┌────────┬─────┬──────┬──────┐
369
+ # # │ d ┆ a ┆ b ┆ c │
370
+ # # │ --- ┆ --- ┆ --- ┆ --- │
371
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
372
+ # # ╞════════╪═════╪══════╪══════╡
373
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ true │
374
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
375
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
376
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
377
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
378
+ # # └────────┴─────┴──────┴──────┘
379
def max
  # Build the `Polars.all.max` expression and hand it to #agg.
  max_expr = Polars.all.max
  agg(max_expr)
end
382
+
383
+ # Count the number of values in each group.
384
+ #
385
+ # @return [DataFrame]
386
+ #
387
+ # @example
388
+ # df = Polars::DataFrame.new(
389
+ # {
390
+ # "a" => [1, 2, 2, 3, 4, 5],
391
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
392
+ # "c" => [true, true, true, false, false, true],
393
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
394
+ # }
395
+ # )
396
+ # df.groupby("d", maintain_order: true).count
397
+ # # =>
398
+ # # shape: (3, 2)
399
+ # # ┌────────┬───────┐
400
+ # # │ d ┆ count │
401
+ # # │ --- ┆ --- │
402
+ # # │ str ┆ u32 │
403
+ # # ╞════════╪═══════╡
404
+ # # │ Apple ┆ 3 │
405
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
406
+ # # │ Orange ┆ 1 │
407
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
408
+ # # │ Banana ┆ 2 │
409
+ # # └────────┴───────┘
410
def count
  # Unlike the other reducers, this passes `Polars.count` directly rather
  # than an expression built from `Polars.all`.
  count_expr = Polars.count
  agg(count_expr)
end
413
+
414
+ # Reduce the groups to the mean values.
415
+ #
416
+ # @return [DataFrame]
417
+ #
418
+ # @example
419
+ # df = Polars::DataFrame.new(
420
+ # {
421
+ # "a" => [1, 2, 2, 3, 4, 5],
422
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
423
+ # "c" => [true, true, true, false, false, true],
424
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
425
+ # }
426
+ # )
427
+ # df.groupby("d", maintain_order: true).mean
428
+ # # =>
429
+ # # shape: (3, 4)
430
+ # # ┌────────┬─────┬──────────┬──────────┐
431
+ # # │ d ┆ a ┆ b ┆ c │
432
+ # # │ --- ┆ --- ┆ --- ┆ --- │
433
+ # # │ str ┆ f64 ┆ f64 ┆ f64 │
434
+ # # ╞════════╪═════╪══════════╪══════════╡
435
+ # # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
436
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
437
+ # # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
438
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
439
+ # # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
440
+ # # └────────┴─────┴──────────┴──────────┘
441
def mean
  # Build the `Polars.all.mean` expression and hand it to #agg.
  mean_expr = Polars.all.mean
  agg(mean_expr)
end
444
+
445
+ # Count the unique values per group.
446
+ #
447
+ # @return [DataFrame]
448
+ #
449
+ # @example
450
+ # df = Polars::DataFrame.new(
451
+ # {
452
+ # "a" => [1, 2, 1, 3, 4, 5],
453
+ # "b" => [0.5, 0.5, 0.5, 10, 13, 14],
454
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
455
+ # }
456
+ # )
457
+ # df.groupby("d", maintain_order: true).n_unique
458
+ # # =>
459
+ # # shape: (2, 3)
460
+ # # ┌────────┬─────┬─────┐
461
+ # # │ d ┆ a ┆ b │
462
+ # # │ --- ┆ --- ┆ --- │
463
+ # # │ str ┆ u32 ┆ u32 │
464
+ # # ╞════════╪═════╪═════╡
465
+ # # │ Apple ┆ 2 ┆ 2 │
466
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
467
+ # # │ Banana ┆ 3 ┆ 3 │
468
+ # # └────────┴─────┴─────┘
469
def n_unique
  # Build the `Polars.all.n_unique` expression and hand it to #agg.
  n_unique_expr = Polars.all.n_unique
  agg(n_unique_expr)
end
472
+
473
+ # Compute the quantile per group.
474
+ #
475
+ # @param quantile [Float]
476
+ # Quantile between 0.0 and 1.0.
477
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
478
+ # Interpolation method.
479
+ #
480
+ # @return [DataFrame]
481
+ #
482
+ # @example
483
+ # df = Polars::DataFrame.new(
484
+ # {
485
+ # "a" => [1, 2, 2, 3, 4, 5],
486
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
487
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
488
+ # }
489
+ # )
490
+ # df.groupby("d", maintain_order: true).quantile(1)
491
+ # # =>
492
+ # # shape: (3, 3)
493
+ # # ┌────────┬─────┬──────┐
494
+ # # │ d ┆ a ┆ b │
495
+ # # │ --- ┆ --- ┆ --- │
496
+ # # │ str ┆ f64 ┆ f64 │
497
+ # # ╞════════╪═════╪══════╡
498
+ # # │ Apple ┆ 3.0 ┆ 10.0 │
499
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
500
+ # # │ Orange ┆ 2.0 ┆ 0.5 │
501
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
502
+ # # │ Banana ┆ 5.0 ┆ 14.0 │
503
+ # # └────────┴─────┴──────┘
504
def quantile(quantile, interpolation: "nearest")
  # Both arguments are forwarded unchanged to the expression's `quantile`.
  quantile_expr = Polars.all.quantile(quantile, interpolation: interpolation)
  agg(quantile_expr)
end
507
+
508
+ # Return the median per group.
509
+ #
510
+ # @return [DataFrame]
511
+ #
512
+ # @example
513
+ # df = Polars::DataFrame.new(
514
+ # {
515
+ # "a" => [1, 2, 2, 3, 4, 5],
516
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
517
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
518
+ # }
519
+ # )
520
+ # df.groupby("d", maintain_order: true).median
521
+ # # =>
522
+ # # shape: (2, 3)
523
+ # # ┌────────┬─────┬──────┐
524
+ # # │ d ┆ a ┆ b │
525
+ # # │ --- ┆ --- ┆ --- │
526
+ # # │ str ┆ f64 ┆ f64 │
527
+ # # ╞════════╪═════╪══════╡
528
+ # # │ Apple ┆ 2.0 ┆ 4.0 │
529
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
530
+ # # │ Banana ┆ 4.0 ┆ 13.0 │
531
+ # # └────────┴─────┴──────┘
532
def median
  # Build the `Polars.all.median` expression and hand it to #agg.
  median_expr = Polars.all.median
  agg(median_expr)
end
535
+
536
+ # Aggregate the groups into Series.
537
+ #
538
+ # @return [DataFrame]
539
+ #
540
+ # @example
541
+ # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
542
+ # df.groupby("a", maintain_order: true).agg_list
543
+ # # =>
544
+ # # shape: (2, 2)
545
+ # # ┌─────┬───────────┐
546
+ # # │ a ┆ b │
547
+ # # │ --- ┆ --- │
548
+ # # │ str ┆ list[i64] │
549
+ # # ╞═════╪═══════════╡
550
+ # # │ one ┆ [1, 3] │
551
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
552
+ # # │ two ┆ [2, 4] │
553
+ # # └─────┴───────────┘
554
def agg_list
  # Build the `Polars.all.list` expression and hand it to #agg.
  list_expr = Polars.all.list
  agg(list_expr)
end
557
+ end
558
+ end