polars-df 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -0,0 +1,518 @@
1
+ module Polars
2
+ # Starts a new GroupBy operation.
3
+ class GroupBy
4
+ # @private
5
+ attr_accessor :_df, :_dataframe_class, :by, :maintain_order
6
+
7
+ # @private
8
def initialize(df, by, dataframe_class, maintain_order: false)
  # Grouping configuration: the key(s) to group on and whether the
  # maintain_order flag is forwarded to the lazy groupby.
  self.by = by
  self.maintain_order = maintain_order

  # The underlying frame handle and the class used to wrap results.
  self._df = df
  self._dataframe_class = dataframe_class
end
14
+
15
+ # def apply
16
+ # end
17
+
18
+ # Use multiple aggregations on columns.
19
+ #
20
+ # This can be combined with the complete lazy API and is considered idiomatic Polars.
21
+ #
22
+ # @param aggs [Object]
23
+ # Single / multiple aggregation expression(s).
24
+ #
25
+ # @return [DataFrame]
26
+ #
27
+ # @example
28
+ # df = Polars::DataFrame.new(
29
+ # {"foo" => ["one", "two", "two", "one", "two"], "bar" => [5, 3, 2, 4, 1]}
30
+ # )
31
+ # df.groupby("foo", maintain_order: true).agg(
32
+ # [
33
+ # Polars.sum("bar").suffix("_sum"),
34
+ # Polars.col("bar").sort.tail(2).sum.suffix("_tail_sum")
35
+ # ]
36
+ # )
37
+ # # =>
38
+ # # shape: (2, 3)
39
+ # # ┌─────┬─────────┬──────────────┐
40
+ # # │ foo ┆ bar_sum ┆ bar_tail_sum │
41
+ # # │ --- ┆ --- ┆ --- │
42
+ # # │ str ┆ i64 ┆ i64 │
43
+ # # ╞═════╪═════════╪══════════════╡
44
+ # # │ one ┆ 9 ┆ 9 │
45
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
46
+ # # │ two ┆ 6 ┆ 5 │
47
+ # # └─────┴─────────┴──────────────┘
48
def agg(aggs)
  # Build the grouped lazy query from the wrapped frame.
  grouped = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)

  # Apply the aggregation(s) and materialize the result; collect is called
  # with no_optimization: true and string_cache: false.
  collected = grouped.agg(aggs).collect(no_optimization: true, string_cache: false)

  # Re-wrap the raw result using the configured DataFrame class.
  _dataframe_class._from_rbdf(collected._df)
end
56
+
57
+ # Get the first `n` rows of each group.
58
+ #
59
+ # @param n [Integer]
60
+ # Number of rows to return.
61
+ #
62
+ # @return [DataFrame]
63
+ #
64
+ # @example
65
+ # df = Polars::DataFrame.new(
66
+ # {
67
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
68
+ # "nrs" => [1, 2, 3, 4, 5, 6]
69
+ # }
70
+ # )
71
+ # # =>
72
+ # # shape: (6, 2)
73
+ # # ┌─────────┬─────┐
74
+ # # │ letters ┆ nrs │
75
+ # # │ --- ┆ --- │
76
+ # # │ str ┆ i64 │
77
+ # # ╞═════════╪═════╡
78
+ # # │ c ┆ 1 │
79
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
80
+ # # │ c ┆ 2 │
81
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
82
+ # # │ a ┆ 3 │
83
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
84
+ # # │ c ┆ 4 │
85
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
86
+ # # │ a ┆ 5 │
87
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
88
+ # # │ b ┆ 6 │
89
+ # # └─────────┴─────┘
90
+ #
91
+ # @example
92
+ # df.groupby("letters").head(2).sort("letters")
93
+ # # =>
94
+ # # shape: (5, 2)
95
+ # # ┌─────────┬─────┐
96
+ # # │ letters ┆ nrs │
97
+ # # │ --- ┆ --- │
98
+ # # │ str ┆ i64 │
99
+ # # ╞═════════╪═════╡
100
+ # # │ a ┆ 3 │
101
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
102
+ # # │ a ┆ 5 │
103
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
104
+ # # │ b ┆ 6 │
105
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
106
+ # # │ c ┆ 1 │
107
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
108
+ # # │ c ┆ 2 │
109
+ # # └─────────┴─────┘
110
def head(n = 5)
  # Build the grouped lazy query from the wrapped frame.
  grouped = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)

  # Take the leading `n` rows per group and materialize; collect is called
  # with no_optimization: true and string_cache: false.
  collected = grouped.head(n).collect(no_optimization: true, string_cache: false)

  # Re-wrap the raw result using the configured DataFrame class.
  _dataframe_class._from_rbdf(collected._df)
end
120
+
121
+ # Get the last `n` rows of each group.
122
+ #
123
+ # @param n [Integer]
124
+ # Number of rows to return.
125
+ #
126
+ # @return [DataFrame]
127
+ #
128
+ # @example
129
+ # df = Polars::DataFrame.new(
130
+ # {
131
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
132
+ # "nrs" => [1, 2, 3, 4, 5, 6]
133
+ # }
134
+ # )
135
+ # # =>
136
+ # # shape: (6, 2)
137
+ # # ┌─────────┬─────┐
138
+ # # │ letters ┆ nrs │
139
+ # # │ --- ┆ --- │
140
+ # # │ str ┆ i64 │
141
+ # # ╞═════════╪═════╡
142
+ # # │ c ┆ 1 │
143
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
144
+ # # │ c ┆ 2 │
145
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
146
+ # # │ a ┆ 3 │
147
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
148
+ # # │ c ┆ 4 │
149
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
150
+ # # │ a ┆ 5 │
151
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
152
+ # # │ b ┆ 6 │
153
+ # # └─────────┴─────┘
154
+ #
155
+ # @example
156
+ # df.groupby("letters").tail(2).sort("letters")
157
+ # # =>
158
+ # # shape: (5, 2)
159
+ # # ┌─────────┬─────┐
160
+ # # │ letters ┆ nrs │
161
+ # # │ --- ┆ --- │
162
+ # # │ str ┆ i64 │
163
+ # # ╞═════════╪═════╡
164
+ # # │ a ┆ 3 │
165
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
166
+ # # │ a ┆ 5 │
167
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
168
+ # # │ b ┆ 6 │
169
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
170
+ # # │ c ┆ 2 │
171
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
172
+ # # │ c ┆ 4 │
173
+ # # └─────────┴─────┘
174
def tail(n = 5)
  # Build the grouped lazy query from the wrapped frame.
  grouped = Utils.wrap_df(_df).lazy.groupby(by, maintain_order: maintain_order)

  # Take the trailing `n` rows per group and materialize; collect is called
  # with no_optimization: true and string_cache: false.
  collected = grouped.tail(n).collect(no_optimization: true, string_cache: false)

  # Re-wrap the raw result using the configured DataFrame class.
  _dataframe_class._from_rbdf(collected._df)
end
184
+
185
+ # def pivot
186
+ # end
187
+
188
+ # Aggregate the first values in the group.
189
+ #
190
+ # @return [DataFrame]
191
+ #
192
+ # @example
193
+ # df = Polars::DataFrame.new(
194
+ # {
195
+ # "a" => [1, 2, 2, 3, 4, 5],
196
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
197
+ # "c" => [true, true, true, false, false, true],
198
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
199
+ # }
200
+ # )
201
+ # df.groupby("d", maintain_order: true).first
202
+ # # =>
203
+ # # shape: (3, 4)
204
+ # # ┌────────┬─────┬──────┬───────┐
205
+ # # │ d ┆ a ┆ b ┆ c │
206
+ # # │ --- ┆ --- ┆ --- ┆ --- │
207
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
208
+ # # ╞════════╪═════╪══════╪═══════╡
209
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ true │
210
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
211
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
212
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
213
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
214
+ # # └────────┴─────┴──────┴───────┘
215
def first
  # Delegates to #agg with the `first` expression applied to all columns.
  first_expr = Polars.all.first
  agg(first_expr)
end
218
+
219
+ # Aggregate the last values in the group.
220
+ #
221
+ # @return [DataFrame]
222
+ #
223
+ # @example
224
+ # df = Polars::DataFrame.new(
225
+ # {
226
+ # "a" => [1, 2, 2, 3, 4, 5],
227
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
228
+ # "c" => [true, true, true, false, false, true],
229
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
230
+ # }
231
+ # )
232
+ # df.groupby("d", maintain_order: true).last
233
+ # # =>
234
+ # # shape: (3, 4)
235
+ # # ┌────────┬─────┬──────┬───────┐
236
+ # # │ d ┆ a ┆ b ┆ c │
237
+ # # │ --- ┆ --- ┆ --- ┆ --- │
238
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
239
+ # # ╞════════╪═════╪══════╪═══════╡
240
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ false │
241
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
242
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
243
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
244
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
245
+ # # └────────┴─────┴──────┴───────┘
246
def last
  # Delegates to #agg with the `last` expression applied to all columns.
  last_expr = Polars.all.last
  agg(last_expr)
end
249
+
250
+ # Reduce the groups to the sum.
251
+ #
252
+ # @return [DataFrame]
253
+ #
254
+ # @example
255
+ # df = Polars::DataFrame.new(
256
+ # {
257
+ # "a" => [1, 2, 2, 3, 4, 5],
258
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
259
+ # "c" => [true, true, true, false, false, true],
260
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
261
+ # }
262
+ # )
263
+ # df.groupby("d", maintain_order: true).sum
264
+ # # =>
265
+ # # shape: (3, 4)
266
+ # # ┌────────┬─────┬──────┬─────┐
267
+ # # │ d ┆ a ┆ b ┆ c │
268
+ # # │ --- ┆ --- ┆ --- ┆ --- │
269
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
270
+ # # ╞════════╪═════╪══════╪═════╡
271
+ # # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
272
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
273
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
274
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
275
+ # # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
276
+ # # └────────┴─────┴──────┴─────┘
277
def sum
  # Delegates to #agg with the `sum` expression applied to all columns.
  sum_expr = Polars.all.sum
  agg(sum_expr)
end
280
+
281
+ # Reduce the groups to the minimal value.
282
+ #
283
+ # @return [DataFrame]
284
+ #
285
+ # @example
286
+ # df = Polars::DataFrame.new(
287
+ # {
288
+ # "a" => [1, 2, 2, 3, 4, 5],
289
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
290
+ # "c" => [true, true, true, false, false, true],
291
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
292
+ # }
293
+ # )
294
+ # df.groupby("d", maintain_order: true).min
295
+ # # =>
296
+ # # shape: (3, 4)
297
+ # # ┌────────┬─────┬──────┬─────┐
298
+ # # │ d ┆ a ┆ b ┆ c │
299
+ # # │ --- ┆ --- ┆ --- ┆ --- │
300
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
301
+ # # ╞════════╪═════╪══════╪═════╡
302
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ 0 │
303
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
304
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
305
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
306
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ 0 │
307
+ # # └────────┴─────┴──────┴─────┘
308
def min
  # Delegates to #agg with the `min` expression applied to all columns.
  min_expr = Polars.all.min
  agg(min_expr)
end
311
+
312
+ # Reduce the groups to the maximal value.
313
+ #
314
+ # @return [DataFrame]
315
+ #
316
+ # @example
317
+ # df = Polars::DataFrame.new(
318
+ # {
319
+ # "a" => [1, 2, 2, 3, 4, 5],
320
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
321
+ # "c" => [true, true, true, false, false, true],
322
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
323
+ # }
324
+ # )
325
+ # df.groupby("d", maintain_order: true).max
326
+ # # =>
327
+ # # shape: (3, 4)
328
+ # # ┌────────┬─────┬──────┬─────┐
329
+ # # │ d ┆ a ┆ b ┆ c │
330
+ # # │ --- ┆ --- ┆ --- ┆ --- │
331
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
332
+ # # ╞════════╪═════╪══════╪═════╡
333
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ 1 │
334
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
335
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
336
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
337
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ 1 │
338
+ # # └────────┴─────┴──────┴─────┘
339
def max
  # Delegates to #agg with the `max` expression applied to all columns.
  max_expr = Polars.all.max
  agg(max_expr)
end
342
+
343
+ # Count the number of values in each group.
344
+ #
345
+ # @return [DataFrame]
346
+ #
347
+ # @example
348
+ # df = Polars::DataFrame.new(
349
+ # {
350
+ # "a" => [1, 2, 2, 3, 4, 5],
351
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
352
+ # "c" => [true, true, true, false, false, true],
353
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
354
+ # }
355
+ # )
356
+ # df.groupby("d", maintain_order: true).count
357
+ # # =>
358
+ # # shape: (3, 2)
359
+ # # ┌────────┬───────┐
360
+ # # │ d ┆ count │
361
+ # # │ --- ┆ --- │
362
+ # # │ str ┆ u32 │
363
+ # # ╞════════╪═══════╡
364
+ # # │ Apple ┆ 3 │
365
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
366
+ # # │ Orange ┆ 1 │
367
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
368
+ # # │ Banana ┆ 2 │
369
+ # # └────────┴───────┘
370
def count
  # Delegates to #agg with the Polars.count expression.
  count_expr = Polars.count
  agg(count_expr)
end
373
+
374
+ # Reduce the groups to the mean values.
375
+ #
376
+ # @return [DataFrame]
377
+ #
378
+ # @example
379
+ # df = Polars::DataFrame.new(
380
+ # {
381
+ # "a" => [1, 2, 2, 3, 4, 5],
382
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
383
+ # "c" => [true, true, true, false, false, true],
384
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
385
+ # }
386
+ # )
387
+ # df.groupby("d", maintain_order: true).mean
388
+ # # =>
389
+ # # shape: (3, 4)
390
+ # # ┌────────┬─────┬──────────┬──────┐
391
+ # # │ d ┆ a ┆ b ┆ c │
392
+ # # │ --- ┆ --- ┆ --- ┆ --- │
393
+ # # │ str ┆ f64 ┆ f64 ┆ bool │
394
+ # # ╞════════╪═════╪══════════╪══════╡
395
+ # # │ Apple ┆ 2.0 ┆ 4.833333 ┆ null │
396
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
397
+ # # │ Orange ┆ 2.0 ┆ 0.5 ┆ null │
398
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
399
+ # # │ Banana ┆ 4.5 ┆ 13.5 ┆ null │
400
+ # # └────────┴─────┴──────────┴──────┘
401
def mean
  # Delegates to #agg with the `mean` expression applied to all columns.
  mean_expr = Polars.all.mean
  agg(mean_expr)
end
404
+
405
+ # Count the unique values per group.
406
+ #
407
+ # @return [DataFrame]
408
+ #
409
+ # @example
410
+ # df = Polars::DataFrame.new(
411
+ # {
412
+ # "a" => [1, 2, 1, 3, 4, 5],
413
+ # "b" => [0.5, 0.5, 0.5, 10, 13, 14],
414
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
415
+ # }
416
+ # )
417
+ # df.groupby("d", maintain_order: true).n_unique
418
+ # # =>
419
+ # # shape: (2, 3)
420
+ # # ┌────────┬─────┬─────┐
421
+ # # │ d ┆ a ┆ b │
422
+ # # │ --- ┆ --- ┆ --- │
423
+ # # │ str ┆ u32 ┆ u32 │
424
+ # # ╞════════╪═════╪═════╡
425
+ # # │ Apple ┆ 2 ┆ 2 │
426
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
427
+ # # │ Banana ┆ 3 ┆ 3 │
428
+ # # └────────┴─────┴─────┘
429
def n_unique
  # Delegates to #agg with the `n_unique` expression applied to all columns.
  unique_count_expr = Polars.all.n_unique
  agg(unique_count_expr)
end
432
+
433
+ # Compute the quantile per group.
434
+ #
435
+ # @param quantile [Float]
436
+ # Quantile between 0.0 and 1.0.
437
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
438
+ # Interpolation method.
439
+ #
440
+ # @return [DataFrame]
441
+ #
442
+ # @example
443
+ # df = Polars::DataFrame.new(
444
+ # {
445
+ # "a" => [1, 2, 2, 3, 4, 5],
446
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
447
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
448
+ # }
449
+ # )
450
+ # df.groupby("d", maintain_order: true).quantile(1)
451
+ # # =>
452
+ # # shape: (3, 3)
453
+ # # ┌────────┬─────┬──────┐
454
+ # # │ d ┆ a ┆ b │
455
+ # # │ --- ┆ --- ┆ --- │
456
+ # # │ str ┆ f64 ┆ f64 │
457
+ # # ╞════════╪═════╪══════╡
458
+ # # │ Apple ┆ 3.0 ┆ 10.0 │
459
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
460
+ # # │ Orange ┆ 2.0 ┆ 0.5 │
461
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
462
+ # # │ Banana ┆ 5.0 ┆ 14.0 │
463
+ # # └────────┴─────┴──────┘
464
def quantile(quantile, interpolation: "nearest")
  # Delegates to #agg with the `quantile` expression applied to all columns,
  # forwarding the requested quantile and interpolation method unchanged.
  quantile_expr = Polars.all.quantile(quantile, interpolation: interpolation)
  agg(quantile_expr)
end
467
+
468
+ # Return the median per group.
469
+ #
470
+ # @return [DataFrame]
471
+ #
472
+ # @example
473
+ # df = Polars::DataFrame.new(
474
+ # {
475
+ # "a" => [1, 2, 2, 3, 4, 5],
476
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
477
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
478
+ # }
479
+ # )
480
+ # df.groupby("d", maintain_order: true).median
481
+ # # =>
482
+ # # shape: (2, 3)
483
+ # # ┌────────┬─────┬──────┐
484
+ # # │ d ┆ a ┆ b │
485
+ # # │ --- ┆ --- ┆ --- │
486
+ # # │ str ┆ f64 ┆ f64 │
487
+ # # ╞════════╪═════╪══════╡
488
+ # # │ Apple ┆ 2.0 ┆ 4.0 │
489
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
490
+ # # │ Banana ┆ 4.0 ┆ 13.0 │
491
+ # # └────────┴─────┴──────┘
492
def median
  # Delegates to #agg with the `median` expression applied to all columns.
  median_expr = Polars.all.median
  agg(median_expr)
end
495
+
496
+ # Aggregate the groups into Series.
497
+ #
498
+ # @return [DataFrame]
499
+ #
500
+ # @example
501
+ # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
502
+ # df.groupby("a", maintain_order: true).agg_list
503
+ # # =>
504
+ # # shape: (2, 2)
505
+ # # ┌─────┬───────────┐
506
+ # # │ a ┆ b │
507
+ # # │ --- ┆ --- │
508
+ # # │ str ┆ list[i64] │
509
+ # # ╞═════╪═══════════╡
510
+ # # │ one ┆ [1, 3] │
511
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
512
+ # # │ two ┆ [2, 4] │
513
+ # # └─────┴───────────┘
514
def agg_list
  # Delegates to #agg with the `list` expression applied to all columns.
  list_expr = Polars.all.list
  agg(list_expr)
end
517
+ end
518
+ end