polars-df 0.10.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,548 @@
1
+ module Polars
2
+ # Starts a new GroupBy operation.
3
+ class GroupBy
4
+ # @private
5
def initialize(df, by, maintain_order: false)
  # No work happens here: the arguments are only stored for later use.
  @df = df
  @by = by
  @maintain_order = maintain_order
end
10
+
11
+ # Allows iteration over the groups of the group by operation.
12
+ #
13
+ # @return [Object]
14
+ #
15
+ # @example
16
+ # df = Polars::DataFrame.new({"foo" => ["a", "a", "b"], "bar" => [1, 2, 3]})
17
+ # df.group_by("foo", maintain_order: true).each.to_h
18
+ # # =>
19
+ # # {"a"=>shape: (2, 2)
20
+ # # ┌─────┬─────┐
21
+ # # │ foo ┆ bar │
22
+ # # │ --- ┆ --- │
23
+ # # │ str ┆ i64 │
24
+ # # ╞═════╪═════╡
25
+ # # │ a ┆ 1 │
26
+ # # │ a ┆ 2 │
27
+ # # └─────┴─────┘, "b"=>shape: (1, 2)
28
+ # # ┌─────┬─────┐
29
+ # # │ foo ┆ bar │
30
+ # # │ --- ┆ --- │
31
+ # # │ str ┆ i64 │
32
+ # # ╞═════╪═════╡
33
+ # # │ b ┆ 3 │
34
+ # # └─────┴─────┘}
35
def each
  return to_enum(:each) unless block_given?

  # Tag every row with its position, then aggregate those positions per group
  # so each group's rows can be looked up in the original frame afterwards.
  index_col = "__POLARS_GB_GROUP_INDICES"
  grouped =
    @df.lazy
      .with_row_index(name: index_col)
      .group_by(@by, maintain_order: @maintain_order)
      .agg(Polars.col(index_col))
      .collect(no_optimization: true)

  keys = grouped.select(Polars.all.exclude(index_col))

  # When grouping by a single column, group name is a single value
  # When grouping by multiple columns, group name is a tuple of values
  # NOTE(review): a single Symbol key takes the multi-column branch and is
  # yielded as a row - confirm that this is intended.
  key_enum =
    if @by.is_a?(::String) || @by.is_a?(Expr)
      keys.to_series.each
    else
      keys.iter_rows
    end

  row_indices = grouped.select(index_col).to_series
  row_indices.length.times do |i|
    # The key is advanced before the group's rows are fetched.
    yield key_enum.next, @df[row_indices[i]]
  end

  # Keep returning nil when a block is given, as the `while` loop did.
  nil
end
67
+
68
+ # Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
69
+ #
70
+ # Implementing logic using a Ruby function is almost always _significantly_
71
+ # slower and more memory intensive than implementing the same logic using
72
+ # the native expression API because:
73
+ #
74
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
75
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
76
+ # - Polars-native expressions can be parallelised (UDFs cannot).
77
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
78
+ #
79
+ # Wherever possible you should strongly prefer the native expression API
80
+ # to achieve the best performance.
81
+ #
82
+ # @return [DataFrame]
83
+ #
84
+ # @example
85
+ # df = Polars::DataFrame.new(
86
+ # {
87
+ # "id" => [0, 1, 2, 3, 4],
88
+ # "color" => ["red", "green", "green", "red", "red"],
89
+ # "shape" => ["square", "triangle", "square", "triangle", "square"]
90
+ # }
91
+ # )
92
+ # df.group_by("color").apply { |group_df| group_df.sample(2) }
93
+ # # =>
94
+ # # shape: (4, 3)
95
+ # # ┌─────┬───────┬──────────┐
96
+ # # │ id ┆ color ┆ shape │
97
+ # # │ --- ┆ --- ┆ --- │
98
+ # # │ i64 ┆ str ┆ str │
99
+ # # ╞═════╪═══════╪══════════╡
100
+ # # │ 1 ┆ green ┆ triangle │
101
+ # # │ 2 ┆ green ┆ square │
102
+ # # │ 4 ┆ red ┆ square │
103
+ # # │ 3 ┆ red ┆ triangle │
104
+ # # └─────┴───────┴──────────┘
105
+ # def apply(&f)
106
+ # _dataframe_class._from_rbdf(_df.group_by_apply(by, f))
107
+ # end
108
+
109
+ # Use multiple aggregations on columns.
110
+ #
111
+ # This can be combined with the complete lazy API and is considered idiomatic Polars.
112
+ #
113
+ # @param aggs [Object]
114
+ # Single / multiple aggregation expression(s).
115
+ #
116
+ # @return [DataFrame]
117
+ #
118
+ # @example
119
+ # df = Polars::DataFrame.new(
120
+ # {"foo" => ["one", "two", "two", "one", "two"], "bar" => [5, 3, 2, 4, 1]}
121
+ # )
122
+ # df.group_by("foo", maintain_order: true).agg(
123
+ # [
124
+ # Polars.sum("bar").suffix("_sum"),
125
+ # Polars.col("bar").sort.tail(2).sum.suffix("_tail_sum")
126
+ # ]
127
+ # )
128
+ # # =>
129
+ # # shape: (2, 3)
130
+ # # ┌─────┬─────────┬──────────────┐
131
+ # # │ foo ┆ bar_sum ┆ bar_tail_sum │
132
+ # # │ --- ┆ --- ┆ --- │
133
+ # # │ str ┆ i64 ┆ i64 │
134
+ # # ╞═════╪═════════╪══════════════╡
135
+ # # │ one ┆ 9 ┆ 9 │
136
+ # # │ two ┆ 6 ┆ 5 │
137
+ # # └─────┴─────────┴──────────────┘
138
def agg(aggs)
  # Build the query on the lazy frame with the stored keys and ordering
  # flag, then collect it with optimizations disabled.
  @df.lazy
    .group_by(@by, maintain_order: @maintain_order)
    .agg(aggs)
    .collect(no_optimization: true)
end
144
+
145
+ # Get the first `n` rows of each group.
146
+ #
147
+ # @param n [Integer]
148
+ # Number of rows to return.
149
+ #
150
+ # @return [DataFrame]
151
+ #
152
+ # @example
153
+ # df = Polars::DataFrame.new(
154
+ # {
155
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
156
+ # "nrs" => [1, 2, 3, 4, 5, 6]
157
+ # }
158
+ # )
159
+ # # =>
160
+ # # shape: (6, 2)
161
+ # # ┌─────────┬─────┐
162
+ # # │ letters ┆ nrs │
163
+ # # │ --- ┆ --- │
164
+ # # │ str ┆ i64 │
165
+ # # ╞═════════╪═════╡
166
+ # # │ c ┆ 1 │
167
+ # # │ c ┆ 2 │
168
+ # # │ a ┆ 3 │
169
+ # # │ c ┆ 4 │
170
+ # # │ a ┆ 5 │
171
+ # # │ b ┆ 6 │
172
+ # # └─────────┴─────┘
173
+ #
174
+ # @example
175
+ # df.group_by("letters").head(2).sort("letters")
176
+ # # =>
177
+ # # shape: (5, 2)
178
+ # # ┌─────────┬─────┐
179
+ # # │ letters ┆ nrs │
180
+ # # │ --- ┆ --- │
181
+ # # │ str ┆ i64 │
182
+ # # ╞═════════╪═════╡
183
+ # # │ a ┆ 3 │
184
+ # # │ a ┆ 5 │
185
+ # # │ b ┆ 6 │
186
+ # # │ c ┆ 1 │
187
+ # # │ c ┆ 2 │
188
+ # # └─────────┴─────┘
189
def head(n = 5)
  # Forward `n` to the lazy group-by's `head`, then collect with
  # optimizations disabled.
  @df.lazy
    .group_by(@by, maintain_order: @maintain_order)
    .head(n)
    .collect(no_optimization: true)
end
195
+
196
+ # Get the last `n` rows of each group.
197
+ #
198
+ # @param n [Integer]
199
+ # Number of rows to return.
200
+ #
201
+ # @return [DataFrame]
202
+ #
203
+ # @example
204
+ # df = Polars::DataFrame.new(
205
+ # {
206
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
207
+ # "nrs" => [1, 2, 3, 4, 5, 6]
208
+ # }
209
+ # )
210
+ # # =>
211
+ # # shape: (6, 2)
212
+ # # ┌─────────┬─────┐
213
+ # # │ letters ┆ nrs │
214
+ # # │ --- ┆ --- │
215
+ # # │ str ┆ i64 │
216
+ # # ╞═════════╪═════╡
217
+ # # │ c ┆ 1 │
218
+ # # │ c ┆ 2 │
219
+ # # │ a ┆ 3 │
220
+ # # │ c ┆ 4 │
221
+ # # │ a ┆ 5 │
222
+ # # │ b ┆ 6 │
223
+ # # └─────────┴─────┘
224
+ #
225
+ # @example
226
+ # df.group_by("letters").tail(2).sort("letters")
227
+ # # =>
228
+ # # shape: (5, 2)
229
+ # # ┌─────────┬─────┐
230
+ # # │ letters ┆ nrs │
231
+ # # │ --- ┆ --- │
232
+ # # │ str ┆ i64 │
233
+ # # ╞═════════╪═════╡
234
+ # # │ a ┆ 3 │
235
+ # # │ a ┆ 5 │
236
+ # # │ b ┆ 6 │
237
+ # # │ c ┆ 2 │
238
+ # # │ c ┆ 4 │
239
+ # # └─────────┴─────┘
240
def tail(n = 5)
  # Forward `n` to the lazy group-by's `tail`, then collect with
  # optimizations disabled.
  @df.lazy
    .group_by(@by, maintain_order: @maintain_order)
    .tail(n)
    .collect(no_optimization: true)
end
246
+
247
+ # Aggregate the first values in the group.
248
+ #
249
+ # @return [DataFrame]
250
+ #
251
+ # @example
252
+ # df = Polars::DataFrame.new(
253
+ # {
254
+ # "a" => [1, 2, 2, 3, 4, 5],
255
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
256
+ # "c" => [true, true, true, false, false, true],
257
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
258
+ # }
259
+ # )
260
+ # df.group_by("d", maintain_order: true).first
261
+ # # =>
262
+ # # shape: (3, 4)
263
+ # # ┌────────┬─────┬──────┬───────┐
264
+ # # │ d ┆ a ┆ b ┆ c │
265
+ # # │ --- ┆ --- ┆ --- ┆ --- │
266
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
267
+ # # ╞════════╪═════╪══════╪═══════╡
268
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ true │
269
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
270
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
271
+ # # └────────┴─────┴──────┴───────┘
272
def first
  # Delegates to #agg with `first` applied to the `Polars.all` selection.
  agg(Polars.all.first)
end
275
+
276
+ # Aggregate the last values in the group.
277
+ #
278
+ # @return [DataFrame]
279
+ #
280
+ # @example
281
+ # df = Polars::DataFrame.new(
282
+ # {
283
+ # "a" => [1, 2, 2, 3, 4, 5],
284
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
285
+ # "c" => [true, true, true, false, false, true],
286
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
287
+ # }
288
+ # )
289
+ # df.group_by("d", maintain_order: true).last
290
+ # # =>
291
+ # # shape: (3, 4)
292
+ # # ┌────────┬─────┬──────┬───────┐
293
+ # # │ d ┆ a ┆ b ┆ c │
294
+ # # │ --- ┆ --- ┆ --- ┆ --- │
295
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
296
+ # # ╞════════╪═════╪══════╪═══════╡
297
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ false │
298
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
299
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
300
+ # # └────────┴─────┴──────┴───────┘
301
def last
  # Delegates to #agg with `last` applied to the `Polars.all` selection.
  agg(Polars.all.last)
end
304
+
305
+ # Reduce the groups to the sum.
306
+ #
307
+ # @return [DataFrame]
308
+ #
309
+ # @example
310
+ # df = Polars::DataFrame.new(
311
+ # {
312
+ # "a" => [1, 2, 2, 3, 4, 5],
313
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
314
+ # "c" => [true, true, true, false, false, true],
315
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
316
+ # }
317
+ # )
318
+ # df.group_by("d", maintain_order: true).sum
319
+ # # =>
320
+ # # shape: (3, 4)
321
+ # # ┌────────┬─────┬──────┬─────┐
322
+ # # │ d ┆ a ┆ b ┆ c │
323
+ # # │ --- ┆ --- ┆ --- ┆ --- │
324
+ # # │ str ┆ i64 ┆ f64 ┆ u32 │
325
+ # # ╞════════╪═════╪══════╪═════╡
326
+ # # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
327
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
328
+ # # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
329
+ # # └────────┴─────┴──────┴─────┘
330
def sum
  # Delegates to #agg with `sum` applied to the `Polars.all` selection.
  agg(Polars.all.sum)
end
333
+
334
+ # Reduce the groups to the minimal value.
335
+ #
336
+ # @return [DataFrame]
337
+ #
338
+ # @example
339
+ # df = Polars::DataFrame.new(
340
+ # {
341
+ # "a" => [1, 2, 2, 3, 4, 5],
342
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
343
+ # "c" => [true, true, true, false, false, true],
344
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
345
+ # }
346
+ # )
347
+ # df.group_by("d", maintain_order: true).min
348
+ # # =>
349
+ # # shape: (3, 4)
350
+ # # ┌────────┬─────┬──────┬───────┐
351
+ # # │ d ┆ a ┆ b ┆ c │
352
+ # # │ --- ┆ --- ┆ --- ┆ --- │
353
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
354
+ # # ╞════════╪═════╪══════╪═══════╡
355
+ # # │ Apple ┆ 1 ┆ 0.5 ┆ false │
356
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
357
+ # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
358
+ # # └────────┴─────┴──────┴───────┘
359
def min
  # Delegates to #agg with `min` applied to the `Polars.all` selection.
  agg(Polars.all.min)
end
362
+
363
+ # Reduce the groups to the maximal value.
364
+ #
365
+ # @return [DataFrame]
366
+ #
367
+ # @example
368
+ # df = Polars::DataFrame.new(
369
+ # {
370
+ # "a" => [1, 2, 2, 3, 4, 5],
371
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
372
+ # "c" => [true, true, true, false, false, true],
373
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
374
+ # }
375
+ # )
376
+ # df.group_by("d", maintain_order: true).max
377
+ # # =>
378
+ # # shape: (3, 4)
379
+ # # ┌────────┬─────┬──────┬──────┐
380
+ # # │ d ┆ a ┆ b ┆ c │
381
+ # # │ --- ┆ --- ┆ --- ┆ --- │
382
+ # # │ str ┆ i64 ┆ f64 ┆ bool │
383
+ # # ╞════════╪═════╪══════╪══════╡
384
+ # # │ Apple ┆ 3 ┆ 10.0 ┆ true │
385
+ # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
386
+ # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
387
+ # # └────────┴─────┴──────┴──────┘
388
def max
  # Delegates to #agg with `max` applied to the `Polars.all` selection.
  agg(Polars.all.max)
end
391
+
392
+ # Count the number of values in each group.
393
+ #
394
+ # @return [DataFrame]
395
+ #
396
+ # @example
397
+ # df = Polars::DataFrame.new(
398
+ # {
399
+ # "a" => [1, 2, 2, 3, 4, 5],
400
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
401
+ # "c" => [true, true, true, false, false, true],
402
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
403
+ # }
404
+ # )
405
+ # df.group_by("d", maintain_order: true).count
406
+ # # =>
407
+ # # shape: (3, 2)
408
+ # # ┌────────┬───────┐
409
+ # # │ d ┆ count │
410
+ # # │ --- ┆ --- │
411
+ # # │ str ┆ u32 │
412
+ # # ╞════════╪═══════╡
413
+ # # │ Apple ┆ 3 │
414
+ # # │ Orange ┆ 1 │
415
+ # # │ Banana ┆ 2 │
416
+ # # └────────┴───────┘
417
def count
  # A single `Polars.len` expression, named "count" in the output.
  agg(Polars.len.alias("count"))
end
420
+
421
+ # Reduce the groups to the mean values.
422
+ #
423
+ # @return [DataFrame]
424
+ #
425
+ # @example
426
+ # df = Polars::DataFrame.new(
427
+ # {
428
+ # "a" => [1, 2, 2, 3, 4, 5],
429
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
430
+ # "c" => [true, true, true, false, false, true],
431
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
432
+ # }
433
+ # )
434
+ # df.group_by("d", maintain_order: true).mean
435
+ # # =>
436
+ # # shape: (3, 4)
437
+ # # ┌────────┬─────┬──────────┬──────────┐
438
+ # # │ d ┆ a ┆ b ┆ c │
439
+ # # │ --- ┆ --- ┆ --- ┆ --- │
440
+ # # │ str ┆ f64 ┆ f64 ┆ f64 │
441
+ # # ╞════════╪═════╪══════════╪══════════╡
442
+ # # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
443
+ # # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
444
+ # # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
445
+ # # └────────┴─────┴──────────┴──────────┘
446
def mean
  # Delegates to #agg with `mean` applied to the `Polars.all` selection.
  agg(Polars.all.mean)
end
449
+
450
+ # Count the unique values per group.
451
+ #
452
+ # @return [DataFrame]
453
+ #
454
+ # @example
455
+ # df = Polars::DataFrame.new(
456
+ # {
457
+ # "a" => [1, 2, 1, 3, 4, 5],
458
+ # "b" => [0.5, 0.5, 0.5, 10, 13, 14],
459
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
460
+ # }
461
+ # )
462
+ # df.group_by("d", maintain_order: true).n_unique
463
+ # # =>
464
+ # # shape: (2, 3)
465
+ # # ┌────────┬─────┬─────┐
466
+ # # │ d ┆ a ┆ b │
467
+ # # │ --- ┆ --- ┆ --- │
468
+ # # │ str ┆ u32 ┆ u32 │
469
+ # # ╞════════╪═════╪═════╡
470
+ # # │ Apple ┆ 2 ┆ 2 │
471
+ # # │ Banana ┆ 3 ┆ 3 │
472
+ # # └────────┴─────┴─────┘
473
def n_unique
  # Delegates to #agg with `n_unique` applied to the `Polars.all` selection.
  agg(Polars.all.n_unique)
end
476
+
477
+ # Compute the quantile per group.
478
+ #
479
+ # @param quantile [Float]
480
+ # Quantile between 0.0 and 1.0.
481
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
482
+ # Interpolation method.
483
+ #
484
+ # @return [DataFrame]
485
+ #
486
+ # @example
487
+ # df = Polars::DataFrame.new(
488
+ # {
489
+ # "a" => [1, 2, 2, 3, 4, 5],
490
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
491
+ # "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
492
+ # }
493
+ # )
494
+ # df.group_by("d", maintain_order: true).quantile(1)
495
+ # # =>
496
+ # # shape: (3, 3)
497
+ # # ┌────────┬─────┬──────┐
498
+ # # │ d ┆ a ┆ b │
499
+ # # │ --- ┆ --- ┆ --- │
500
+ # # │ str ┆ f64 ┆ f64 │
501
+ # # ╞════════╪═════╪══════╡
502
+ # # │ Apple ┆ 3.0 ┆ 10.0 │
503
+ # # │ Orange ┆ 2.0 ┆ 0.5 │
504
+ # # │ Banana ┆ 5.0 ┆ 14.0 │
505
+ # # └────────┴─────┴──────┘
506
def quantile(quantile, interpolation: "nearest")
  # Both arguments are passed through unchanged to the quantile expression
  # built on the `Polars.all` selection; #agg runs it.
  agg(Polars.all.quantile(quantile, interpolation: interpolation))
end
509
+
510
+ # Return the median per group.
511
+ #
512
+ # @return [DataFrame]
513
+ #
514
+ # @example
515
+ # df = Polars::DataFrame.new(
516
+ # {
517
+ # "a" => [1, 2, 2, 3, 4, 5],
518
+ # "b" => [0.5, 0.5, 4, 10, 13, 14],
519
+ # "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
520
+ # }
521
+ # )
522
+ # df.group_by("d", maintain_order: true).median
523
+ # # =>
524
+ # # shape: (2, 3)
525
+ # # ┌────────┬─────┬──────┐
526
+ # # │ d ┆ a ┆ b │
527
+ # # │ --- ┆ --- ┆ --- │
528
+ # # │ str ┆ f64 ┆ f64 │
529
+ # # ╞════════╪═════╪══════╡
530
+ # # │ Apple ┆ 2.0 ┆ 4.0 │
531
+ # # │ Banana ┆ 4.0 ┆ 13.0 │
532
+ # # └────────┴─────┴──────┘
533
def median
  # Delegates to #agg with `median` applied to the `Polars.all` selection.
  agg(Polars.all.median)
end
536
+
537
+ # Plot data.
538
+ #
539
+ # @return [Vega::LiteChart]
540
def plot(*args, **options)
  # An Array of more than one grouping key is rejected up front.
  raise ArgumentError, "Multiple groups not supported" if @by.is_a?(::Array) && @by.size > 1
  # The grouping is supplied by this object, so a caller-provided :group is
  # rejected with the same message as Ruby.
  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)

  # Everything else is forwarded untouched, with the stored keys as `group`.
  @df.plot(*args, **options, group: @by)
end
547
+ end
548
+ end