polars-df 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,66 @@ module Polars
12
12
  self.maintain_order = maintain_order
13
13
  end
14
14
 
15
# Allows iteration over the groups of the groupby operation.
#
# Each group is yielded as a pair: the group name followed by the rows of
# that group. When grouping by a single String or Expr the group name is a
# single value; otherwise it is a tuple of values.
#
# @yield [group_name, group_data] One pair per group.
# @return [Enumerator, nil]
#   An Enumerator when no block is given, otherwise nil once every group
#   has been yielded.
#
# @example
#   df = Polars::DataFrame.new({"foo" => ["a", "a", "b"], "bar" => [1, 2, 3]})
#   df.groupby("foo", maintain_order: true).each.to_h
#   # =>
#   # {"a"=>shape: (2, 2)
#   # ┌─────┬─────┐
#   # │ foo ┆ bar │
#   # │ --- ┆ --- │
#   # │ str ┆ i64 │
#   # ╞═════╪═════╡
#   # │ a   ┆ 1   │
#   # │ a   ┆ 2   │
#   # └─────┴─────┘, "b"=>shape: (1, 2)
#   # ┌─────┬─────┐
#   # │ foo ┆ bar │
#   # │ --- ┆ --- │
#   # │ str ┆ i64 │
#   # ╞═════╪═════╡
#   # │ b   ┆ 3   │
#   # └─────┴─────┘}
def each
  return to_enum(:each) unless block_given?

  temp_col = "__POLARS_GB_GROUP_INDICES"

  # Tag every row with a row count, then aggregate those counts per group so
  # each group carries the positions of its rows.
  groups_df =
    Utils.wrap_df(_df)
      .lazy
      .with_row_count(name: temp_col)
      .groupby(by, maintain_order: maintain_order)
      .agg(Polars.col(temp_col))
      .collect(no_optimization: true)

  group_names = groups_df.select(Polars.all.exclude(temp_col))

  # When grouping by a single column, group name is a single value
  # When grouping by multiple columns, group name is a tuple of values
  _group_names =
    if by.is_a?(String) || by.is_a?(Expr)
      group_names.to_series.each
    else
      group_names.iter_rows
    end

  _group_indices = groups_df.select(temp_col).to_series

  # The wrapper around the underlying frame does not depend on the group,
  # so build it once instead of on every iteration.
  df = _dataframe_class._from_rbdf(_df)

  _group_indices.length.times do |i|
    # Name first, data second: same evaluation order as before.
    yield _group_names.next, df[_group_indices[i]]
  end

  # Integer#times would return the count; keep the nil result of the
  # original while loop.
  nil
end
74
+
15
75
  # Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
16
76
  #
17
77
  # Implementing logic using a Ruby function is almost always _significantly_
@@ -45,11 +105,8 @@ module Polars
45
105
  # # │ i64 ┆ str ┆ str │
46
106
  # # ╞═════╪═══════╪══════════╡
47
107
  # # │ 1 ┆ green ┆ triangle │
48
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
49
108
  # # │ 2 ┆ green ┆ square │
50
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
51
109
  # # │ 4 ┆ red ┆ square │
52
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
53
110
  # # │ 3 ┆ red ┆ triangle │
54
111
  # # └─────┴───────┴──────────┘
55
112
  # def apply(&f)
@@ -83,7 +140,6 @@ module Polars
83
140
  # # │ str ┆ i64 ┆ i64 │
84
141
  # # ╞═════╪═════════╪══════════════╡
85
142
  # # │ one ┆ 9 ┆ 9 │
86
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
87
143
  # # │ two ┆ 6 ┆ 5 │
88
144
  # # └─────┴─────────┴──────────────┘
89
145
  def agg(aggs)
@@ -117,15 +173,10 @@ module Polars
117
173
  # # │ str ┆ i64 │
118
174
  # # ╞═════════╪═════╡
119
175
  # # │ c ┆ 1 │
120
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
121
176
  # # │ c ┆ 2 │
122
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
123
177
  # # │ a ┆ 3 │
124
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
125
178
  # # │ c ┆ 4 │
126
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
127
179
  # # │ a ┆ 5 │
128
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
129
180
  # # │ b ┆ 6 │
130
181
  # # └─────────┴─────┘
131
182
  #
@@ -139,13 +190,9 @@ module Polars
139
190
  # # │ str ┆ i64 │
140
191
  # # ╞═════════╪═════╡
141
192
  # # │ a ┆ 3 │
142
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
143
193
  # # │ a ┆ 5 │
144
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
145
194
  # # │ b ┆ 6 │
146
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
147
195
  # # │ c ┆ 1 │
148
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
149
196
  # # │ c ┆ 2 │
150
197
  # # └─────────┴─────┘
151
198
  def head(n = 5)
@@ -181,15 +228,10 @@ module Polars
181
228
  # # │ str ┆ i64 │
182
229
  # # ╞═════════╪═════╡
183
230
  # # │ c ┆ 1 │
184
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
185
231
  # # │ c ┆ 2 │
186
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
187
232
  # # │ a ┆ 3 │
188
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
189
233
  # # │ c ┆ 4 │
190
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
191
234
  # # │ a ┆ 5 │
192
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
193
235
  # # │ b ┆ 6 │
194
236
  # # └─────────┴─────┘
195
237
  #
@@ -203,13 +245,9 @@ module Polars
203
245
  # # │ str ┆ i64 │
204
246
  # # ╞═════════╪═════╡
205
247
  # # │ a ┆ 3 │
206
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
207
248
  # # │ a ┆ 5 │
208
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
209
249
  # # │ b ┆ 6 │
210
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
211
250
  # # │ c ┆ 2 │
212
- # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
213
251
  # # │ c ┆ 4 │
214
252
  # # └─────────┴─────┘
215
253
  def tail(n = 5)
@@ -223,8 +261,6 @@ module Polars
223
261
  _dataframe_class._from_rbdf(df._df)
224
262
  end
225
263
 
226
- # pivot is deprecated
227
-
228
264
  # Aggregate the first values in the group.
229
265
  #
230
266
  # @return [DataFrame]
@@ -247,9 +283,7 @@ module Polars
247
283
  # # │ str ┆ i64 ┆ f64 ┆ bool │
248
284
  # # ╞════════╪═════╪══════╪═══════╡
249
285
  # # │ Apple ┆ 1 ┆ 0.5 ┆ true │
250
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
251
286
  # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
252
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
253
287
  # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
254
288
  # # └────────┴─────┴──────┴───────┘
255
289
  def first
@@ -278,9 +312,7 @@ module Polars
278
312
  # # │ str ┆ i64 ┆ f64 ┆ bool │
279
313
  # # ╞════════╪═════╪══════╪═══════╡
280
314
  # # │ Apple ┆ 3 ┆ 10.0 ┆ false │
281
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
282
315
  # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
283
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
284
316
  # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
285
317
  # # └────────┴─────┴──────┴───────┘
286
318
  def last
@@ -309,9 +341,7 @@ module Polars
309
341
  # # │ str ┆ i64 ┆ f64 ┆ u32 │
310
342
  # # ╞════════╪═════╪══════╪═════╡
311
343
  # # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
312
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
313
344
  # # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
314
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
315
345
  # # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
316
346
  # # └────────┴─────┴──────┴─────┘
317
347
  def sum
@@ -340,9 +370,7 @@ module Polars
340
370
  # # │ str ┆ i64 ┆ f64 ┆ bool │
341
371
  # # ╞════════╪═════╪══════╪═══════╡
342
372
  # # │ Apple ┆ 1 ┆ 0.5 ┆ false │
343
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
344
373
  # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
345
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
346
374
  # # │ Banana ┆ 4 ┆ 13.0 ┆ false │
347
375
  # # └────────┴─────┴──────┴───────┘
348
376
  def min
@@ -371,9 +399,7 @@ module Polars
371
399
  # # │ str ┆ i64 ┆ f64 ┆ bool │
372
400
  # # ╞════════╪═════╪══════╪══════╡
373
401
  # # │ Apple ┆ 3 ┆ 10.0 ┆ true │
374
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
375
402
  # # │ Orange ┆ 2 ┆ 0.5 ┆ true │
376
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
377
403
  # # │ Banana ┆ 5 ┆ 14.0 ┆ true │
378
404
  # # └────────┴─────┴──────┴──────┘
379
405
  def max
@@ -402,9 +428,7 @@ module Polars
402
428
  # # │ str ┆ u32 │
403
429
  # # ╞════════╪═══════╡
404
430
  # # │ Apple ┆ 3 │
405
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
406
431
  # # │ Orange ┆ 1 │
407
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
408
432
  # # │ Banana ┆ 2 │
409
433
  # # └────────┴───────┘
410
434
  def count
@@ -433,9 +457,7 @@ module Polars
433
457
  # # │ str ┆ f64 ┆ f64 ┆ f64 │
434
458
  # # ╞════════╪═════╪══════════╪══════════╡
435
459
  # # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
436
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
437
460
  # # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
438
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
439
461
  # # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
440
462
  # # └────────┴─────┴──────────┴──────────┘
441
463
  def mean
@@ -463,7 +485,6 @@ module Polars
463
485
  # # │ str ┆ u32 ┆ u32 │
464
486
  # # ╞════════╪═════╪═════╡
465
487
  # # │ Apple ┆ 2 ┆ 2 │
466
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
467
488
  # # │ Banana ┆ 3 ┆ 3 │
468
489
  # # └────────┴─────┴─────┘
469
490
  def n_unique
@@ -496,9 +517,7 @@ module Polars
496
517
  # # │ str ┆ f64 ┆ f64 │
497
518
  # # ╞════════╪═════╪══════╡
498
519
  # # │ Apple ┆ 3.0 ┆ 10.0 │
499
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
500
520
  # # │ Orange ┆ 2.0 ┆ 0.5 │
501
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
502
521
  # # │ Banana ┆ 5.0 ┆ 14.0 │
503
522
  # # └────────┴─────┴──────┘
504
523
  def quantile(quantile, interpolation: "nearest")
@@ -526,7 +545,6 @@ module Polars
526
545
  # # │ str ┆ f64 ┆ f64 │
527
546
  # # ╞════════╪═════╪══════╡
528
547
  # # │ Apple ┆ 2.0 ┆ 4.0 │
529
- # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
530
548
  # # │ Banana ┆ 4.0 ┆ 13.0 │
531
549
  # # └────────┴─────┴──────┘
532
550
  def median
@@ -542,15 +560,14 @@ module Polars
542
560
  # df.groupby("a", maintain_order: true).agg_list
543
561
  # # =>
544
562
  # # shape: (2, 2)
545
- # # ┌─────┬───────────┐
546
- # # │ a ┆ b
547
- # # │ --- ┆ ---
548
- # # │ str ┆ list[i64] │
549
- # # ╞═════╪═══════════╡
550
- # # │ one ┆ [1, 3]
551
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
552
- # # │ two ┆ [2, 4] │
553
- # # └─────┴───────────┘
563
+ # # ┌─────┬─────────────────┐
564
+ # # │ a ┆ b
565
+ # # │ --- ┆ ---
566
+ # # │ str ┆ list[list[i64]]
567
+ # # ╞═════╪═════════════════╡
568
+ # # │ one ┆ [[1, 3]]
569
+ # # │ two ┆ [[2, 4]] │
570
+ # # └─────┴─────────────────┘
554
571
  def agg_list
555
572
  agg(Polars.all.list)
556
573
  end
data/lib/polars/io.rb CHANGED
@@ -817,7 +817,7 @@ module Polars
817
817
  if defined?(URI) && file.is_a?(URI)
818
818
  require "open-uri"
819
819
 
820
- file = URI.open(file)
820
+ file = file.open
821
821
  end
822
822
 
823
823
  yield file