polars-df 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +290 -137
- data/Cargo.toml +1 -1
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/dataframe.rs +6 -6
- data/ext/polars/src/apply/series.rs +10 -10
- data/ext/polars/src/batched_csv.rs +6 -4
- data/ext/polars/src/conversion.rs +40 -13
- data/ext/polars/src/dataframe.rs +45 -43
- data/ext/polars/src/error.rs +8 -8
- data/ext/polars/src/file.rs +5 -4
- data/ext/polars/src/lazy/apply.rs +1 -1
- data/ext/polars/src/lazy/dataframe.rs +12 -6
- data/ext/polars/src/lazy/dsl.rs +99 -45
- data/ext/polars/src/lazy/meta.rs +10 -9
- data/ext/polars/src/lib.rs +28 -29
- data/ext/polars/src/object.rs +2 -1
- data/ext/polars/src/series.rs +23 -21
- data/lib/polars/cat_expr.rb +0 -4
- data/lib/polars/cat_name_space.rb +0 -4
- data/lib/polars/convert.rb +0 -7
- data/lib/polars/data_frame.rb +139 -204
- data/lib/polars/date_time_expr.rb +19 -151
- data/lib/polars/date_time_name_space.rb +17 -17
- data/lib/polars/expr.rb +68 -315
- data/lib/polars/group_by.rb +68 -51
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +1 -103
- data/lib/polars/lazy_functions.rb +0 -26
- data/lib/polars/lazy_group_by.rb +0 -8
- data/lib/polars/list_expr.rb +5 -27
- data/lib/polars/list_name_space.rb +5 -8
- data/lib/polars/series.rb +20 -16
- data/lib/polars/string_expr.rb +20 -76
- data/lib/polars/string_name_space.rb +5 -15
- data/lib/polars/struct_expr.rb +0 -2
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
data/lib/polars/group_by.rb
CHANGED
@@ -12,6 +12,66 @@ module Polars
|
|
12
12
|
self.maintain_order = maintain_order
|
13
13
|
end
|
14
14
|
|
15
|
+
# Allows iteration over the groups of the groupby operation.
|
16
|
+
#
|
17
|
+
# @return [Object]
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# df = Polars::DataFrame.new({"foo" => ["a", "a", "b"], "bar" => [1, 2, 3]})
|
21
|
+
# df.groupby("foo", maintain_order: true).each.to_h
|
22
|
+
# # =>
|
23
|
+
# # {"a"=>shape: (2, 2)
|
24
|
+
# # ┌─────┬─────┐
|
25
|
+
# # │ foo ┆ bar │
|
26
|
+
# # │ --- ┆ --- │
|
27
|
+
# # │ str ┆ i64 │
|
28
|
+
# # ╞═════╪═════╡
|
29
|
+
# # │ a ┆ 1 │
|
30
|
+
# # │ a ┆ 2 │
|
31
|
+
# # └─────┴─────┘, "b"=>shape: (1, 2)
|
32
|
+
# # ┌─────┬─────┐
|
33
|
+
# # │ foo ┆ bar │
|
34
|
+
# # │ --- ┆ --- │
|
35
|
+
# # │ str ┆ i64 │
|
36
|
+
# # ╞═════╪═════╡
|
37
|
+
# # │ b ┆ 3 │
|
38
|
+
# # └─────┴─────┘}
|
39
|
+
def each
|
40
|
+
return to_enum(:each) unless block_given?
|
41
|
+
|
42
|
+
temp_col = "__POLARS_GB_GROUP_INDICES"
|
43
|
+
groups_df =
|
44
|
+
Utils.wrap_df(_df)
|
45
|
+
.lazy
|
46
|
+
.with_row_count(name: temp_col)
|
47
|
+
.groupby(by, maintain_order: maintain_order)
|
48
|
+
.agg(Polars.col(temp_col))
|
49
|
+
.collect(no_optimization: true)
|
50
|
+
|
51
|
+
group_names = groups_df.select(Polars.all.exclude(temp_col))
|
52
|
+
|
53
|
+
# When grouping by a single column, group name is a single value
|
54
|
+
# When grouping by multiple columns, group name is a tuple of values
|
55
|
+
if by.is_a?(String) || by.is_a?(Expr)
|
56
|
+
_group_names = group_names.to_series.each
|
57
|
+
else
|
58
|
+
_group_names = group_names.iter_rows
|
59
|
+
end
|
60
|
+
|
61
|
+
_group_indices = groups_df.select(temp_col).to_series
|
62
|
+
_current_index = 0
|
63
|
+
|
64
|
+
while _current_index < _group_indices.length
|
65
|
+
df = _dataframe_class._from_rbdf(_df)
|
66
|
+
|
67
|
+
group_name = _group_names.next
|
68
|
+
group_data = df[_group_indices[_current_index]]
|
69
|
+
_current_index += 1
|
70
|
+
|
71
|
+
yield group_name, group_data
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
15
75
|
# Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
|
16
76
|
#
|
17
77
|
# Implementing logic using a Ruby function is almost always _significantly_
|
@@ -45,11 +105,8 @@ module Polars
|
|
45
105
|
# # │ i64 ┆ str ┆ str │
|
46
106
|
# # ╞═════╪═══════╪══════════╡
|
47
107
|
# # │ 1 ┆ green ┆ triangle │
|
48
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
49
108
|
# # │ 2 ┆ green ┆ square │
|
50
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
51
109
|
# # │ 4 ┆ red ┆ square │
|
52
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
53
110
|
# # │ 3 ┆ red ┆ triangle │
|
54
111
|
# # └─────┴───────┴──────────┘
|
55
112
|
# def apply(&f)
|
@@ -83,7 +140,6 @@ module Polars
|
|
83
140
|
# # │ str ┆ i64 ┆ i64 │
|
84
141
|
# # ╞═════╪═════════╪══════════════╡
|
85
142
|
# # │ one ┆ 9 ┆ 9 │
|
86
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
87
143
|
# # │ two ┆ 6 ┆ 5 │
|
88
144
|
# # └─────┴─────────┴──────────────┘
|
89
145
|
def agg(aggs)
|
@@ -117,15 +173,10 @@ module Polars
|
|
117
173
|
# # │ str ┆ i64 │
|
118
174
|
# # ╞═════════╪═════╡
|
119
175
|
# # │ c ┆ 1 │
|
120
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
121
176
|
# # │ c ┆ 2 │
|
122
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
123
177
|
# # │ a ┆ 3 │
|
124
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
125
178
|
# # │ c ┆ 4 │
|
126
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
127
179
|
# # │ a ┆ 5 │
|
128
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
129
180
|
# # │ b ┆ 6 │
|
130
181
|
# # └─────────┴─────┘
|
131
182
|
#
|
@@ -139,13 +190,9 @@ module Polars
|
|
139
190
|
# # │ str ┆ i64 │
|
140
191
|
# # ╞═════════╪═════╡
|
141
192
|
# # │ a ┆ 3 │
|
142
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
143
193
|
# # │ a ┆ 5 │
|
144
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
145
194
|
# # │ b ┆ 6 │
|
146
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
147
195
|
# # │ c ┆ 1 │
|
148
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
149
196
|
# # │ c ┆ 2 │
|
150
197
|
# # └─────────┴─────┘
|
151
198
|
def head(n = 5)
|
@@ -181,15 +228,10 @@ module Polars
|
|
181
228
|
# # │ str ┆ i64 │
|
182
229
|
# # ╞═════════╪═════╡
|
183
230
|
# # │ c ┆ 1 │
|
184
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
185
231
|
# # │ c ┆ 2 │
|
186
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
187
232
|
# # │ a ┆ 3 │
|
188
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
189
233
|
# # │ c ┆ 4 │
|
190
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
191
234
|
# # │ a ┆ 5 │
|
192
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
193
235
|
# # │ b ┆ 6 │
|
194
236
|
# # └─────────┴─────┘
|
195
237
|
#
|
@@ -203,13 +245,9 @@ module Polars
|
|
203
245
|
# # │ str ┆ i64 │
|
204
246
|
# # ╞═════════╪═════╡
|
205
247
|
# # │ a ┆ 3 │
|
206
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
207
248
|
# # │ a ┆ 5 │
|
208
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
209
249
|
# # │ b ┆ 6 │
|
210
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
211
250
|
# # │ c ┆ 2 │
|
212
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
213
251
|
# # │ c ┆ 4 │
|
214
252
|
# # └─────────┴─────┘
|
215
253
|
def tail(n = 5)
|
@@ -223,8 +261,6 @@ module Polars
|
|
223
261
|
_dataframe_class._from_rbdf(df._df)
|
224
262
|
end
|
225
263
|
|
226
|
-
# pivot is deprecated
|
227
|
-
|
228
264
|
# Aggregate the first values in the group.
|
229
265
|
#
|
230
266
|
# @return [DataFrame]
|
@@ -247,9 +283,7 @@ module Polars
|
|
247
283
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
248
284
|
# # ╞════════╪═════╪══════╪═══════╡
|
249
285
|
# # │ Apple ┆ 1 ┆ 0.5 ┆ true │
|
250
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
251
286
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
252
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
253
287
|
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
254
288
|
# # └────────┴─────┴──────┴───────┘
|
255
289
|
def first
|
@@ -278,9 +312,7 @@ module Polars
|
|
278
312
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
279
313
|
# # ╞════════╪═════╪══════╪═══════╡
|
280
314
|
# # │ Apple ┆ 3 ┆ 10.0 ┆ false │
|
281
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
282
315
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
283
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
284
316
|
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
285
317
|
# # └────────┴─────┴──────┴───────┘
|
286
318
|
def last
|
@@ -309,9 +341,7 @@ module Polars
|
|
309
341
|
# # │ str ┆ i64 ┆ f64 ┆ u32 │
|
310
342
|
# # ╞════════╪═════╪══════╪═════╡
|
311
343
|
# # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
|
312
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
313
344
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
|
314
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
315
345
|
# # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
|
316
346
|
# # └────────┴─────┴──────┴─────┘
|
317
347
|
def sum
|
@@ -340,9 +370,7 @@ module Polars
|
|
340
370
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
341
371
|
# # ╞════════╪═════╪══════╪═══════╡
|
342
372
|
# # │ Apple ┆ 1 ┆ 0.5 ┆ false │
|
343
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
344
373
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
345
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
346
374
|
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
347
375
|
# # └────────┴─────┴──────┴───────┘
|
348
376
|
def min
|
@@ -371,9 +399,7 @@ module Polars
|
|
371
399
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
372
400
|
# # ╞════════╪═════╪══════╪══════╡
|
373
401
|
# # │ Apple ┆ 3 ┆ 10.0 ┆ true │
|
374
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
375
402
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
376
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
377
403
|
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
378
404
|
# # └────────┴─────┴──────┴──────┘
|
379
405
|
def max
|
@@ -402,9 +428,7 @@ module Polars
|
|
402
428
|
# # │ str ┆ u32 │
|
403
429
|
# # ╞════════╪═══════╡
|
404
430
|
# # │ Apple ┆ 3 │
|
405
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
406
431
|
# # │ Orange ┆ 1 │
|
407
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
408
432
|
# # │ Banana ┆ 2 │
|
409
433
|
# # └────────┴───────┘
|
410
434
|
def count
|
@@ -433,9 +457,7 @@ module Polars
|
|
433
457
|
# # │ str ┆ f64 ┆ f64 ┆ f64 │
|
434
458
|
# # ╞════════╪═════╪══════════╪══════════╡
|
435
459
|
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
|
436
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
437
460
|
# # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
|
438
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
439
461
|
# # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
|
440
462
|
# # └────────┴─────┴──────────┴──────────┘
|
441
463
|
def mean
|
@@ -463,7 +485,6 @@ module Polars
|
|
463
485
|
# # │ str ┆ u32 ┆ u32 │
|
464
486
|
# # ╞════════╪═════╪═════╡
|
465
487
|
# # │ Apple ┆ 2 ┆ 2 │
|
466
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
467
488
|
# # │ Banana ┆ 3 ┆ 3 │
|
468
489
|
# # └────────┴─────┴─────┘
|
469
490
|
def n_unique
|
@@ -496,9 +517,7 @@ module Polars
|
|
496
517
|
# # │ str ┆ f64 ┆ f64 │
|
497
518
|
# # ╞════════╪═════╪══════╡
|
498
519
|
# # │ Apple ┆ 3.0 ┆ 10.0 │
|
499
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
500
520
|
# # │ Orange ┆ 2.0 ┆ 0.5 │
|
501
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
502
521
|
# # │ Banana ┆ 5.0 ┆ 14.0 │
|
503
522
|
# # └────────┴─────┴──────┘
|
504
523
|
def quantile(quantile, interpolation: "nearest")
|
@@ -526,7 +545,6 @@ module Polars
|
|
526
545
|
# # │ str ┆ f64 ┆ f64 │
|
527
546
|
# # ╞════════╪═════╪══════╡
|
528
547
|
# # │ Apple ┆ 2.0 ┆ 4.0 │
|
529
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
530
548
|
# # │ Banana ┆ 4.0 ┆ 13.0 │
|
531
549
|
# # └────────┴─────┴──────┘
|
532
550
|
def median
|
@@ -542,15 +560,14 @@ module Polars
|
|
542
560
|
# df.groupby("a", maintain_order: true).agg_list
|
543
561
|
# # =>
|
544
562
|
# # shape: (2, 2)
|
545
|
-
# #
|
546
|
-
# # │ a ┆ b
|
547
|
-
# # │ --- ┆ ---
|
548
|
-
# # │ str ┆ list[i64] │
|
549
|
-
# #
|
550
|
-
# # │ one ┆ [1, 3]
|
551
|
-
# #
|
552
|
-
# #
|
553
|
-
# # └─────┴───────────┘
|
563
|
+
# # ┌─────┬─────────────────┐
|
564
|
+
# # │ a ┆ b │
|
565
|
+
# # │ --- ┆ --- │
|
566
|
+
# # │ str ┆ list[list[i64]] │
|
567
|
+
# # ╞═════╪═════════════════╡
|
568
|
+
# # │ one ┆ [[1, 3]] │
|
569
|
+
# # │ two ┆ [[2, 4]] │
|
570
|
+
# # └─────┴─────────────────┘
|
554
571
|
def agg_list
|
555
572
|
agg(Polars.all.list)
|
556
573
|
end
|