polars-df 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +290 -137
- data/Cargo.toml +1 -1
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/dataframe.rs +6 -6
- data/ext/polars/src/apply/series.rs +10 -10
- data/ext/polars/src/batched_csv.rs +6 -4
- data/ext/polars/src/conversion.rs +40 -13
- data/ext/polars/src/dataframe.rs +45 -43
- data/ext/polars/src/error.rs +8 -8
- data/ext/polars/src/file.rs +5 -4
- data/ext/polars/src/lazy/apply.rs +1 -1
- data/ext/polars/src/lazy/dataframe.rs +12 -6
- data/ext/polars/src/lazy/dsl.rs +99 -45
- data/ext/polars/src/lazy/meta.rs +10 -9
- data/ext/polars/src/lib.rs +28 -29
- data/ext/polars/src/object.rs +2 -1
- data/ext/polars/src/series.rs +23 -21
- data/lib/polars/cat_expr.rb +0 -4
- data/lib/polars/cat_name_space.rb +0 -4
- data/lib/polars/convert.rb +0 -7
- data/lib/polars/data_frame.rb +139 -204
- data/lib/polars/date_time_expr.rb +19 -151
- data/lib/polars/date_time_name_space.rb +17 -17
- data/lib/polars/expr.rb +68 -315
- data/lib/polars/group_by.rb +68 -51
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +1 -103
- data/lib/polars/lazy_functions.rb +0 -26
- data/lib/polars/lazy_group_by.rb +0 -8
- data/lib/polars/list_expr.rb +5 -27
- data/lib/polars/list_name_space.rb +5 -8
- data/lib/polars/series.rb +20 -16
- data/lib/polars/string_expr.rb +20 -76
- data/lib/polars/string_name_space.rb +5 -15
- data/lib/polars/struct_expr.rb +0 -2
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
data/lib/polars/group_by.rb
CHANGED
@@ -12,6 +12,66 @@ module Polars
|
|
12
12
|
self.maintain_order = maintain_order
|
13
13
|
end
|
14
14
|
|
15
|
+
# Allows iteration over the groups of the groupby operation.
|
16
|
+
#
|
17
|
+
# @return [Object]
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# df = Polars::DataFrame.new({"foo" => ["a", "a", "b"], "bar" => [1, 2, 3]})
|
21
|
+
# df.groupby("foo", maintain_order: true).each.to_h
|
22
|
+
# # =>
|
23
|
+
# # {"a"=>shape: (2, 2)
|
24
|
+
# # ┌─────┬─────┐
|
25
|
+
# # │ foo ┆ bar │
|
26
|
+
# # │ --- ┆ --- │
|
27
|
+
# # │ str ┆ i64 │
|
28
|
+
# # ╞═════╪═════╡
|
29
|
+
# # │ a ┆ 1 │
|
30
|
+
# # │ a ┆ 2 │
|
31
|
+
# # └─────┴─────┘, "b"=>shape: (1, 2)
|
32
|
+
# # ┌─────┬─────┐
|
33
|
+
# # │ foo ┆ bar │
|
34
|
+
# # │ --- ┆ --- │
|
35
|
+
# # │ str ┆ i64 │
|
36
|
+
# # ╞═════╪═════╡
|
37
|
+
# # │ b ┆ 3 │
|
38
|
+
# # └─────┴─────┘}
|
39
|
+
def each
|
40
|
+
return to_enum(:each) unless block_given?
|
41
|
+
|
42
|
+
temp_col = "__POLARS_GB_GROUP_INDICES"
|
43
|
+
groups_df =
|
44
|
+
Utils.wrap_df(_df)
|
45
|
+
.lazy
|
46
|
+
.with_row_count(name: temp_col)
|
47
|
+
.groupby(by, maintain_order: maintain_order)
|
48
|
+
.agg(Polars.col(temp_col))
|
49
|
+
.collect(no_optimization: true)
|
50
|
+
|
51
|
+
group_names = groups_df.select(Polars.all.exclude(temp_col))
|
52
|
+
|
53
|
+
# When grouping by a single column, group name is a single value
|
54
|
+
# When grouping by multiple columns, group name is a tuple of values
|
55
|
+
if by.is_a?(String) || by.is_a?(Expr)
|
56
|
+
_group_names = group_names.to_series.each
|
57
|
+
else
|
58
|
+
_group_names = group_names.iter_rows
|
59
|
+
end
|
60
|
+
|
61
|
+
_group_indices = groups_df.select(temp_col).to_series
|
62
|
+
_current_index = 0
|
63
|
+
|
64
|
+
while _current_index < _group_indices.length
|
65
|
+
df = _dataframe_class._from_rbdf(_df)
|
66
|
+
|
67
|
+
group_name = _group_names.next
|
68
|
+
group_data = df[_group_indices[_current_index]]
|
69
|
+
_current_index += 1
|
70
|
+
|
71
|
+
yield group_name, group_data
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
15
75
|
# Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
|
16
76
|
#
|
17
77
|
# Implementing logic using a Ruby function is almost always _significantly_
|
@@ -45,11 +105,8 @@ module Polars
|
|
45
105
|
# # │ i64 ┆ str ┆ str │
|
46
106
|
# # ╞═════╪═══════╪══════════╡
|
47
107
|
# # │ 1 ┆ green ┆ triangle │
|
48
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
49
108
|
# # │ 2 ┆ green ┆ square │
|
50
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
51
109
|
# # │ 4 ┆ red ┆ square │
|
52
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
53
110
|
# # │ 3 ┆ red ┆ triangle │
|
54
111
|
# # └─────┴───────┴──────────┘
|
55
112
|
# def apply(&f)
|
@@ -83,7 +140,6 @@ module Polars
|
|
83
140
|
# # │ str ┆ i64 ┆ i64 │
|
84
141
|
# # ╞═════╪═════════╪══════════════╡
|
85
142
|
# # │ one ┆ 9 ┆ 9 │
|
86
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
87
143
|
# # │ two ┆ 6 ┆ 5 │
|
88
144
|
# # └─────┴─────────┴──────────────┘
|
89
145
|
def agg(aggs)
|
@@ -117,15 +173,10 @@ module Polars
|
|
117
173
|
# # │ str ┆ i64 │
|
118
174
|
# # ╞═════════╪═════╡
|
119
175
|
# # │ c ┆ 1 │
|
120
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
121
176
|
# # │ c ┆ 2 │
|
122
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
123
177
|
# # │ a ┆ 3 │
|
124
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
125
178
|
# # │ c ┆ 4 │
|
126
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
127
179
|
# # │ a ┆ 5 │
|
128
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
129
180
|
# # │ b ┆ 6 │
|
130
181
|
# # └─────────┴─────┘
|
131
182
|
#
|
@@ -139,13 +190,9 @@ module Polars
|
|
139
190
|
# # │ str ┆ i64 │
|
140
191
|
# # ╞═════════╪═════╡
|
141
192
|
# # │ a ┆ 3 │
|
142
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
143
193
|
# # │ a ┆ 5 │
|
144
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
145
194
|
# # │ b ┆ 6 │
|
146
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
147
195
|
# # │ c ┆ 1 │
|
148
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
149
196
|
# # │ c ┆ 2 │
|
150
197
|
# # └─────────┴─────┘
|
151
198
|
def head(n = 5)
|
@@ -181,15 +228,10 @@ module Polars
|
|
181
228
|
# # │ str ┆ i64 │
|
182
229
|
# # ╞═════════╪═════╡
|
183
230
|
# # │ c ┆ 1 │
|
184
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
185
231
|
# # │ c ┆ 2 │
|
186
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
187
232
|
# # │ a ┆ 3 │
|
188
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
189
233
|
# # │ c ┆ 4 │
|
190
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
191
234
|
# # │ a ┆ 5 │
|
192
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
193
235
|
# # │ b ┆ 6 │
|
194
236
|
# # └─────────┴─────┘
|
195
237
|
#
|
@@ -203,13 +245,9 @@ module Polars
|
|
203
245
|
# # │ str ┆ i64 │
|
204
246
|
# # ╞═════════╪═════╡
|
205
247
|
# # │ a ┆ 3 │
|
206
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
207
248
|
# # │ a ┆ 5 │
|
208
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
209
249
|
# # │ b ┆ 6 │
|
210
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
211
250
|
# # │ c ┆ 2 │
|
212
|
-
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
|
213
251
|
# # │ c ┆ 4 │
|
214
252
|
# # └─────────┴─────┘
|
215
253
|
def tail(n = 5)
|
@@ -223,8 +261,6 @@ module Polars
|
|
223
261
|
_dataframe_class._from_rbdf(df._df)
|
224
262
|
end
|
225
263
|
|
226
|
-
# pivot is deprecated
|
227
|
-
|
228
264
|
# Aggregate the first values in the group.
|
229
265
|
#
|
230
266
|
# @return [DataFrame]
|
@@ -247,9 +283,7 @@ module Polars
|
|
247
283
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
248
284
|
# # ╞════════╪═════╪══════╪═══════╡
|
249
285
|
# # │ Apple ┆ 1 ┆ 0.5 ┆ true │
|
250
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
251
286
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
252
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
253
287
|
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
254
288
|
# # └────────┴─────┴──────┴───────┘
|
255
289
|
def first
|
@@ -278,9 +312,7 @@ module Polars
|
|
278
312
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
279
313
|
# # ╞════════╪═════╪══════╪═══════╡
|
280
314
|
# # │ Apple ┆ 3 ┆ 10.0 ┆ false │
|
281
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
282
315
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
283
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
284
316
|
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
285
317
|
# # └────────┴─────┴──────┴───────┘
|
286
318
|
def last
|
@@ -309,9 +341,7 @@ module Polars
|
|
309
341
|
# # │ str ┆ i64 ┆ f64 ┆ u32 │
|
310
342
|
# # ╞════════╪═════╪══════╪═════╡
|
311
343
|
# # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
|
312
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
313
344
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
|
314
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
315
345
|
# # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
|
316
346
|
# # └────────┴─────┴──────┴─────┘
|
317
347
|
def sum
|
@@ -340,9 +370,7 @@ module Polars
|
|
340
370
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
341
371
|
# # ╞════════╪═════╪══════╪═══════╡
|
342
372
|
# # │ Apple ┆ 1 ┆ 0.5 ┆ false │
|
343
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
344
373
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
345
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
346
374
|
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
347
375
|
# # └────────┴─────┴──────┴───────┘
|
348
376
|
def min
|
@@ -371,9 +399,7 @@ module Polars
|
|
371
399
|
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
372
400
|
# # ╞════════╪═════╪══════╪══════╡
|
373
401
|
# # │ Apple ┆ 3 ┆ 10.0 ┆ true │
|
374
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
375
402
|
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
376
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
377
403
|
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
378
404
|
# # └────────┴─────┴──────┴──────┘
|
379
405
|
def max
|
@@ -402,9 +428,7 @@ module Polars
|
|
402
428
|
# # │ str ┆ u32 │
|
403
429
|
# # ╞════════╪═══════╡
|
404
430
|
# # │ Apple ┆ 3 │
|
405
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
406
431
|
# # │ Orange ┆ 1 │
|
407
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
408
432
|
# # │ Banana ┆ 2 │
|
409
433
|
# # └────────┴───────┘
|
410
434
|
def count
|
@@ -433,9 +457,7 @@ module Polars
|
|
433
457
|
# # │ str ┆ f64 ┆ f64 ┆ f64 │
|
434
458
|
# # ╞════════╪═════╪══════════╪══════════╡
|
435
459
|
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
|
436
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
437
460
|
# # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
|
438
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
439
461
|
# # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
|
440
462
|
# # └────────┴─────┴──────────┴──────────┘
|
441
463
|
def mean
|
@@ -463,7 +485,6 @@ module Polars
|
|
463
485
|
# # │ str ┆ u32 ┆ u32 │
|
464
486
|
# # ╞════════╪═════╪═════╡
|
465
487
|
# # │ Apple ┆ 2 ┆ 2 │
|
466
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
467
488
|
# # │ Banana ┆ 3 ┆ 3 │
|
468
489
|
# # └────────┴─────┴─────┘
|
469
490
|
def n_unique
|
@@ -496,9 +517,7 @@ module Polars
|
|
496
517
|
# # │ str ┆ f64 ┆ f64 │
|
497
518
|
# # ╞════════╪═════╪══════╡
|
498
519
|
# # │ Apple ┆ 3.0 ┆ 10.0 │
|
499
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
500
520
|
# # │ Orange ┆ 2.0 ┆ 0.5 │
|
501
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
502
521
|
# # │ Banana ┆ 5.0 ┆ 14.0 │
|
503
522
|
# # └────────┴─────┴──────┘
|
504
523
|
def quantile(quantile, interpolation: "nearest")
|
@@ -526,7 +545,6 @@ module Polars
|
|
526
545
|
# # │ str ┆ f64 ┆ f64 │
|
527
546
|
# # ╞════════╪═════╪══════╡
|
528
547
|
# # │ Apple ┆ 2.0 ┆ 4.0 │
|
529
|
-
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
|
530
548
|
# # │ Banana ┆ 4.0 ┆ 13.0 │
|
531
549
|
# # └────────┴─────┴──────┘
|
532
550
|
def median
|
@@ -542,15 +560,14 @@ module Polars
|
|
542
560
|
# df.groupby("a", maintain_order: true).agg_list
|
543
561
|
# # =>
|
544
562
|
# # shape: (2, 2)
|
545
|
-
# # ┌─────┬───────────┐
|
546
|
-
# # │ a ┆ b │
|
547
|
-
# # │ --- ┆ --- │
|
548
|
-
# # │ str ┆ list[i64] │
|
549
|
-
# # ╞═════╪═══════════╡
|
550
|
-
# # │ one ┆ [1, 3] │
|
551
|
-
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
552
|
-
# # │ two ┆ [2, 4] │
|
553
|
-
# # └─────┴───────────┘
|
563
|
+
# # ┌─────┬─────────────────┐
|
564
|
+
# # │ a ┆ b │
|
565
|
+
# # │ --- ┆ --- │
|
566
|
+
# # │ str ┆ list[list[i64]] │
|
567
|
+
# # ╞═════╪═════════════════╡
|
568
|
+
# # │ one ┆ [[1, 3]] │
|
569
|
+
# # │ two ┆ [[2, 4]] │
|
570
|
+
# # └─────┴─────────────────┘
|
554
571
|
def agg_list
|
555
572
|
agg(Polars.all.list)
|
556
573
|
end
|