polars-df 0.13.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
module Polars
|
2
|
+
# Created by `df.lazy.group_by("foo")`.
|
3
|
+
class LazyGroupBy
|
4
|
+
# @private
|
5
|
+
def initialize(lgb)
  # Keep the wrapped group-by handle; agg, head and tail all delegate to it.
  @lgb = lgb
end
|
8
|
+
|
9
|
+
# Compute aggregations for each group of a group by operation.
|
10
|
+
#
|
11
|
+
# @param aggs [Array]
|
12
|
+
# Aggregations to compute for each group of the group by operation,
|
13
|
+
# specified as positional arguments.
|
14
|
+
# Accepts expression input. Strings are parsed as column names.
|
15
|
+
# @param named_aggs [Hash]
|
16
|
+
# Additional aggregations, specified as keyword arguments.
|
17
|
+
# The resulting columns will be renamed to the keyword used.
|
18
|
+
#
|
19
|
+
# @return [LazyFrame]
|
20
|
+
#
|
21
|
+
# @example Compute the aggregation of the columns for each group.
|
22
|
+
# ldf = Polars::DataFrame.new(
|
23
|
+
# {
|
24
|
+
# "a" => ["a", "b", "a", "b", "c"],
|
25
|
+
# "b" => [1, 2, 1, 3, 3],
|
26
|
+
# "c" => [5, 4, 3, 2, 1]
|
27
|
+
# }
|
28
|
+
# ).lazy
|
29
|
+
# ldf.group_by("a").agg(
|
30
|
+
# [Polars.col("b"), Polars.col("c")]
|
31
|
+
# ).collect
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌─────┬───────────┬───────────┐
|
35
|
+
# # │ a ┆ b ┆ c │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ list[i64] ┆ list[i64] │
|
38
|
+
# # ╞═════╪═══════════╪═══════════╡
|
39
|
+
# # │ a ┆ [1, 1] ┆ [5, 3] │
|
40
|
+
# # │ b ┆ [2, 3] ┆ [4, 2] │
|
41
|
+
# # │ c ┆ [3] ┆ [1] │
|
42
|
+
# # └─────┴───────────┴───────────┘
|
43
|
+
#
|
44
|
+
# @example Compute the sum of a column for each group.
|
45
|
+
# ldf.group_by("a").agg(
|
46
|
+
# Polars.col("b").sum
|
47
|
+
# ).collect
|
48
|
+
# # =>
|
49
|
+
# # shape: (3, 2)
|
50
|
+
# # ┌─────┬─────┐
|
51
|
+
# # │ a ┆ b │
|
52
|
+
# # │ --- ┆ --- │
|
53
|
+
# # │ str ┆ i64 │
|
54
|
+
# # ╞═════╪═════╡
|
55
|
+
# # │ a ┆ 2 │
|
56
|
+
# # │ b ┆ 5 │
|
57
|
+
# # │ c ┆ 3 │
|
58
|
+
# # └─────┴─────┘
|
59
|
+
#
|
60
|
+
# @example Compute multiple aggregates at once by passing a list of expressions.
|
61
|
+
# ldf.group_by("a").agg(
|
62
|
+
# [Polars.sum("b"), Polars.mean("c")]
|
63
|
+
# ).collect
|
64
|
+
# # =>
|
65
|
+
# # shape: (3, 3)
|
66
|
+
# # ┌─────┬─────┬─────┐
|
67
|
+
# # │ a ┆ b ┆ c │
|
68
|
+
# # │ --- ┆ --- ┆ --- │
|
69
|
+
# # │ str ┆ i64 ┆ f64 │
|
70
|
+
# # ╞═════╪═════╪═════╡
|
71
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
72
|
+
# # │ a ┆ 2 ┆ 4.0 │
|
73
|
+
# # │ b ┆ 5 ┆ 3.0 │
|
74
|
+
# # └─────┴─────┴─────┘
|
75
|
+
#
|
76
|
+
# @example Or use positional arguments to compute multiple aggregations in the same way.
|
77
|
+
# ldf.group_by("a").agg(
|
78
|
+
# Polars.sum("b").name.suffix("_sum"),
|
79
|
+
# (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
|
80
|
+
# ).collect
|
81
|
+
# # =>
|
82
|
+
# # shape: (3, 3)
|
83
|
+
# # ┌─────┬───────┬────────────────┐
|
84
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
85
|
+
# # │ --- ┆ --- ┆ --- │
|
86
|
+
# # │ str ┆ i64 ┆ f64 │
|
87
|
+
# # ╞═════╪═══════╪════════════════╡
|
88
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
89
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
90
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
91
|
+
# # └─────┴───────┴────────────────┘
|
92
|
+
#
|
93
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
94
|
+
# ldf.group_by("a").agg(
|
95
|
+
# b_sum: Polars.sum("b"),
|
96
|
+
# c_mean_squared: (Polars.col("c") ** 2).mean
|
97
|
+
# ).collect
|
98
|
+
# # =>
|
99
|
+
# # shape: (3, 3)
|
100
|
+
# # ┌─────┬───────┬────────────────┐
|
101
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
102
|
+
# # │ --- ┆ --- ┆ --- │
|
103
|
+
# # │ str ┆ i64 ┆ f64 │
|
104
|
+
# # ╞═════╪═══════╪════════════════╡
|
105
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
106
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
107
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
108
|
+
# # └─────┴───────┴────────────────┘
|
109
|
+
def agg(*aggs, **named_aggs)
  # Positional and keyword aggregations are parsed together by the Utils helper.
  parsed = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
  aggregated = @lgb.agg(parsed)
  # Hand the raw result to Utils.wrap_ldf before returning it.
  Utils.wrap_ldf(aggregated)
end
|
113
|
+
|
114
|
+
# Get the first `n` rows of each group.
|
115
|
+
#
|
116
|
+
# @param n [Integer]
|
117
|
+
# Number of rows to return.
|
118
|
+
#
|
119
|
+
# @return [LazyFrame]
|
120
|
+
#
|
121
|
+
# @example
|
122
|
+
# df = Polars::DataFrame.new(
|
123
|
+
# {
|
124
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
125
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
126
|
+
# }
|
127
|
+
# )
|
128
|
+
#   df.lazy.group_by("letters").head(2).sort("letters").collect
|
129
|
+
# # =>
|
130
|
+
# # shape: (5, 2)
|
131
|
+
# # ┌─────────┬─────┐
|
132
|
+
# # │ letters ┆ nrs │
|
133
|
+
# # │ --- ┆ --- │
|
134
|
+
# # │ str ┆ i64 │
|
135
|
+
# # ╞═════════╪═════╡
|
136
|
+
# # │ a ┆ 3 │
|
137
|
+
# # │ a ┆ 5 │
|
138
|
+
# # │ b ┆ 6 │
|
139
|
+
# # │ c ┆ 1 │
|
140
|
+
# # │ c ┆ 2 │
|
141
|
+
# # └─────────┴─────┘
|
142
|
+
def head(n = 5)
  # Delegate to the wrapped handle, then wrap its result via Utils.wrap_ldf.
  leading = @lgb.head(n)
  Utils.wrap_ldf(leading)
end
|
145
|
+
|
146
|
+
# Get the last `n` rows of each group.
|
147
|
+
#
|
148
|
+
# @param n [Integer]
|
149
|
+
# Number of rows to return.
|
150
|
+
#
|
151
|
+
# @return [LazyFrame]
|
152
|
+
#
|
153
|
+
# @example
|
154
|
+
# df = Polars::DataFrame.new(
|
155
|
+
# {
|
156
|
+
# "letters" => ["c", "c", "a", "c", "a", "b"],
|
157
|
+
# "nrs" => [1, 2, 3, 4, 5, 6]
|
158
|
+
# }
|
159
|
+
# )
|
160
|
+
#   df.lazy.group_by("letters").tail(2).sort("letters").collect
|
161
|
+
# # =>
|
162
|
+
# # shape: (5, 2)
|
163
|
+
# # ┌─────────┬─────┐
|
164
|
+
# # │ letters ┆ nrs │
|
165
|
+
# # │ --- ┆ --- │
|
166
|
+
# # │ str ┆ i64 │
|
167
|
+
# # ╞═════════╪═════╡
|
168
|
+
# # │ a ┆ 3 │
|
169
|
+
# # │ a ┆ 5 │
|
170
|
+
# # │ b ┆ 6 │
|
171
|
+
# # │ c ┆ 2 │
|
172
|
+
# # │ c ┆ 4 │
|
173
|
+
# # └─────────┴─────┘
|
174
|
+
def tail(n = 5)
  # Delegate to the wrapped handle, then wrap its result via Utils.wrap_ldf.
  trailing = @lgb.tail(n)
  Utils.wrap_ldf(trailing)
end
|
177
|
+
|
178
|
+
# def apply
|
179
|
+
# end
|
180
|
+
end
|
181
|
+
end
|