polars-df 0.10.0-aarch64-linux → 0.11.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/LICENSE-THIRD-PARTY.txt +152 -79
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +12 -4
@@ -99,5 +99,97 @@ module Polars
|
|
99
99
|
|
100
100
|
result
|
101
101
|
end
|
102
|
+
|
103
|
+
# Create a column of date ranges.
|
104
|
+
#
|
105
|
+
# @param start [Object]
|
106
|
+
# Lower bound of the date range.
|
107
|
+
# @param stop [Object]
|
108
|
+
# Upper bound of the date range.
|
109
|
+
# @param interval [Object]
|
110
|
+
# Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).
|
111
|
+
# @param closed ["both", "left", "right", "none"]
|
112
|
+
# Define which sides of the range are closed (inclusive).
|
113
|
+
# @param time_unit [nil, "ns", "us", "ms"]
|
114
|
+
# Time unit of the resulting `Datetime` data type.
|
115
|
+
# Only takes effect if the output column is of type `Datetime`.
|
116
|
+
# @param time_zone [String]
|
117
|
+
# Time zone of the resulting `Datetime` data type.
|
118
|
+
# Only takes effect if the output column is of type `Datetime`.
|
119
|
+
# @param eager [Boolean]
|
120
|
+
# Evaluate immediately and return a `Series`.
|
121
|
+
# If set to `false` (default), return an expression instead.
|
122
|
+
#
|
123
|
+
# @return [Object]
|
124
|
+
#
|
125
|
+
# @note
|
126
|
+
# `interval` is created according to the following string language:
|
127
|
+
#
|
128
|
+
# - 1ns (1 nanosecond)
|
129
|
+
# - 1us (1 microsecond)
|
130
|
+
# - 1ms (1 millisecond)
|
131
|
+
# - 1s (1 second)
|
132
|
+
# - 1m (1 minute)
|
133
|
+
# - 1h (1 hour)
|
134
|
+
# - 1d (1 calendar day)
|
135
|
+
# - 1w (1 calendar week)
|
136
|
+
# - 1mo (1 calendar month)
|
137
|
+
# - 1q (1 calendar quarter)
|
138
|
+
# - 1y (1 calendar year)
|
139
|
+
#
|
140
|
+
# Or combine them:
|
141
|
+
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
142
|
+
#
|
143
|
+
# By "calendar day", we mean the corresponding time on the next day (which may
|
144
|
+
# not be 24 hours, due to daylight saving time). Similarly for "calendar week",
|
145
|
+
# "calendar month", "calendar quarter", and "calendar year".
|
146
|
+
#
|
147
|
+
# @example
|
148
|
+
# df = Polars::DataFrame.new(
|
149
|
+
# {
|
150
|
+
# "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
|
151
|
+
# "end" => Date.new(2022, 1, 3)
|
152
|
+
# }
|
153
|
+
# )
|
154
|
+
# df.with_columns(date_range: Polars.date_ranges("start", "end"))
|
155
|
+
# # =>
|
156
|
+
# # shape: (2, 3)
|
157
|
+
# # ┌────────────┬────────────┬─────────────────────────────────┐
|
158
|
+
# # │ start ┆ end ┆ date_range │
|
159
|
+
# # │ --- ┆ --- ┆ --- │
|
160
|
+
# # │ date ┆ date ┆ list[date] │
|
161
|
+
# # ╞════════════╪════════════╪═════════════════════════════════╡
|
162
|
+
# # │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
|
163
|
+
# # │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03] │
|
164
|
+
# # └────────────┴────────────┴─────────────────────────────────┘
|
165
|
+
def date_ranges(
|
166
|
+
start,
|
167
|
+
stop,
|
168
|
+
interval = "1d",
|
169
|
+
closed: "both",
|
170
|
+
time_unit: nil,
|
171
|
+
time_zone: nil,
|
172
|
+
eager: false
|
173
|
+
)
|
174
|
+
interval = Utils.parse_interval_argument(interval)
|
175
|
+
if time_unit.nil? && interval.include?("ns")
|
176
|
+
time_unit = "ns"
|
177
|
+
end
|
178
|
+
|
179
|
+
start_rbexpr = Utils.parse_as_expression(start)
|
180
|
+
end_rbexpr = Utils.parse_as_expression(stop)
|
181
|
+
|
182
|
+
result = Utils.wrap_expr(
|
183
|
+
Plr.date_ranges(
|
184
|
+
start_rbexpr, end_rbexpr, interval, closed, time_unit, time_zone
|
185
|
+
)
|
186
|
+
)
|
187
|
+
|
188
|
+
if eager
|
189
|
+
return F.select(result).to_series
|
190
|
+
end
|
191
|
+
|
192
|
+
result
|
193
|
+
end
|
102
194
|
end
|
103
195
|
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Generate a datetime range.
|
4
|
+
#
|
5
|
+
# @param start [Object]
|
6
|
+
# Lower bound of the datetime range.
|
7
|
+
# @param stop [Object]
|
8
|
+
# Upper bound of the datetime range.
|
9
|
+
# @param interval [String]
|
10
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
11
|
+
# @param closed ['both', 'left', 'right', 'none']
|
12
|
+
# Define which sides of the range are closed (inclusive).
|
13
|
+
# @param time_unit [nil, 'ns', 'us', 'ms']
|
14
|
+
# Time unit of the resulting `Datetime` data type.
|
15
|
+
# @param time_zone [String]
|
16
|
+
# Time zone of the resulting `Datetime` data type.
|
17
|
+
# @param eager [Boolean]
|
18
|
+
# Evaluate immediately and return a `Series`.
|
19
|
+
# If set to `false` (default), return an expression instead.
|
20
|
+
#
|
21
|
+
# @return [Object]
|
22
|
+
#
|
23
|
+
# @example Using Polars duration string to specify the interval:
|
24
|
+
# Polars.datetime_range(
|
25
|
+
# DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
|
26
|
+
# ).alias("datetime")
|
27
|
+
# # =>
|
28
|
+
# # shape: (3,)
|
29
|
+
# # Series: 'datetime' [datetime[ns]]
|
30
|
+
# # [
|
31
|
+
# # 2022-01-01 00:00:00
|
32
|
+
# # 2022-02-01 00:00:00
|
33
|
+
# # 2022-03-01 00:00:00
|
34
|
+
# # ]
|
35
|
+
#
|
36
|
+
# @example Specifying a time zone:
|
37
|
+
# Polars.datetime_range(
|
38
|
+
# DateTime.new(2022, 1, 1),
|
39
|
+
# DateTime.new(2022, 3, 1),
|
40
|
+
# "1mo",
|
41
|
+
# time_zone: "America/New_York",
|
42
|
+
# eager: true
|
43
|
+
# ).alias("datetime")
|
44
|
+
# # =>
|
45
|
+
# # shape: (3,)
|
46
|
+
# # Series: 'datetime' [datetime[ns, America/New_York]]
|
47
|
+
# # [
|
48
|
+
# # 2022-01-01 00:00:00 EST
|
49
|
+
# # 2022-02-01 00:00:00 EST
|
50
|
+
# # 2022-03-01 00:00:00 EST
|
51
|
+
# # ]
|
52
|
+
def datetime_range(
|
53
|
+
start,
|
54
|
+
stop,
|
55
|
+
interval = "1d",
|
56
|
+
closed: "both",
|
57
|
+
time_unit: nil,
|
58
|
+
time_zone: nil,
|
59
|
+
eager: false
|
60
|
+
)
|
61
|
+
interval = Utils.parse_interval_argument(interval)
|
62
|
+
if time_unit.nil? && interval.include?("ns")
|
63
|
+
time_unit = "ns"
|
64
|
+
end
|
65
|
+
|
66
|
+
start_rbexpr = Utils.parse_as_expression(start)
|
67
|
+
end_rbexpr = Utils.parse_as_expression(stop)
|
68
|
+
result = Utils.wrap_expr(
|
69
|
+
Plr.datetime_range(
|
70
|
+
start_rbexpr, end_rbexpr, interval, closed, time_unit, time_zone
|
71
|
+
)
|
72
|
+
)
|
73
|
+
|
74
|
+
if eager
|
75
|
+
return Polars.select(result).to_series
|
76
|
+
end
|
77
|
+
|
78
|
+
result
|
79
|
+
end
|
80
|
+
|
81
|
+
# Create a column of datetime ranges.
|
82
|
+
#
|
83
|
+
# @param start [Object]
|
84
|
+
# Lower bound of the datetime range.
|
85
|
+
# @param stop [Object]
|
86
|
+
# Upper bound of the datetime range.
|
87
|
+
# @param interval [String]
|
88
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
89
|
+
# @param closed ['both', 'left', 'right', 'none']
|
90
|
+
# Define which sides of the range are closed (inclusive).
|
91
|
+
# @param time_unit [nil, 'ns', 'us', 'ms']
|
92
|
+
# Time unit of the resulting `Datetime` data type.
|
93
|
+
# @param time_zone [String]
|
94
|
+
# Time zone of the resulting `Datetime` data type.
|
95
|
+
# @param eager [Boolean]
|
96
|
+
# Evaluate immediately and return a `Series`.
|
97
|
+
# If set to `false` (default), return an expression instead.
|
98
|
+
#
|
99
|
+
# @return [Object]
|
100
|
+
#
|
101
|
+
# @example
|
102
|
+
# df = Polars::DataFrame.new(
|
103
|
+
# {
|
104
|
+
# "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
|
105
|
+
# "end" => DateTime.new(2022, 1, 3),
|
106
|
+
# }
|
107
|
+
# )
|
108
|
+
# df.select(datetime_range: Polars.datetime_ranges("start", "end"))
|
109
|
+
# # =>
|
110
|
+
# # shape: (2, 1)
|
111
|
+
# # ┌─────────────────────────────────┐
|
112
|
+
# # │ datetime_range │
|
113
|
+
# # │ --- │
|
114
|
+
# # │ list[datetime[ns]] │
|
115
|
+
# # ╞═════════════════════════════════╡
|
116
|
+
# # │ [2022-01-01 00:00:00, 2022-01-… │
|
117
|
+
# # │ [2022-01-02 00:00:00, 2022-01-… │
|
118
|
+
# # └─────────────────────────────────┘
|
119
|
+
def datetime_ranges(
|
120
|
+
start,
|
121
|
+
stop,
|
122
|
+
interval: "1d",
|
123
|
+
closed: "both",
|
124
|
+
time_unit: nil,
|
125
|
+
time_zone: nil,
|
126
|
+
eager: false
|
127
|
+
)
|
128
|
+
interval = Utils.parse_interval_argument(interval)
|
129
|
+
if time_unit.nil? && interval.include?("ns")
|
130
|
+
time_unit = "ns"
|
131
|
+
end
|
132
|
+
|
133
|
+
start_rbexpr = Utils.parse_as_expression(start)
|
134
|
+
end_rbexpr = Utils.parse_as_expression(stop)
|
135
|
+
|
136
|
+
result = Utils.wrap_expr(
|
137
|
+
Plr.datetime_ranges(
|
138
|
+
start_rbexpr, end_rbexpr, interval, closed, time_unit, time_zone
|
139
|
+
)
|
140
|
+
)
|
141
|
+
|
142
|
+
if eager
|
143
|
+
return Polars.select(result).to_series
|
144
|
+
end
|
145
|
+
|
146
|
+
result
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Generate a time range.
|
4
|
+
#
|
5
|
+
# @param start [Object]
|
6
|
+
# Lower bound of the time range.
|
7
|
+
# @param stop [Object]
|
8
|
+
# Upper bound of the time range.
|
9
|
+
# @param interval [String]
|
10
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
11
|
+
# @param closed ['both', 'left', 'right', 'none']
|
12
|
+
# Define which sides of the range are closed (inclusive).
|
13
|
+
# @param eager [Boolean]
|
14
|
+
# Evaluate immediately and return a `Series`.
|
15
|
+
# If set to `false` (default), return an expression instead.
|
16
|
+
#
|
17
|
+
# @return [Object]
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# Polars.time_range(
|
21
|
+
# time(14, 0),
|
22
|
+
# nil,
|
23
|
+
# "3h15m",
|
24
|
+
# eager: true
|
25
|
+
# ).alias("time")
|
26
|
+
# # =>
|
27
|
+
# # shape: (4,)
|
28
|
+
# # Series: 'time' [time]
|
29
|
+
# # [
|
30
|
+
# # 14:00:00
|
31
|
+
# # 17:15:00
|
32
|
+
# # 20:30:00
|
33
|
+
# # 23:45:00
|
34
|
+
# # ]
|
35
|
+
def time_range(
|
36
|
+
start = nil,
|
37
|
+
stop = nil,
|
38
|
+
interval = "1h",
|
39
|
+
closed: "both",
|
40
|
+
eager: false
|
41
|
+
)
|
42
|
+
interval = Utils.parse_interval_argument(interval)
|
43
|
+
["y", "mo", "w", "d"].each do |unit|
|
44
|
+
if interval.include?(unit)
|
45
|
+
msg = "invalid interval unit for time_range: found #{unit.inspect}"
|
46
|
+
raise ArgumentError, msg
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
if start.nil?
|
51
|
+
# start = time(0, 0, 0)
|
52
|
+
raise Todo
|
53
|
+
end
|
54
|
+
if stop.nil?
|
55
|
+
# stop = time(23, 59, 59, 999999)
|
56
|
+
raise Todo
|
57
|
+
end
|
58
|
+
|
59
|
+
start_rbexpr = Utils.parse_as_expression(start)
|
60
|
+
end_rbexpr = Utils.parse_as_expression(stop)
|
61
|
+
|
62
|
+
result = Utils.wrap_expr(Plr.time_range(start_rbexpr, end_rbexpr, interval, closed))
|
63
|
+
|
64
|
+
if eager
|
65
|
+
return Polars.select(result).to_series
|
66
|
+
end
|
67
|
+
|
68
|
+
result
|
69
|
+
end
|
70
|
+
|
71
|
+
# Create a column of time ranges.
|
72
|
+
#
|
73
|
+
# @param start [Object]
|
74
|
+
# Lower bound of the time range.
|
75
|
+
# @param stop [Object]
|
76
|
+
# Upper bound of the time range.
|
77
|
+
# @param interval [String]
|
78
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
79
|
+
# @param closed ['both', 'left', 'right', 'none']
|
80
|
+
# Define which sides of the range are closed (inclusive).
|
81
|
+
# @param eager [Boolean]
|
82
|
+
# Evaluate immediately and return a `Series`.
|
83
|
+
# If set to `false` (default), return an expression instead.
|
84
|
+
#
|
85
|
+
# @return [Object]
|
86
|
+
#
|
87
|
+
# @example
|
88
|
+
# df = Polars::DataFrame.new(
|
89
|
+
# {
|
90
|
+
# "start" => [time(9, 0), time(10, 0)],
|
91
|
+
# "end" => time(11, 0)
|
92
|
+
# }
|
93
|
+
# )
|
94
|
+
# df.with_columns(time_range: Polars.time_ranges("start", "end"))
|
95
|
+
# # =>
|
96
|
+
# # shape: (2, 3)
|
97
|
+
# # ┌──────────┬──────────┬────────────────────────────────┐
|
98
|
+
# # │ start ┆ end ┆ time_range │
|
99
|
+
# # │ --- ┆ --- ┆ --- │
|
100
|
+
# # │ time ┆ time ┆ list[time] │
|
101
|
+
# # ╞══════════╪══════════╪════════════════════════════════╡
|
102
|
+
# # │ 09:00:00 ┆ 11:00:00 ┆ [09:00:00, 10:00:00, 11:00:00] │
|
103
|
+
# # │ 10:00:00 ┆ 11:00:00 ┆ [10:00:00, 11:00:00] │
|
104
|
+
# # └──────────┴──────────┴────────────────────────────────┘
|
105
|
+
def time_ranges(
|
106
|
+
start = nil,
|
107
|
+
stop = nil,
|
108
|
+
interval = "1h",
|
109
|
+
closed: "both",
|
110
|
+
eager: false
|
111
|
+
)
|
112
|
+
interval = Utils.parse_interval_argument(interval)
|
113
|
+
["y", "mo", "w", "d"].each do |unit|
|
114
|
+
if interval.include?(unit)
|
115
|
+
msg = "invalid interval unit for time_range: found #{unit.inspect}"
|
116
|
+
raise ArgumentError, msg
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
if start.nil?
|
121
|
+
# start = time(0, 0, 0)
|
122
|
+
raise Todo
|
123
|
+
end
|
124
|
+
if stop.nil?
|
125
|
+
# stop = time(23, 59, 59, 999999)
|
126
|
+
raise Todo
|
127
|
+
end
|
128
|
+
|
129
|
+
start_rbexpr = Utils.parse_as_expression(start)
|
130
|
+
end_rbexpr = Utils.parse_as_expression(stop)
|
131
|
+
|
132
|
+
result = Utils.wrap_expr(Plr.time_ranges(start_rbexpr, end_rbexpr, interval, closed))
|
133
|
+
|
134
|
+
if eager
|
135
|
+
return Polars.select(result).to_series
|
136
|
+
end
|
137
|
+
|
138
|
+
result
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
data/lib/polars/group_by.rb
CHANGED
@@ -106,39 +106,104 @@ module Polars
|
|
106
106
|
# _dataframe_class._from_rbdf(_df.group_by_apply(by, f))
|
107
107
|
# end
|
108
108
|
|
109
|
-
#
|
109
|
+
# Compute aggregations for each group of a group by operation.
|
110
110
|
#
|
111
|
-
#
|
112
|
-
#
|
113
|
-
#
|
114
|
-
#
|
111
|
+
# @param aggs [Array]
|
112
|
+
# Aggregations to compute for each group of the group by operation,
|
113
|
+
# specified as positional arguments.
|
114
|
+
# Accepts expression input. Strings are parsed as column names.
|
115
|
+
# @param named_aggs [Hash]
|
116
|
+
# Additional aggregations, specified as keyword arguments.
|
117
|
+
# The resulting columns will be renamed to the keyword used.
|
115
118
|
#
|
116
119
|
# @return [DataFrame]
|
117
120
|
#
|
118
|
-
# @example
|
121
|
+
# @example Compute the aggregation of the columns for each group.
|
119
122
|
# df = Polars::DataFrame.new(
|
120
|
-
# {
|
123
|
+
# {
|
124
|
+
# "a" => ["a", "b", "a", "b", "c"],
|
125
|
+
# "b" => [1, 2, 1, 3, 3],
|
126
|
+
# "c" => [5, 4, 3, 2, 1]
|
127
|
+
# }
|
121
128
|
# )
|
122
|
-
# df.group_by("
|
123
|
-
#
|
124
|
-
#
|
125
|
-
#
|
126
|
-
#
|
129
|
+
# df.group_by("a").agg(Polars.col("b"), Polars.col("c"))
|
130
|
+
# # =>
|
131
|
+
# # shape: (3, 3)
|
132
|
+
# # ┌─────┬───────────┬───────────┐
|
133
|
+
# # │ a ┆ b ┆ c │
|
134
|
+
# # │ --- ┆ --- ┆ --- │
|
135
|
+
# # │ str ┆ list[i64] ┆ list[i64] │
|
136
|
+
# # ╞═════╪═══════════╪═══════════╡
|
137
|
+
# # │ a ┆ [1, 1] ┆ [5, 3] │
|
138
|
+
# # │ b ┆ [2, 3] ┆ [4, 2] │
|
139
|
+
# # │ c ┆ [3] ┆ [1] │
|
140
|
+
# # └─────┴───────────┴───────────┘
|
141
|
+
#
|
142
|
+
# @example Compute the sum of a column for each group.
|
143
|
+
# df.group_by("a").agg(Polars.col("b").sum)
|
144
|
+
# # =>
|
145
|
+
# # shape: (3, 2)
|
146
|
+
# # ┌─────┬─────┐
|
147
|
+
# # │ a ┆ b │
|
148
|
+
# # │ --- ┆ --- │
|
149
|
+
# # │ str ┆ i64 │
|
150
|
+
# # ╞═════╪═════╡
|
151
|
+
# # │ a ┆ 2 │
|
152
|
+
# # │ b ┆ 5 │
|
153
|
+
# # │ c ┆ 3 │
|
154
|
+
# # └─────┴─────┘
|
155
|
+
#
|
156
|
+
# @example Compute multiple aggregates at once by passing a list of expressions.
|
157
|
+
# df.group_by("a").agg([Polars.sum("b"), Polars.mean("c")])
|
158
|
+
# # =>
|
159
|
+
# # shape: (3, 3)
|
160
|
+
# # ┌─────┬─────┬─────┐
|
161
|
+
# # │ a ┆ b ┆ c │
|
162
|
+
# # │ --- ┆ --- ┆ --- │
|
163
|
+
# # │ str ┆ i64 ┆ f64 │
|
164
|
+
# # ╞═════╪═════╪═════╡
|
165
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
166
|
+
# # │ a ┆ 2 ┆ 4.0 │
|
167
|
+
# # │ b ┆ 5 ┆ 3.0 │
|
168
|
+
# # └─────┴─────┴─────┘
|
169
|
+
#
|
170
|
+
# @example Or use positional arguments to compute multiple aggregations in the same way.
|
171
|
+
# df.group_by("a").agg(
|
172
|
+
# Polars.sum("b").name.suffix("_sum"),
|
173
|
+
# (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
|
127
174
|
# )
|
128
175
|
# # =>
|
129
|
-
# # shape: (
|
130
|
-
# #
|
131
|
-
# # │
|
132
|
-
# # │ --- ┆ ---
|
133
|
-
# # │ str ┆ i64
|
134
|
-
# #
|
135
|
-
# # │
|
136
|
-
# # │
|
137
|
-
# #
|
138
|
-
|
176
|
+
# # shape: (3, 3)
|
177
|
+
# # ┌─────┬───────┬────────────────┐
|
178
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
179
|
+
# # │ --- ┆ --- ┆ --- │
|
180
|
+
# # │ str ┆ i64 ┆ f64 │
|
181
|
+
# # ╞═════╪═══════╪════════════════╡
|
182
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
183
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
184
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
185
|
+
# # └─────┴───────┴────────────────┘
|
186
|
+
#
|
187
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
188
|
+
# df.group_by("a").agg(
|
189
|
+
# b_sum: Polars.sum("b"),
|
190
|
+
# c_mean_squared: (Polars.col("c") ** 2).mean
|
191
|
+
# )
|
192
|
+
# # =>
|
193
|
+
# # shape: (3, 3)
|
194
|
+
# # ┌─────┬───────┬────────────────┐
|
195
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
196
|
+
# # │ --- ┆ --- ┆ --- │
|
197
|
+
# # │ str ┆ i64 ┆ f64 │
|
198
|
+
# # ╞═════╪═══════╪════════════════╡
|
199
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
200
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
201
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
202
|
+
# # └─────┴───────┴────────────────┘
|
203
|
+
def agg(*aggs, **named_aggs)
|
139
204
|
@df.lazy
|
140
205
|
.group_by(@by, maintain_order: @maintain_order)
|
141
|
-
.agg(aggs)
|
206
|
+
.agg(*aggs, **named_aggs)
|
142
207
|
.collect(no_optimization: true)
|
143
208
|
end
|
144
209
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Polars
|
2
|
+
module IO
|
3
|
+
# Read into a DataFrame from Apache Avro format.
|
4
|
+
#
|
5
|
+
# @param source [Object]
|
6
|
+
# Path to a file or a file-like object.
|
7
|
+
# @param columns [Object]
|
8
|
+
# Columns to select. Accepts a list of column indices (starting at zero) or a list
|
9
|
+
# of column names.
|
10
|
+
# @param n_rows [Integer]
|
11
|
+
# Stop reading from Apache Avro file after reading `n_rows`.
|
12
|
+
#
|
13
|
+
# @return [DataFrame]
|
14
|
+
def read_avro(source, columns: nil, n_rows: nil)
|
15
|
+
if Utils.pathlike?(source)
|
16
|
+
source = Utils.normalize_filepath(source)
|
17
|
+
end
|
18
|
+
projection, column_names = Utils.handle_projection_columns(columns)
|
19
|
+
|
20
|
+
rbdf = RbDataFrame.read_avro(source, column_names, projection, n_rows)
|
21
|
+
Utils.wrap_df(rbdf)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|