polars-df 0.13.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
module Polars
  # Namespace for expressions that operate on expression names.
  class NameExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Keep the original root name of the expression.
    #
    # @note
    #   Due to implementation constraints, this method can only be called as the last
    #   expression in a chain.
    #
    # @return [Expr]
    #
    # @example Prevent errors due to potential duplicate column names.
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2],
    #       "b" => [3, 4]
    #     }
    #   )
    #   df.select((Polars.lit(10) / Polars.all).name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬──────────┐
    #   # │ a    ┆ b        │
    #   # │ ---  ┆ ---      │
    #   # │ f64  ┆ f64      │
    #   # ╞══════╪══════════╡
    #   # │ 10.0 ┆ 3.333333 │
    #   # │ 5.0  ┆ 2.5      │
    #   # └──────┴──────────┘
    #
    # @example Undo an alias operation.
    #   df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 9   ┆ 3   │
    #   # │ 18  ┆ 4   │
    #   # └─────┴─────┘
    def keep
      Utils.wrap_expr(_rbexpr.name_keep)
    end

    # Rename the output of an expression by mapping a function over the root name.
    #
    # The block receives the root column name and returns the new name.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError] if no block is given
    #
    # @example Remove a common suffix and convert to lower case.
    #   df = Polars::DataFrame.new(
    #     {
    #       "A_reverse" => [3, 2, 1],
    #       "B_reverse" => ["z", "y", "x"]
    #     }
    #   )
    #   df.with_columns(
    #     Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
    #   )
    #   # =>
    #   # shape: (3, 4)
    #   # ┌───────────┬───────────┬─────┬─────┐
    #   # │ A_reverse ┆ B_reverse ┆ a   ┆ b   │
    #   # │ ---       ┆ ---       ┆ --- ┆ --- │
    #   # │ i64       ┆ str       ┆ i64 ┆ str │
    #   # ╞═══════════╪═══════════╪═════╪═════╡
    #   # │ 3         ┆ z         ┆ 1   ┆ x   │
    #   # │ 2         ┆ y         ┆ 2   ┆ y   │
    #   # │ 1         ┆ x         ┆ 3   ┆ z   │
    #   # └───────────┴───────────┴─────┴─────┘
    def map(&f)
      # Without a block `f` is nil; fail here with a clear message instead of
      # handing nil to the underlying expression object.
      raise ArgumentError, "name.map requires a block" unless f

      Utils.wrap_expr(_rbexpr.name_map(f))
    end

    # Add a prefix to the root column name of the expression.
    #
    # @param prefix [Object]
    #   Prefix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def prefix(prefix)
      Utils.wrap_expr(_rbexpr.name_prefix(prefix))
    end

    # Add a suffix to the root column name of the expression.
    #
    # @param suffix [Object]
    #   Suffix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def suffix(suffix)
      Utils.wrap_expr(_rbexpr.name_suffix(suffix))
    end

    # Make the root column name lowercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"],
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_lowercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ colx ┆ coly │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_lowercase
      Utils.wrap_expr(_rbexpr.name_to_lowercase)
    end

    # Make the root column name uppercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_uppercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ COLX ┆ COLY │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_uppercase
      Utils.wrap_expr(_rbexpr.name_to_uppercase)
    end
  end
end
|
data/lib/polars/plot.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
module Polars
  module Plot
    # Plot data.
    #
    # Builds a Vega-Lite chart from two columns of the receiver.
    #
    # @param x [Object]
    #   Column used as the x field. Defaults to the first column when omitted.
    # @param y [Object]
    #   Column used as the y field. Defaults to the second column when omitted.
    # @param type [String, Symbol, nil]
    #   Chart type: "line", "area", "pie", "column", "bar" or "scatter".
    #   When nil, the type is chosen from the columns: "scatter" for
    #   numeric/numeric, "column" for string/numeric, and "line" for
    #   date or datetime/numeric.
    # @param group [Object]
    #   Optional column used for the color encoding. Not allowed with "pie".
    # @param stacked [Boolean, nil]
    #   For grouped "column"/"bar" charts, a truthy value omits the
    #   per-group offset encoding.
    #
    # @return [Vega::LiteChart]
    #
    # @raise [ArgumentError]
    #   If x/y are not both given and the frame does not have exactly two
    #   columns, if `group` is combined with "pie", or if `type` is not recognized.
    # @raise [RuntimeError]
    #   If `type` is nil and cannot be inferred from the column types.
    def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
      # Loaded lazily so the vega gem is only needed when plotting.
      require "vega"

      raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
      x ||= columns[0]
      y ||= columns[1]
      type ||= begin
        if self[x].numeric? && self[y].numeric?
          "scatter"
        elsif self[x].utf8? && self[y].numeric?
          "column"
        elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
          "line"
        else
          raise "Cannot determine type. Use the type option."
        end
      end
      # Accept symbols (e.g. `type: :line`) as well as strings; the branches
      # below compare against string literals.
      type = type.to_s

      # Keep only the columns the chart needs (deduplicated by name).
      df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
      data = df.rows(named: true)

      case type
      when "line", "area"
        x_type =
          if df[x].numeric?
            "quantitative"
          elsif df[x].datelike?
            "temporal"
          else
            "nominal"
          end

        scale = x_type == "temporal" ? {type: "utc"} : {}
        encoding = {
          x: {field: x, type: x_type, scale: scale},
          y: {field: y, type: "quantitative"}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "pie"
        raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?

        Vega.lite
          .data(data)
          .mark(type: "arc", tooltip: true)
          .encoding(
            color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
            theta: {field: y, type: "quantitative"}
          )
          .view(stroke: nil)
      when "column"
        encoding = {
          x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          y: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:xOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "bar"
        # Same as "column" with the axes swapped: categories go on y.
        encoding = {
          # TODO determine label angle
          y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          x: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:yOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "scatter"
        encoding = {
          x: {field: x, type: "quantitative", scale: {zero: false}},
          y: {field: y, type: "quantitative", scale: {zero: false}},
          size: {value: 60}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: "circle", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      else
        raise ArgumentError, "Invalid type: #{type}"
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
  # A rolling grouper.
  #
  # This has an `.agg` method which will allow you to run all polars expressions in a
  # group by context.
  class RollingGroupBy
    # Stores the frame and the rolling-window settings for a later {#agg} call.
    #
    # `period` and `offset` are passed through `Utils.parse_as_duration_string`
    # before being stored; the other arguments are stored unchanged.
    def initialize(df, index_column, period, offset, closed, group_by)
      @period = Utils.parse_as_duration_string(period)
      @offset = Utils.parse_as_duration_string(offset)
      @df = df
      @time_column = index_column
      @closed = closed
      @group_by = group_by
    end

    # Runs the given aggregations over the rolling groups.
    #
    # Positional and named aggregations are forwarded unchanged to the lazy
    # group-by's `agg`, and the lazy query is then collected.
    def agg(*aggs, **named_aggs)
      rolling_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @group_by
      }
      grouped = @df.lazy.group_by_rolling(**rolling_options)
      grouped.agg(*aggs, **named_aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end
|