polars-df 0.13.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
module Polars
  # Namespace for expressions that operate on expression names.
  class NameExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Keep the original root name of the expression.
    #
    # @note
    #   Due to implementation constraints, this method can only be called as the last
    #   expression in a chain.
    #
    # @return [Expr]
    #
    # @example Prevent errors due to potential duplicate column names.
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2],
    #       "b" => [3, 4]
    #     }
    #   )
    #   df.select((Polars.lit(10) / Polars.all).name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬──────────┐
    #   # │ a    ┆ b        │
    #   # │ ---  ┆ ---      │
    #   # │ f64  ┆ f64      │
    #   # ╞══════╪══════════╡
    #   # │ 10.0 ┆ 3.333333 │
    #   # │ 5.0  ┆ 2.5      │
    #   # └──────┴──────────┘
    #
    # @example Undo an alias operation.
    #   df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 9   ┆ 3   │
    #   # │ 18  ┆ 4   │
    #   # └─────┴─────┘
    def keep
      Utils.wrap_expr(_rbexpr.name_keep)
    end

    # Rename the output of an expression by mapping a function over the root name.
    #
    # @yieldparam name [String]
    #   The root column name.
    # @yieldreturn [String]
    #   The new column name.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If no block is given.
    #
    # @example Remove a common suffix and convert to lower case.
    #   df = Polars::DataFrame.new(
    #     {
    #       "A_reverse" => [3, 2, 1],
    #       "B_reverse" => ["z", "y", "x"]
    #     }
    #   )
    #   df.with_columns(
    #     Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
    #   )
    #   # =>
    #   # shape: (3, 4)
    #   # ┌───────────┬───────────┬─────┬─────┐
    #   # │ A_reverse ┆ B_reverse ┆ a   ┆ b   │
    #   # │ ---       ┆ ---       ┆ --- ┆ --- │
    #   # │ i64       ┆ str       ┆ i64 ┆ str │
    #   # ╞═══════════╪═══════════╪═════╪═════╡
    #   # │ 3         ┆ z         ┆ 1   ┆ x   │
    #   # │ 2         ┆ y         ┆ 2   ┆ y   │
    #   # │ 1         ┆ x         ┆ 3   ┆ z   │
    #   # └───────────┴───────────┴─────┴─────┘
    def map(&f)
      # Without a block `f` is nil; fail here with a clear message rather than
      # handing nil to the native layer.
      raise ArgumentError, "block required" unless f

      Utils.wrap_expr(_rbexpr.name_map(f))
    end

    # Add a prefix to the root column name of the expression.
    #
    # @param prefix [String]
    #   Prefix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def prefix(prefix)
      Utils.wrap_expr(_rbexpr.name_prefix(prefix))
    end

    # Add a suffix to the root column name of the expression.
    #
    # @param suffix [String]
    #   Suffix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def suffix(suffix)
      Utils.wrap_expr(_rbexpr.name_suffix(suffix))
    end

    # Make the root column name lowercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_lowercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ colx ┆ coly │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_lowercase
      Utils.wrap_expr(_rbexpr.name_to_lowercase)
    end

    # Make the root column name uppercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_uppercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ COLX ┆ COLY │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_uppercase
      Utils.wrap_expr(_rbexpr.name_to_uppercase)
    end
  end
end
|
data/lib/polars/plot.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
module Polars
  module Plot
    # Plot data.
    #
    # Builds a Vega-Lite chart from two columns of the receiver. The receiver
    # must respond to `columns`, `[]` and `rows(named: true)`.
    #
    # @param x [Object]
    #   Column used for the x values. May only be omitted when the frame has
    #   exactly two columns, in which case the first column is used.
    # @param y [Object]
    #   Column used for the y values. May only be omitted when the frame has
    #   exactly two columns, in which case the second column is used.
    # @param type [String, Symbol, nil]
    #   Chart type: `"line"`, `"area"`, `"pie"`, `"column"`, `"bar"` or
    #   `"scatter"`. When omitted it is inferred from the columns:
    #   numeric x and y gives `"scatter"`, string x with numeric y gives
    #   `"column"`, and Date/Datetime x with numeric y gives `"line"`.
    # @param group [Object]
    #   Optional column mapped to the color channel. Not allowed for pie charts.
    # @param stacked [Boolean]
    #   Only consulted for grouped `"column"` and `"bar"` charts: unless truthy,
    #   the group is also used as an x/y offset.
    #
    # @return [Vega::LiteChart]
    #
    # @raise [ArgumentError]
    #   If the columns cannot be determined, `group` is used with a pie chart,
    #   or `type` is not one of the supported values.
    # @raise [RuntimeError]
    #   If `type` is omitted and cannot be inferred from the columns.
    def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
      # Loaded lazily so the vega gem is only needed when plotting.
      require "vega"

      raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
      x ||= columns[0]
      y ||= columns[1]
      type ||= begin
        if self[x].numeric? && self[y].numeric?
          "scatter"
        elsif self[x].utf8? && self[y].numeric?
          "column"
        elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
          "line"
        else
          raise "Cannot determine type. Use the type option."
        end
      end
      # Column arguments are already normalised with to_s below; do the same
      # for the chart type so that `type: :line` works like `type: "line"`.
      type = type.to_s

      # Only ship the columns the chart actually uses.
      df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
      data = df.rows(named: true)

      case type
      when "line", "area"
        x_type =
          if df[x].numeric?
            "quantitative"
          elsif df[x].datelike?
            "temporal"
          else
            "nominal"
          end

        scale = x_type == "temporal" ? {type: "utc"} : {}
        encoding = {
          x: {field: x, type: x_type, scale: scale},
          y: {field: y, type: "quantitative"}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "pie"
        raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?

        Vega.lite
          .data(data)
          .mark(type: "arc", tooltip: true)
          .encoding(
            color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
            theta: {field: y, type: "quantitative"}
          )
          .view(stroke: nil)
      when "column"
        encoding = {
          x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          y: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:xOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "bar"
        # Horizontal bars: the category goes on the y channel, the value on x.
        encoding = {
          # TODO determine label angle
          y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          x: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:yOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "scatter"
        encoding = {
          x: {field: x, type: "quantitative", scale: {zero: false}},
          y: {field: y, type: "quantitative", scale: {zero: false}},
          size: {value: 60}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: "circle", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      else
        raise ArgumentError, "Invalid type: #{type}"
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
  # A rolling grouper.
  #
  # Exposes an `.agg` method that lets you evaluate polars expressions in a
  # group by context.
  class RollingGroupBy
    # Captures the frame and the rolling-window settings for a later {#agg} call.
    #
    # `period` and `offset` are passed through `Utils.parse_as_duration_string`
    # (period first) before being stored; the other arguments are kept as given.
    def initialize(df, index_column, period, offset, closed, group_by)
      @period, @offset = [period, offset].map { |duration| Utils.parse_as_duration_string(duration) }

      @df = df
      @time_column = index_column
      @closed = closed
      @group_by = group_by
    end

    # Runs the aggregations over the rolling windows.
    #
    # The work goes through the lazy API: the frame is made lazy, grouped with
    # `group_by_rolling` using the stored settings, aggregated, and collected
    # with `no_optimization: true` and `string_cache: false`.
    #
    # @param aggs [Array]
    #   Positional aggregations, forwarded unchanged to the lazy `agg`.
    # @param named_aggs [Hash]
    #   Keyword aggregations, forwarded unchanged to the lazy `agg`.
    #
    # @return [Object]
    #   Whatever `collect` returns (presumably a DataFrame — not visible here).
    def agg(*aggs, **named_aggs)
      window = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @group_by
      }

      grouped = @df.lazy.group_by_rolling(**window)
      grouped.agg(*aggs, **named_aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end
|