polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -46,7 +46,7 @@ module Polars
|
|
|
46
46
|
return col("*")
|
|
47
47
|
end
|
|
48
48
|
|
|
49
|
-
col(*names).all(
|
|
49
|
+
col(*names).all(ignore_nulls: ignore_nulls)
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
# Evaluate a bitwise OR operation.
|
|
@@ -78,7 +78,7 @@ module Polars
|
|
|
78
78
|
# # │ true │
|
|
79
79
|
# # └──────┘
|
|
80
80
|
def any(*names, ignore_nulls: true)
|
|
81
|
-
col(*names).any(
|
|
81
|
+
col(*names).any(ignore_nulls: ignore_nulls)
|
|
82
82
|
end
|
|
83
83
|
|
|
84
84
|
# Get the maximum value.
|
|
@@ -277,6 +277,5 @@ module Polars
|
|
|
277
277
|
def cum_sum(*names)
|
|
278
278
|
col(*names).cum_sum
|
|
279
279
|
end
|
|
280
|
-
alias_method :cumsum, :cum_sum
|
|
281
280
|
end
|
|
282
281
|
end
|
|
@@ -1,5 +1,233 @@
|
|
|
1
1
|
module Polars
|
|
2
2
|
module Functions
|
|
3
|
+
# Create a Polars literal expression of type Datetime.
|
|
4
|
+
#
|
|
5
|
+
# @param year [Object]
|
|
6
|
+
# Column or literal.
|
|
7
|
+
# @param month [Object]
|
|
8
|
+
# Column or literal, ranging from 1-12.
|
|
9
|
+
# @param day [Object]
|
|
10
|
+
# Column or literal, ranging from 1-31.
|
|
11
|
+
# @param hour [Object]
|
|
12
|
+
# Column or literal, ranging from 0-23.
|
|
13
|
+
# @param minute [Object]
|
|
14
|
+
# Column or literal, ranging from 0-59.
|
|
15
|
+
# @param second [Object]
|
|
16
|
+
# Column or literal, ranging from 0-59.
|
|
17
|
+
# @param microsecond [Object]
|
|
18
|
+
# Column or literal, ranging from 0-999999.
|
|
19
|
+
# @param time_unit ['us', 'ms', 'ns']
|
|
20
|
+
# Time unit of the resulting expression.
|
|
21
|
+
# @param time_zone [Object]
|
|
22
|
+
# Time zone of the resulting expression.
|
|
23
|
+
# @param ambiguous ['raise', 'earliest', 'latest', 'null']
|
|
24
|
+
# Determine how to deal with ambiguous datetimes:
|
|
25
|
+
#
|
|
26
|
+
# - `'raise'` (default): raise
|
|
27
|
+
# - `'earliest'`: use the earliest datetime
|
|
28
|
+
# - `'latest'`: use the latest datetime
|
|
29
|
+
# - `'null'`: set to null
|
|
30
|
+
#
|
|
31
|
+
# @return [Expr]
|
|
32
|
+
#
|
|
33
|
+
# @example
|
|
34
|
+
# df = Polars::DataFrame.new(
|
|
35
|
+
# {
|
|
36
|
+
# "month" => [1, 2, 3],
|
|
37
|
+
# "day" => [4, 5, 6],
|
|
38
|
+
# "hour" => [12, 13, 14],
|
|
39
|
+
# "minute" => [15, 30, 45]
|
|
40
|
+
# }
|
|
41
|
+
# )
|
|
42
|
+
# df.with_columns(
|
|
43
|
+
# Polars.datetime(
|
|
44
|
+
# 2024,
|
|
45
|
+
# Polars.col("month"),
|
|
46
|
+
# Polars.col("day"),
|
|
47
|
+
# Polars.col("hour"),
|
|
48
|
+
# Polars.col("minute"),
|
|
49
|
+
# time_zone: "Australia/Sydney"
|
|
50
|
+
# )
|
|
51
|
+
# )
|
|
52
|
+
# # =>
|
|
53
|
+
# # shape: (3, 5)
|
|
54
|
+
# # ┌───────┬─────┬──────┬────────┬────────────────────────────────┐
|
|
55
|
+
# # │ month ┆ day ┆ hour ┆ minute ┆ datetime │
|
|
56
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
57
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ datetime[μs, Australia/Sydney] │
|
|
58
|
+
# # ╞═══════╪═════╪══════╪════════╪════════════════════════════════╡
|
|
59
|
+
# # │ 1 ┆ 4 ┆ 12 ┆ 15 ┆ 2024-01-04 12:15:00 AEDT │
|
|
60
|
+
# # │ 2 ┆ 5 ┆ 13 ┆ 30 ┆ 2024-02-05 13:30:00 AEDT │
|
|
61
|
+
# # │ 3 ┆ 6 ┆ 14 ┆ 45 ┆ 2024-03-06 14:45:00 AEDT │
|
|
62
|
+
# # └───────┴─────┴──────┴────────┴────────────────────────────────┘
|
|
63
|
+
#
|
|
64
|
+
# @example We can also use `Polars.datetime` for filtering:
|
|
65
|
+
# df = Polars::DataFrame.new(
|
|
66
|
+
# {
|
|
67
|
+
# "start" => [
|
|
68
|
+
# DateTime.new(2024, 1, 1, 0, 0, 0),
|
|
69
|
+
# DateTime.new(2024, 1, 1, 0, 0, 0),
|
|
70
|
+
# DateTime.new(2024, 1, 1, 0, 0, 0)
|
|
71
|
+
# ],
|
|
72
|
+
# "end" => [
|
|
73
|
+
# DateTime.new(2024, 5, 1, 20, 15, 10),
|
|
74
|
+
# DateTime.new(2024, 7, 1, 21, 25, 20),
|
|
75
|
+
# DateTime.new(2024, 9, 1, 22, 35, 30)
|
|
76
|
+
# ]
|
|
77
|
+
# }
|
|
78
|
+
# )
|
|
79
|
+
# df.filter(Polars.col("end") > Polars.datetime(2024, 6, 1))
|
|
80
|
+
# # =>
|
|
81
|
+
# # shape: (2, 2)
|
|
82
|
+
# # ┌─────────────────────┬─────────────────────┐
|
|
83
|
+
# # │ start ┆ end │
|
|
84
|
+
# # │ --- ┆ --- │
|
|
85
|
+
# # │ datetime[ns] ┆ datetime[ns] │
|
|
86
|
+
# # ╞═════════════════════╪═════════════════════╡
|
|
87
|
+
# # │ 2024-01-01 00:00:00 ┆ 2024-07-01 21:25:20 │
|
|
88
|
+
# # │ 2024-01-01 00:00:00 ┆ 2024-09-01 22:35:30 │
|
|
89
|
+
# # └─────────────────────┴─────────────────────┘
|
|
90
|
+
def datetime(year, month, day, hour = nil, minute = nil, second = nil, microsecond = nil,
             time_unit: "us", time_zone: nil, ambiguous: "raise")
  # Optional components are forwarded as nil when the caller omitted them.
  parse_optional = ->(component) { component.nil? ? nil : Utils.parse_into_expression(component) }

  # `ambiguous` is the only argument where a plain string is treated as a literal.
  rb_ambiguous = Utils.parse_into_expression(ambiguous, str_as_lit: true)
  date_parts = [year, month, day].map { |component| Utils.parse_into_expression(component) }
  time_parts = [hour, minute, second, microsecond].map(&parse_optional)

  rbexpr = Plr.datetime(*date_parts, *time_parts, time_unit, time_zone, rb_ambiguous)
  Utils.wrap_expr(rbexpr)
end
|
|
129
|
+
|
|
130
|
+
# Create a Polars literal expression of type Date.
|
|
131
|
+
#
|
|
132
|
+
# @param year [Object]
|
|
133
|
+
# column or literal.
|
|
134
|
+
# @param month [Object]
|
|
135
|
+
# column or literal, ranging from 1-12.
|
|
136
|
+
# @param day [Object]
|
|
137
|
+
# column or literal, ranging from 1-31.
|
|
138
|
+
#
|
|
139
|
+
# @return [Expr]
|
|
140
|
+
#
|
|
141
|
+
# @example
|
|
142
|
+
# df = Polars::DataFrame.new(
|
|
143
|
+
# {
|
|
144
|
+
# "month" => [1, 2, 3],
|
|
145
|
+
# "day" => [4, 5, 6]
|
|
146
|
+
# }
|
|
147
|
+
# )
|
|
148
|
+
# df.with_columns(Polars.date(2024, Polars.col("month"), Polars.col("day")))
|
|
149
|
+
# # =>
|
|
150
|
+
# # shape: (3, 3)
|
|
151
|
+
# # ┌───────┬─────┬────────────┐
|
|
152
|
+
# # │ month ┆ day ┆ date │
|
|
153
|
+
# # │ --- ┆ --- ┆ --- │
|
|
154
|
+
# # │ i64 ┆ i64 ┆ date │
|
|
155
|
+
# # ╞═══════╪═════╪════════════╡
|
|
156
|
+
# # │ 1 ┆ 4 ┆ 2024-01-04 │
|
|
157
|
+
# # │ 2 ┆ 5 ┆ 2024-02-05 │
|
|
158
|
+
# # │ 3 ┆ 6 ┆ 2024-03-06 │
|
|
159
|
+
# # └───────┴─────┴────────────┘
|
|
160
|
+
#
|
|
161
|
+
# @example We can also use `Polars.date` for filtering:
|
|
162
|
+
# df = Polars::DataFrame.new(
|
|
163
|
+
# {
|
|
164
|
+
# "start" => [Date.new(2024, 1, 1), Date.new(2024, 1, 1), Date.new(2024, 1, 1)],
|
|
165
|
+
# "end" => [Date.new(2024, 5, 1), Date.new(2024, 7, 1), Date.new(2024, 9, 1)]
|
|
166
|
+
# }
|
|
167
|
+
# )
|
|
168
|
+
# df.filter(Polars.col("end") > Polars.date(2024, 6, 1))
|
|
169
|
+
# # =>
|
|
170
|
+
# # shape: (2, 2)
|
|
171
|
+
# # ┌────────────┬────────────┐
|
|
172
|
+
# # │ start ┆ end │
|
|
173
|
+
# # │ --- ┆ --- │
|
|
174
|
+
# # │ date ┆ date │
|
|
175
|
+
# # ╞════════════╪════════════╡
|
|
176
|
+
# # │ 2024-01-01 ┆ 2024-07-01 │
|
|
177
|
+
# # │ 2024-01-01 ┆ 2024-09-01 │
|
|
178
|
+
# # └────────────┴────────────┘
|
|
179
|
+
def date(year, month, day)
  # Build the value through `datetime`, then cast it to Date and name the column "date".
  as_datetime = datetime(year, month, day)
  as_datetime.cast(Date).alias("date")
end
|
|
186
|
+
|
|
187
|
+
# Create a Polars literal expression of type Time.
|
|
188
|
+
#
|
|
189
|
+
# @param hour [Object]
|
|
190
|
+
# column or literal, ranging from 0-23.
|
|
191
|
+
# @param minute [Object]
|
|
192
|
+
# column or literal, ranging from 0-59.
|
|
193
|
+
# @param second [Object]
|
|
194
|
+
# column or literal, ranging from 0-59.
|
|
195
|
+
# @param microsecond [Object]
|
|
196
|
+
# column or literal, ranging from 0-999999.
|
|
197
|
+
#
|
|
198
|
+
# @return [Expr]
|
|
199
|
+
#
|
|
200
|
+
# @example
|
|
201
|
+
# df = Polars::DataFrame.new(
|
|
202
|
+
# {
|
|
203
|
+
# "hour" => [12, 13, 14],
|
|
204
|
+
# "minute" => [15, 30, 45]
|
|
205
|
+
# }
|
|
206
|
+
# )
|
|
207
|
+
# df.with_columns(Polars.time(Polars.col("hour"), Polars.col("minute")))
|
|
208
|
+
# # =>
|
|
209
|
+
# # shape: (3, 3)
|
|
210
|
+
# # ┌──────┬────────┬──────────┐
|
|
211
|
+
# # │ hour ┆ minute ┆ time │
|
|
212
|
+
# # │ --- ┆ --- ┆ --- │
|
|
213
|
+
# # │ i64 ┆ i64 ┆ time │
|
|
214
|
+
# # ╞══════╪════════╪══════════╡
|
|
215
|
+
# # │ 12 ┆ 15 ┆ 12:15:00 │
|
|
216
|
+
# # │ 13 ┆ 30 ┆ 13:30:00 │
|
|
217
|
+
# # │ 14 ┆ 45 ┆ 14:45:00 │
|
|
218
|
+
# # └──────┴────────┴──────────┘
|
|
219
|
+
def time(hour = nil, minute = nil, second = nil, microsecond = nil)
  # Build a datetime on 1970-01-01 from the given components, then cast it to Time
  # and name the column "time".
  on_epoch_date = datetime(1970, 1, 1, hour, minute, second, microsecond)
  on_epoch_date.cast(Time).alias("time")
end
|
|
230
|
+
|
|
3
231
|
# Create polars `Duration` from distinct time components.
|
|
4
232
|
#
|
|
5
233
|
# @return [Expr]
|
|
@@ -41,8 +269,12 @@ module Polars
|
|
|
41
269
|
milliseconds: nil,
|
|
42
270
|
microseconds: nil,
|
|
43
271
|
nanoseconds: nil,
|
|
44
|
-
time_unit:
|
|
272
|
+
time_unit: nil
|
|
45
273
|
)
|
|
274
|
+
if !nanoseconds.nil? && time_unit.nil?
|
|
275
|
+
time_unit = "ns"
|
|
276
|
+
end
|
|
277
|
+
|
|
46
278
|
if !weeks.nil?
|
|
47
279
|
weeks = Utils.parse_into_expression(weeks, str_as_lit: false)
|
|
48
280
|
end
|
|
@@ -68,6 +300,10 @@ module Polars
|
|
|
68
300
|
nanoseconds = Utils.parse_into_expression(nanoseconds, str_as_lit: false)
|
|
69
301
|
end
|
|
70
302
|
|
|
303
|
+
if time_unit.nil?
|
|
304
|
+
time_unit = "us"
|
|
305
|
+
end
|
|
306
|
+
|
|
71
307
|
Utils.wrap_expr(
|
|
72
308
|
Plr.duration(
|
|
73
309
|
weeks,
|
|
@@ -140,6 +376,49 @@ module Polars
|
|
|
140
376
|
Utils.wrap_expr(Plr.concat_list(exprs))
|
|
141
377
|
end
|
|
142
378
|
|
|
379
|
+
# Horizontally concatenate columns into a single array column.
|
|
380
|
+
#
|
|
381
|
+
# Non-array columns are reshaped to a unit-width array. All columns must have
|
|
382
|
+
# a dtype of either `Polars::Array.new(<DataType>, width)` or `Polars::<DataType>`.
|
|
383
|
+
#
|
|
384
|
+
# @note
|
|
385
|
+
# This functionality is considered **unstable**. It may be changed
|
|
386
|
+
# at any point without it being considered a breaking change.
|
|
387
|
+
#
|
|
388
|
+
# @param exprs [Object]
|
|
389
|
+
# Columns to concatenate into a single array column. Accepts expression input.
|
|
390
|
+
# Strings are parsed as column names, other non-expression inputs are parsed as
|
|
391
|
+
# literals.
|
|
392
|
+
# @param more_exprs [Array]
|
|
393
|
+
# Additional columns to concatenate into a single array column, specified as
|
|
394
|
+
# positional arguments.
|
|
395
|
+
#
|
|
396
|
+
# @return [Expr]
|
|
397
|
+
#
|
|
398
|
+
# @example Concatenate 2 array columns:
|
|
399
|
+
# Polars.select(
|
|
400
|
+
# a: Polars::Series.new([[1], [3], nil], dtype: Polars::Array.new(Polars::Int64, 1)),
|
|
401
|
+
# b: Polars::Series.new([[3], [nil], [5]], dtype: Polars::Array.new(Polars::Int64, 1))
|
|
402
|
+
# ).with_columns(
|
|
403
|
+
# Polars.concat_arr("a", "b").alias("concat_arr(a, b)"),
|
|
404
|
+
# Polars.concat_arr("a", Polars.first("b")).alias("concat_arr(a, first(b))")
|
|
405
|
+
# )
|
|
406
|
+
# # =>
|
|
407
|
+
# # shape: (3, 4)
|
|
408
|
+
# # ┌───────────────┬───────────────┬──────────────────┬─────────────────────────┐
|
|
409
|
+
# # │ a ┆ b ┆ concat_arr(a, b) ┆ concat_arr(a, first(b)) │
|
|
410
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
411
|
+
# # │ array[i64, 1] ┆ array[i64, 1] ┆ array[i64, 2] ┆ array[i64, 2] │
|
|
412
|
+
# # ╞═══════════════╪═══════════════╪══════════════════╪═════════════════════════╡
|
|
413
|
+
# # │ [1] ┆ [3] ┆ [1, 3] ┆ [1, 3] │
|
|
414
|
+
# # │ [3] ┆ [null] ┆ [3, null] ┆ [3, 3] │
|
|
415
|
+
# # │ null ┆ [5] ┆ null ┆ null │
|
|
416
|
+
# # └───────────────┴───────────────┴──────────────────┴─────────────────────────┘
|
|
417
|
+
def concat_arr(exprs, *more_exprs)
  # Normalize every input into a native expression before handing the list to the extension.
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  concatenated = Plr.concat_arr(rbexprs)
  Utils.wrap_expr(concatenated)
end
|
|
421
|
+
|
|
143
422
|
# Collect several columns into a Series of dtype Struct.
|
|
144
423
|
#
|
|
145
424
|
# @param exprs [Array]
|
|
@@ -194,7 +473,7 @@ module Polars
|
|
|
194
473
|
#
|
|
195
474
|
# @example Use keyword arguments to easily name each struct field.
|
|
196
475
|
# df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
|
|
197
|
-
# # => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}
|
|
476
|
+
# # => Polars::Schema({"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})})
|
|
198
477
|
def struct(*exprs, schema: nil, eager: false, **named_exprs)
|
|
199
478
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
|
|
200
479
|
expr = Utils.wrap_expr(Plr.as_struct(rbexprs))
|
|
@@ -221,7 +500,10 @@ module Polars
|
|
|
221
500
|
#
|
|
222
501
|
# @param exprs [Object]
|
|
223
502
|
# Columns to concat into a Utf8 Series.
|
|
224
|
-
# @param
|
|
503
|
+
# @param more_exprs [Array]
|
|
504
|
+
# Additional columns to concatenate into a single string column, specified as
|
|
505
|
+
# positional arguments.
|
|
506
|
+
# @param separator [String]
|
|
225
507
|
# String value that will be used to separate the values.
|
|
226
508
|
# @param ignore_nulls [Boolean]
|
|
227
509
|
# Ignore null values (default is `false`).
|
|
@@ -244,7 +526,7 @@ module Polars
|
|
|
244
526
|
# Polars.col("b"),
|
|
245
527
|
# Polars.col("c")
|
|
246
528
|
# ],
|
|
247
|
-
#
|
|
529
|
+
# separator: " "
|
|
248
530
|
# ).alias("full_sentence")
|
|
249
531
|
# ]
|
|
250
532
|
# )
|
|
@@ -259,9 +541,9 @@ module Polars
|
|
|
259
541
|
# # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
|
|
260
542
|
# # │ 3 ┆ null ┆ walk ┆ null │
|
|
261
543
|
# # └─────┴──────┴──────┴───────────────┘
|
|
262
|
-
def concat_str(exprs,
|
|
263
|
-
exprs = Utils.parse_into_list_of_expressions(exprs)
|
|
264
|
-
Utils.wrap_expr(Plr.concat_str(exprs,
|
|
544
|
+
def concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false)
|
|
545
|
+
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
|
546
|
+
Utils.wrap_expr(Plr.concat_str(exprs, separator, ignore_nulls))
|
|
265
547
|
end
|
|
266
548
|
|
|
267
549
|
# Format expressions as a string.
|
|
@@ -314,7 +596,7 @@ module Polars
|
|
|
314
596
|
end
|
|
315
597
|
end
|
|
316
598
|
|
|
317
|
-
concat_str(exprs,
|
|
599
|
+
concat_str(exprs, separator: "")
|
|
318
600
|
end
|
|
319
601
|
end
|
|
320
602
|
end
|
|
@@ -93,7 +93,7 @@ module Polars
|
|
|
93
93
|
# # │ 2 ┆ 4 ┆ 5 ┆ null │
|
|
94
94
|
# # │ 3 ┆ null ┆ 6 ┆ 8 │
|
|
95
95
|
# # └─────┴──────┴──────┴──────┘
|
|
96
|
-
def concat(items, rechunk:
|
|
96
|
+
def concat(items, rechunk: false, how: "vertical", parallel: true)
|
|
97
97
|
elems = items.to_a
|
|
98
98
|
|
|
99
99
|
if elems.empty?
|
|
@@ -152,7 +152,7 @@ module Polars
|
|
|
152
152
|
parallel,
|
|
153
153
|
true
|
|
154
154
|
)
|
|
155
|
-
).collect(
|
|
155
|
+
).collect(optimizations: QueryOptFlags._eager)
|
|
156
156
|
elsif how == "diagonal"
|
|
157
157
|
out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
|
|
158
158
|
elsif how == "diagonal_relaxed"
|
|
@@ -163,7 +163,7 @@ module Polars
|
|
|
163
163
|
parallel,
|
|
164
164
|
true
|
|
165
165
|
)
|
|
166
|
-
).collect(
|
|
166
|
+
).collect(optimizations: QueryOptFlags._eager)
|
|
167
167
|
elsif how == "horizontal"
|
|
168
168
|
out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
|
|
169
169
|
else
|
|
@@ -206,7 +206,194 @@ module Polars
|
|
|
206
206
|
end
|
|
207
207
|
end
|
|
208
208
|
|
|
209
|
-
#
|
|
209
|
+
# Combine multiple DataFrames, LazyFrames, or Series into a single object.
|
|
210
|
+
#
|
|
211
|
+
# @note
|
|
212
|
+
# This function does not guarantee any specific ordering of rows in the result.
|
|
213
|
+
# If you need predictable row ordering, use `Polars.concat` instead.
|
|
214
|
+
#
|
|
215
|
+
# @param items [Array]
|
|
216
|
+
# DataFrames, LazyFrames, or Series to concatenate.
|
|
217
|
+
# @param how ['vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align', 'align_full', 'align_inner', 'align_left', 'align_right']
|
|
218
|
+
# Note that `Series` only supports the `vertical` strategy.
|
|
219
|
+
#
|
|
220
|
+
# * vertical: Applies multiple `vstack` operations.
|
|
221
|
+
# * vertical_relaxed: Same as `vertical`, but additionally coerces columns to
|
|
222
|
+
# their common supertype *if* they are mismatched (eg: Int32 → Int64).
|
|
223
|
+
# * diagonal: Finds a union between the column schemas and fills missing column
|
|
224
|
+
# values with `null`.
|
|
225
|
+
# * diagonal_relaxed: Same as `diagonal`, but additionally coerces columns to
|
|
226
|
+
# their common supertype *if* they are mismatched (eg: Int32 → Int64).
|
|
227
|
+
# * horizontal: Stacks Series from DataFrames horizontally and fills with `null`
|
|
228
|
+
# if the lengths don't match.
|
|
229
|
+
# * align, align_full, align_inner, align_left, align_right: Combines frames horizontally,
|
|
230
|
+
# auto-determining the common key columns and aligning rows using the same
|
|
231
|
+
# logic as `align_frames` (note that "align" is an alias for "align_full").
|
|
232
|
+
# The "align" strategy determines the type of join used to align the frames,
|
|
233
|
+
# equivalent to the "how" parameter on `align_frames`. Note that the common
|
|
234
|
+
# join columns are automatically coalesced, but other column collisions
|
|
235
|
+
# will raise an error (if you need more control over this you should use
|
|
236
|
+
# a suitable `join` method directly).
|
|
237
|
+
#
|
|
238
|
+
# @return [Object]
|
|
239
|
+
#
|
|
240
|
+
# @example
|
|
241
|
+
# df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
|
242
|
+
# df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
|
|
243
|
+
# Polars.union([df1, df2])
|
|
244
|
+
# # =>
|
|
245
|
+
# # shape: (2, 2)
|
|
246
|
+
# # ┌─────┬─────┐
|
|
247
|
+
# # │ a ┆ b │
|
|
248
|
+
# # │ --- ┆ --- │
|
|
249
|
+
# # │ i64 ┆ i64 │
|
|
250
|
+
# # ╞═════╪═════╡
|
|
251
|
+
# # │ 1 ┆ 3 │
|
|
252
|
+
# # │ 2 ┆ 4 │
|
|
253
|
+
# # └─────┴─────┘
|
|
254
|
+
#
|
|
255
|
+
# @example
|
|
256
|
+
# df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
|
257
|
+
# df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
|
|
258
|
+
# Polars.union([df1, df2], how: "vertical_relaxed")
|
|
259
|
+
# # =>
|
|
260
|
+
# # shape: (2, 2)
|
|
261
|
+
# # ┌─────┬─────┐
|
|
262
|
+
# # │ a ┆ b │
|
|
263
|
+
# # │ --- ┆ --- │
|
|
264
|
+
# # │ f64 ┆ i64 │
|
|
265
|
+
# # ╞═════╪═════╡
|
|
266
|
+
# # │ 1.0 ┆ 3 │
|
|
267
|
+
# # │ 2.5 ┆ 4 │
|
|
268
|
+
# # └─────┴─────┘
|
|
269
|
+
#
|
|
270
|
+
# @example
|
|
271
|
+
# df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
|
|
272
|
+
# df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
|
|
273
|
+
# Polars.union([df_h1, df_h2], how: "horizontal")
|
|
274
|
+
# # =>
|
|
275
|
+
# # shape: (2, 5)
|
|
276
|
+
# # ┌─────┬─────┬─────┬─────┬─────┐
|
|
277
|
+
# # │ l1 ┆ l2 ┆ r1 ┆ r2 ┆ r3 │
|
|
278
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
279
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
|
|
280
|
+
# # ╞═════╪═════╪═════╪═════╪═════╡
|
|
281
|
+
# # │ 1 ┆ 3 ┆ 5 ┆ 7 ┆ 9 │
|
|
282
|
+
# # │ 2 ┆ 4 ┆ 6 ┆ 8 ┆ 10 │
|
|
283
|
+
# # └─────┴─────┴─────┴─────┴─────┘
|
|
284
|
+
#
|
|
285
|
+
# @example The "diagonal" strategy allows for some frames to have missing columns, the values for which are filled with `null`:
|
|
286
|
+
# df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
|
287
|
+
# df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
|
|
288
|
+
# Polars.union([df_d1, df_d2], how: "diagonal")
|
|
289
|
+
# # =>
|
|
290
|
+
# # shape: (2, 3)
|
|
291
|
+
# # ┌─────┬──────┬──────┐
|
|
292
|
+
# # │ a ┆ b ┆ c │
|
|
293
|
+
# # │ --- ┆ --- ┆ --- │
|
|
294
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
|
295
|
+
# # ╞═════╪══════╪══════╡
|
|
296
|
+
# # │ 1 ┆ 3 ┆ null │
|
|
297
|
+
# # │ 2 ┆ null ┆ 4 │
|
|
298
|
+
# # └─────┴──────┴──────┘
|
|
299
|
+
def union(
  items,
  how: "vertical"
)
  elems = items.to_a

  if elems.empty?
    msg = "cannot concat empty list"
    raise ArgumentError, msg
  elsif elems.length == 1 && (elems[0].is_a?(DataFrame) || elems[0].is_a?(Series) || elems[0].is_a?(LazyFrame))
    # A lone frame or series is handed back untouched.
    return elems[0]
  end

  # The align strategies are documented but not implemented yet.
  if how.start_with?("align")
    raise Todo
  end

  # Every documented strategy, rendered for the "unknown `how`" error messages below.
  allowed = [
    "vertical",
    "vertical_relaxed",
    "diagonal",
    "diagonal_relaxed",
    "horizontal",
    "align",
    "align_full",
    "align_inner",
    "align_left",
    "align_right"
  ].map(&:inspect).join(", ")

  out = nil
  first = elems[0]

  if first.is_a?(DataFrame)
    # Eager frames go through the lazy concat and are collected immediately.
    if ["vertical", "vertical_relaxed"].include?(how)
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          false,
          true,
          how.end_with?("relaxed")
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif ["diagonal", "diagonal_relaxed"].include?(how)
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          false,
          true,
          how.end_with?("relaxed")
        )
      ).collect(optimizations: QueryOptFlags._eager)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
    else
      msg = "DataFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
      raise ArgumentError, msg
    end

  elsif first.is_a?(LazyFrame)
    # Lazy frames stay lazy: the combined plan is returned without collecting.
    if ["vertical", "vertical_relaxed"].include?(how)
      return Utils.wrap_ldf(
        Plr.concat_lf(
          elems,
          false,
          true,
          how.end_with?("relaxed")
        )
      )
    elsif ["diagonal", "diagonal_relaxed"].include?(how)
      return Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems,
          false,
          true,
          how.end_with?("relaxed")
        )
      )
    elsif how == "horizontal"
      return Utils.wrap_ldf(
        Plr.concat_lf_horizontal(
          elems,
          true
        )
      )
    else
      msg = "LazyFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
      raise ArgumentError, msg
    end

  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end

  elsif first.is_a?(Expr)
    return Utils.wrap_expr(Plr.concat_expr(elems.map { |e| e._rbexpr }, false))
  else
    msg = "did not expect type: #{first.class.name.inspect} in `concat`"
    raise TypeError, msg
  end

  out
end
|
|
395
|
+
|
|
396
|
+
# Align an array of frames using the unique values from one or more columns as a key.
|
|
210
397
|
#
|
|
211
398
|
# Frames that do not contain the given key values have rows injected (with nulls
|
|
212
399
|
# filling the non-key columns), and each resulting frame is sorted by the key.
|
|
@@ -219,13 +406,13 @@ module Polars
|
|
|
219
406
|
# the same number of rows.
|
|
220
407
|
#
|
|
221
408
|
# @param frames [Array]
|
|
222
|
-
#
|
|
409
|
+
# Array of DataFrames or LazyFrames.
|
|
223
410
|
# @param on [Object]
|
|
224
411
|
# One or more columns whose unique values will be used to align the frames.
|
|
225
412
|
# @param select [Object]
|
|
226
413
|
# Optional post-alignment column select to constrain and/or order
|
|
227
414
|
# the columns returned from the newly aligned frames.
|
|
228
|
-
# @param
|
|
415
|
+
# @param descending [Object]
|
|
229
416
|
# Sort the alignment column values in descending order; can be a single
|
|
230
417
|
# boolean or a list of booleans associated with each column in `on`.
|
|
231
418
|
#
|
|
@@ -254,7 +441,7 @@ module Polars
|
|
|
254
441
|
# }
|
|
255
442
|
# )
|
|
256
443
|
# af1, af2, af3 = Polars.align_frames(
|
|
257
|
-
# df1, df2, df3, on: "dt", select: ["x", "y"]
|
|
444
|
+
# df1, df2, df3, on: "dt", how: "left", select: ["x", "y"]
|
|
258
445
|
# )
|
|
259
446
|
# (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
|
|
260
447
|
# # =>
|
|
@@ -271,9 +458,16 @@ module Polars
|
|
|
271
458
|
def align_frames(
|
|
272
459
|
*frames,
|
|
273
460
|
on:,
|
|
461
|
+
how: nil,
|
|
274
462
|
select: nil,
|
|
275
|
-
|
|
463
|
+
descending: false
|
|
276
464
|
)
|
|
465
|
+
# TODO update
|
|
466
|
+
if how.nil?
|
|
467
|
+
warn "The default `how` for `align_frames` method will change from `left` to `full` in a future version"
|
|
468
|
+
how = "left"
|
|
469
|
+
end
|
|
470
|
+
|
|
277
471
|
if frames.empty?
|
|
278
472
|
return []
|
|
279
473
|
elsif frames.map(&:class).uniq.length != 1
|
|
@@ -285,7 +479,7 @@ module Polars
|
|
|
285
479
|
alignment_frame = (
|
|
286
480
|
concat(frames.map { |df| df.lazy.select(on) })
|
|
287
481
|
.unique(maintain_order: false)
|
|
288
|
-
.sort(on,
|
|
482
|
+
.sort(on, descending: descending)
|
|
289
483
|
)
|
|
290
484
|
alignment_frame = (
|
|
291
485
|
eager ? alignment_frame.collect.lazy : alignment_frame.cache
|
|
@@ -296,7 +490,7 @@ module Polars
|
|
|
296
490
|
alignment_frame.join(
|
|
297
491
|
df.lazy,
|
|
298
492
|
on: alignment_frame.columns,
|
|
299
|
-
how:
|
|
493
|
+
how: how
|
|
300
494
|
).select(df.columns)
|
|
301
495
|
end
|
|
302
496
|
if !select.nil?
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
module Polars
  module Functions
    # Escapes string regex meta characters.
    #
    # @param s [String]
    #   The string whose meta characters will be escaped.
    #
    # @return [String]
    #
    # @raise [TypeError]
    #   If `s` is an `Expr` or any other non-`String` value.
    def escape_regex(s)
      case s
      when Expr
        # Expressions have their own namespaced variant; point the caller to it.
        raise TypeError, "escape_regex function is unsupported for `Expr`, you may want use `Expr.str.escape_regex` instead"
      when ::String
        Plr.escape_regex(s)
      else
        raise TypeError, "escape_regex function supports only `String` type, got `#{s.class.name}`"
      end
    end
  end
end
|