polars-df 0.14.0-arm64-darwin → 0.16.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +25665 -14861
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +9 -8
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,1226 @@
|
|
1
|
+
module Polars
|
2
|
+
module Selectors
|
3
|
+
# @private
|
4
|
+
class SelectorProxy < Expr
  # Maps the selector name recorded for a binary set operation to the
  # operator symbol used when rendering the combined selector in `inspect`.
  SET_OPS = {"and" => "&", "or" => "|", "sub" => "-", "xor" => "^"}.freeze
  private_constant :SET_OPS

  attr_accessor :_attrs
  attr_accessor :_repr_override

  # Wrap an expression and remember which selector produced it.
  #
  # @param expr [Expr] expression whose `_rbexpr` this proxy adopts
  # @param name [String] selector name (e.g. "alpha", or "and"/"or"/"sub"/"xor"
  #   for set operations)
  # @param parameters [Hash, nil] arguments the selector was built with;
  #   only used to render `inspect`
  def initialize(
    expr,
    name:,
    parameters: nil
  )
    self._rbexpr = expr._rbexpr
    self._attrs = {
      name: name,
      params: parameters
    }
  end

  # Render the selector as the call (or set expression) that created it.
  #
  # @return [String]
  def inspect
    return as_expr.inspect unless _attrs
    return _repr_override if _repr_override

    selector_name = _attrs[:name]
    params = _attrs[:params] || {}
    op = SET_OPS[selector_name]
    if op
      # Set operations show both operands joined by the operator symbol.
      "(#{params.values.map(&:inspect).join(" #{op} ")})"
    else
      # Keys starting with "*" are splat arguments: their value's inspect
      # string is shown without its first and last character and without
      # a `key=` prefix.
      str_params = params.map { |k, v| k.start_with?("*") ? v.inspect[1..-2] : "#{k}=#{v.inspect}" }.join(", ")
      "Polars.cs.#{selector_name}(#{str_params})"
    end
  end

  # Invert the selection: everything in `Selectors.all` minus this selector.
  #
  # @return [SelectorProxy, Expr]
  def ~
    if Utils.is_selector(self)
      inverted = Selectors.all - self
      inverted._repr_override = "~#{inspect}"
    else
      inverted = ~as_expr
    end
    inverted
  end

  # Set difference with another selector; otherwise plain expression
  # subtraction.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  def -(other)
    return as_expr - other unless Utils.is_selector(other)

    _set_op_proxy("sub", other, meta._as_selector.meta._selector_sub(other))
  end

  # Set intersection with another selector; otherwise expression `&`.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  # @raise [Todo] if `other` is a plain column expression
  def &(other)
    raise Todo if Utils.is_column(other)
    return as_expr & other unless Utils.is_selector(other)

    _set_op_proxy("and", other, meta._as_selector.meta._selector_and(other))
  end

  # Set union with another selector; otherwise expression `|`.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  # @raise [Todo] if `other` is a plain column expression
  def |(other)
    raise Todo if Utils.is_column(other)
    return as_expr | other unless Utils.is_selector(other)

    # Union must use `_selector_add`; `_selector_and` would compute an
    # intersection while `inspect` still reported "|".
    _set_op_proxy("or", other, meta._as_selector.meta._selector_add(other))
  end

  # Symmetric difference with another selector; otherwise expression `^`.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  # @raise [Todo] if `other` is a plain column expression
  def ^(other)
    raise Todo if Utils.is_column(other)
    return as_expr ^ other unless Utils.is_selector(other)

    # Symmetric difference must use `_selector_xor`, not `_selector_and`.
    _set_op_proxy("xor", other, meta._as_selector.meta._selector_xor(other))
  end

  # Plain expression built from the same underlying `_rbexpr`.
  #
  # @return [Expr]
  def as_expr
    Expr._from_rbexpr(_rbexpr)
  end

  private

  # Wrap the result of a binary set operation, recording both operands so
  # `inspect` can render them.
  def _set_op_proxy(name, other, expr)
    SelectorProxy.new(
      expr,
      parameters: {"self" => self, "other" => other},
      name: name
    )
  end
end
|
110
|
+
|
111
|
+
# @private
|
112
|
+
# Forwards every argument unchanged to SelectorProxy.new.
def self._selector_proxy_(...) = SelectorProxy.new(...)
|
115
|
+
|
116
|
+
# @private
|
117
|
+
def self._re_string(string, escape: true)
  # Returns a parenthesised regex group: "(a)" for a single String, or
  # "(a|b|c)" for a collection of strings.
  #
  # A bare String is one alternative. Anything else is iterated, and nested
  # Arrays are flattened one level so callers can mix strings and arrays
  # of strings.
  alternatives =
    if string.is_a?(::String)
      [string]
    else
      string.flat_map { |item| item.is_a?(Array) ? item : [item] }
    end

  # Escape each alternative unless the caller already supplies regex text.
  alternatives = alternatives.map { |item| Utils.re_escape(item) } if escape

  "(#{alternatives.join("|")})"
end
|
133
|
+
|
134
|
+
# Select all columns.
|
135
|
+
#
|
136
|
+
# @return [SelectorProxy]
|
137
|
+
#
|
138
|
+
# @example
|
139
|
+
# df = Polars::DataFrame.new(
|
140
|
+
# {
|
141
|
+
# "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
|
142
|
+
# "value" => [1_234_500, 5_000_555]
|
143
|
+
# },
|
144
|
+
# schema_overrides: {"value" => Polars::Int32}
|
145
|
+
# )
|
146
|
+
#
|
147
|
+
# @example Select all columns, casting them to string:
|
148
|
+
# df.select(Polars.cs.all.cast(Polars::String))
|
149
|
+
# # =>
|
150
|
+
# # shape: (2, 2)
|
151
|
+
# # ┌────────────┬─────────┐
|
152
|
+
# # │ dt ┆ value │
|
153
|
+
# # │ --- ┆ --- │
|
154
|
+
# # │ str ┆ str │
|
155
|
+
# # ╞════════════╪═════════╡
|
156
|
+
# # │ 1999-12-31 ┆ 1234500 │
|
157
|
+
# # │ 2024-01-01 ┆ 5000555 │
|
158
|
+
# # └────────────┴─────────┘
|
159
|
+
#
|
160
|
+
# @example Select all columns *except* for those matching the given dtypes:
|
161
|
+
# df.select(Polars.cs.all - Polars.cs.numeric)
|
162
|
+
# # =>
|
163
|
+
# # shape: (2, 1)
|
164
|
+
# # ┌────────────┐
|
165
|
+
# # │ dt │
|
166
|
+
# # │ --- │
|
167
|
+
# # │ date │
|
168
|
+
# # ╞════════════╡
|
169
|
+
# # │ 1999-12-31 │
|
170
|
+
# # │ 2024-01-01 │
|
171
|
+
# # └────────────┘
|
172
|
+
def self.all
  # Wrap the `F.all` expression in a selector proxy named "all".
  every_column = F.all
  _selector_proxy_(every_column, name: "all")
end
|
175
|
+
|
176
|
+
# Select all columns with alphabetic names (eg: only letters).
|
177
|
+
#
|
178
|
+
# @param ascii_only [Boolean]
|
179
|
+
# Indicate whether to consider only ASCII alphabetic characters, or the full
|
180
|
+
# Unicode range of valid letters (accented, ideographic, etc).
|
181
|
+
# @param ignore_spaces [Boolean]
|
182
|
+
# Indicate whether to ignore the presence of spaces in column names; if so,
|
183
|
+
# only the other (non-space) characters are considered.
|
184
|
+
#
|
185
|
+
# @return [SelectorProxy]
|
186
|
+
#
|
187
|
+
# @note
|
188
|
+
# Matching column names cannot contain *any* non-alphabetic characters. Note
|
189
|
+
# that the definition of "alphabetic" consists of all valid Unicode alphabetic
|
190
|
+
# characters (`\p{Alphabetic}`) by default; this can be changed by setting
|
191
|
+
# `ascii_only: true`.
|
192
|
+
#
|
193
|
+
# @example
|
194
|
+
# df = Polars::DataFrame.new(
|
195
|
+
# {
|
196
|
+
# "no1" => [100, 200, 300],
|
197
|
+
# "café" => ["espresso", "latte", "mocha"],
|
198
|
+
# "t or f" => [true, false, nil],
|
199
|
+
# "hmm" => ["aaa", "bbb", "ccc"],
|
200
|
+
# "都市" => ["東京", "大阪", "京都"]
|
201
|
+
# }
|
202
|
+
# )
|
203
|
+
#
|
204
|
+
# @example Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:
|
205
|
+
# df.select(Polars.cs.alpha)
|
206
|
+
# # =>
|
207
|
+
# # shape: (3, 3)
|
208
|
+
# # ┌──────────┬─────┬──────┐
|
209
|
+
# # │ café ┆ hmm ┆ 都市 │
|
210
|
+
# # │ --- ┆ --- ┆ --- │
|
211
|
+
# # │ str ┆ str ┆ str │
|
212
|
+
# # ╞══════════╪═════╪══════╡
|
213
|
+
# # │ espresso ┆ aaa ┆ 東京 │
|
214
|
+
# # │ latte ┆ bbb ┆ 大阪 │
|
215
|
+
# # │ mocha ┆ ccc ┆ 京都 │
|
216
|
+
# # └──────────┴─────┴──────┘
|
217
|
+
#
|
218
|
+
# @example Constrain the definition of "alphabetic" to ASCII characters only:
|
219
|
+
# df.select(Polars.cs.alpha(ascii_only: true))
|
220
|
+
# # =>
|
221
|
+
# # shape: (3, 1)
|
222
|
+
# # ┌─────┐
|
223
|
+
# # │ hmm │
|
224
|
+
# # │ --- │
|
225
|
+
# # │ str │
|
226
|
+
# # ╞═════╡
|
227
|
+
# # │ aaa │
|
228
|
+
# # │ bbb │
|
229
|
+
# # │ ccc │
|
230
|
+
# # └─────┘
|
231
|
+
#
|
232
|
+
# @example
|
233
|
+
# df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
|
234
|
+
# # =>
|
235
|
+
# # shape: (3, 2)
|
236
|
+
# # ┌────────┬─────┐
|
237
|
+
# # │ t or f ┆ hmm │
|
238
|
+
# # │ --- ┆ --- │
|
239
|
+
# # │ bool ┆ str │
|
240
|
+
# # ╞════════╪═════╡
|
241
|
+
# # │ true ┆ aaa │
|
242
|
+
# # │ false ┆ bbb │
|
243
|
+
# # │ null ┆ ccc │
|
244
|
+
# # └────────┴─────┘
|
245
|
+
#
|
246
|
+
# @example Select all columns *except* for those with alphabetic names:
|
247
|
+
# df.select(~Polars.cs.alpha)
|
248
|
+
# # =>
|
249
|
+
# # shape: (3, 2)
|
250
|
+
# # ┌─────┬────────┐
|
251
|
+
# # │ no1 ┆ t or f │
|
252
|
+
# # │ --- ┆ --- │
|
253
|
+
# # │ i64 ┆ bool │
|
254
|
+
# # ╞═════╪════════╡
|
255
|
+
# # │ 100 ┆ true │
|
256
|
+
# # │ 200 ┆ false │
|
257
|
+
# # │ 300 ┆ null │
|
258
|
+
# # └─────┴────────┘
|
259
|
+
#
|
260
|
+
# @example
|
261
|
+
# df.select(~Polars.cs.alpha(ignore_spaces: true))
|
262
|
+
# # =>
|
263
|
+
# # shape: (3, 1)
|
264
|
+
# # ┌─────┐
|
265
|
+
# # │ no1 │
|
266
|
+
# # │ --- │
|
267
|
+
# # │ i64 │
|
268
|
+
# # ╞═════╡
|
269
|
+
# # │ 100 │
|
270
|
+
# # │ 200 │
|
271
|
+
# # │ 300 │
|
272
|
+
# # └─────┘
|
273
|
+
def self.alpha(ascii_only: false, ignore_spaces: false)
  # The pattern has to be valid for the *rust* regex crate.
  char_class = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
  # Optionally allow spaces inside the character class.
  char_class += " " if ignore_spaces
  pattern = "^[#{char_class}]+$"

  _selector_proxy_(
    F.col(pattern),
    name: "alpha",
    parameters: {"ascii_only" => ascii_only, "ignore_spaces" => ignore_spaces}
  )
end
|
283
|
+
|
284
|
+
# TODO
|
285
|
+
# def alphanumeric
|
286
|
+
# end
|
287
|
+
|
288
|
+
# Select all binary columns.
|
289
|
+
#
|
290
|
+
# @return [SelectorProxy]
|
291
|
+
#
|
292
|
+
# @example
|
293
|
+
# df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
|
294
|
+
# # =>
|
295
|
+
# # shape: (1, 4)
|
296
|
+
# # ┌──────────┬───────┬────────┬─────┐
|
297
|
+
# # │ a ┆ b ┆ c ┆ d │
|
298
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
299
|
+
# # │ binary ┆ str ┆ binary ┆ str │
|
300
|
+
# # ╞══════════╪═══════╪════════╪═════╡
|
301
|
+
# # │ b"hello" ┆ world ┆ b"!" ┆ :) │
|
302
|
+
# # └──────────┴───────┴────────┴─────┘
|
303
|
+
#
|
304
|
+
# @example Select binary columns and export as a hash:
|
305
|
+
# df.select(Polars.cs.binary).to_h(as_series: false)
|
306
|
+
# # => {"a"=>["hello"], "c"=>["!"]}
|
307
|
+
#
|
308
|
+
# @example Select all columns *except* for those that are binary:
|
309
|
+
# df.select(~Polars.cs.binary).to_h(as_series: false)
|
310
|
+
# # => {"b"=>["world"], "d"=>[":)"]}
|
311
|
+
def self.binary
  # Wrap the `Binary` dtype column expression in a proxy named "binary".
  binary_columns = F.col(Binary)
  _selector_proxy_(binary_columns, name: "binary")
end
|
314
|
+
|
315
|
+
# Select all boolean columns.
|
316
|
+
#
|
317
|
+
# @return [SelectorProxy]
|
318
|
+
#
|
319
|
+
# @example
|
320
|
+
# df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
|
321
|
+
# # =>
|
322
|
+
# # shape: (4, 2)
|
323
|
+
# # ┌─────┬────────┐
|
324
|
+
# # │ n ┆ n_even │
|
325
|
+
# # │ --- ┆ --- │
|
326
|
+
# # │ i64 ┆ bool │
|
327
|
+
# # ╞═════╪════════╡
|
328
|
+
# # │ 1 ┆ false │
|
329
|
+
# # │ 2 ┆ true │
|
330
|
+
# # │ 3 ┆ false │
|
331
|
+
# # │ 4 ┆ true │
|
332
|
+
# # └─────┴────────┘
|
333
|
+
#
|
334
|
+
# @example Select and invert boolean columns:
|
335
|
+
# df.with_columns(is_odd: Polars.cs.boolean.not_)
|
336
|
+
# # =>
|
337
|
+
# # shape: (4, 3)
|
338
|
+
# # ┌─────┬────────┬────────┐
|
339
|
+
# # │ n ┆ n_even ┆ is_odd │
|
340
|
+
# # │ --- ┆ --- ┆ --- │
|
341
|
+
# # │ i64 ┆ bool ┆ bool │
|
342
|
+
# # ╞═════╪════════╪════════╡
|
343
|
+
# # │ 1 ┆ false ┆ true │
|
344
|
+
# # │ 2 ┆ true ┆ false │
|
345
|
+
# # │ 3 ┆ false ┆ true │
|
346
|
+
# # │ 4 ┆ true ┆ false │
|
347
|
+
# # └─────┴────────┴────────┘
|
348
|
+
#
|
349
|
+
# @example Select all columns *except* for those that are boolean:
|
350
|
+
# df.select(~Polars.cs.boolean)
|
351
|
+
# # =>
|
352
|
+
# # shape: (4, 1)
|
353
|
+
# # ┌─────┐
|
354
|
+
# # │ n │
|
355
|
+
# # │ --- │
|
356
|
+
# # │ i64 │
|
357
|
+
# # ╞═════╡
|
358
|
+
# # │ 1 │
|
359
|
+
# # │ 2 │
|
360
|
+
# # │ 3 │
|
361
|
+
# # │ 4 │
|
362
|
+
# # └─────┘
|
363
|
+
def self.boolean
  # Wrap the `Boolean` dtype column expression in a proxy named "boolean".
  boolean_columns = F.col(Boolean)
  _selector_proxy_(boolean_columns, name: "boolean")
end
|
366
|
+
|
367
|
+
# TODO
|
368
|
+
# def by_dtype
|
369
|
+
# end
|
370
|
+
|
371
|
+
# TODO
|
372
|
+
# def by_index
|
373
|
+
# end
|
374
|
+
|
375
|
+
# Select all columns matching the given names.
|
376
|
+
#
|
377
|
+
# @param names [Array]
|
378
|
+
# One or more names of columns to select.
|
379
|
+
# @param require_all [Boolean]
|
380
|
+
# Whether to match *all* names (the default) or *any* of the names.
|
381
|
+
#
|
382
|
+
# @return [SelectorProxy]
|
383
|
+
#
|
384
|
+
# @note
|
385
|
+
# Matching columns are returned in the order in which they are declared in
|
386
|
+
# the selector, not the underlying schema order.
|
387
|
+
#
|
388
|
+
# @example
|
389
|
+
# df = Polars::DataFrame.new(
|
390
|
+
# {
|
391
|
+
# "foo" => ["x", "y"],
|
392
|
+
# "bar" => [123, 456],
|
393
|
+
# "baz" => [2.0, 5.5],
|
394
|
+
# "zap" => [false, true]
|
395
|
+
# }
|
396
|
+
# )
|
397
|
+
#
|
398
|
+
# @example Select columns by name:
|
399
|
+
# df.select(Polars.cs.by_name("foo", "bar"))
|
400
|
+
# # =>
|
401
|
+
# # shape: (2, 2)
|
402
|
+
# # ┌─────┬─────┐
|
403
|
+
# # │ foo ┆ bar │
|
404
|
+
# # │ --- ┆ --- │
|
405
|
+
# # │ str ┆ i64 │
|
406
|
+
# # ╞═════╪═════╡
|
407
|
+
# # │ x ┆ 123 │
|
408
|
+
# # │ y ┆ 456 │
|
409
|
+
# # └─────┴─────┘
|
410
|
+
#
|
411
|
+
# @example Match *any* of the given columns by name:
|
412
|
+
# df.select(Polars.cs.by_name("baz", "moose", "foo", "bear", require_all: false))
|
413
|
+
# # =>
|
414
|
+
# # shape: (2, 2)
|
415
|
+
# # ┌─────┬─────┐
|
416
|
+
# # │ foo ┆ baz │
|
417
|
+
# # │ --- ┆ --- │
|
418
|
+
# # │ str ┆ f64 │
|
419
|
+
# # ╞═════╪═════╡
|
420
|
+
# # │ x ┆ 2.0 │
|
421
|
+
# # │ y ┆ 5.5 │
|
422
|
+
# # └─────┴─────┘
|
423
|
+
#
|
424
|
+
# @example Match all columns *except* for those given:
|
425
|
+
# df.select(~Polars.cs.by_name("foo", "bar"))
|
426
|
+
# # =>
|
427
|
+
# # shape: (2, 2)
|
428
|
+
# # ┌─────┬───────┐
|
429
|
+
# # │ baz ┆ zap │
|
430
|
+
# # │ --- ┆ --- │
|
431
|
+
# # │ f64 ┆ bool │
|
432
|
+
# # ╞═════╪═══════╡
|
433
|
+
# # │ 2.0 ┆ false │
|
434
|
+
# # │ 5.5 ┆ true │
|
435
|
+
# # └─────┴───────┘
|
436
|
+
def self.by_name(*names, require_all: true)
  # Collect the names, raising on the first argument that is not a String.
  all_names = names.each_with_object([]) do |candidate, accepted|
    raise TypeError, "invalid name: #{candidate.inspect}" unless candidate.is_a?(::String)

    accepted << candidate
  end

  selector_params = {"*names" => all_names}
  if require_all
    match_cols = all_names
  else
    # Build one anchored alternation of the escaped names and pass that
    # pattern string to F.col instead of the list of names.
    alternation = all_names.map { |nm| Utils.re_escape(nm) }.join("|")
    match_cols = "^(#{alternation})$"
    selector_params["require_all"] = require_all
  end

  _selector_proxy_(F.col(match_cols), name: "by_name", parameters: selector_params)
end
|
460
|
+
|
461
|
+
# Select all categorical columns.
|
462
|
+
#
|
463
|
+
# @return [SelectorProxy]
|
464
|
+
#
|
465
|
+
# @example
|
466
|
+
# df = Polars::DataFrame.new(
|
467
|
+
# {
|
468
|
+
# "foo" => ["xx", "yy"],
|
469
|
+
# "bar" => [123, 456],
|
470
|
+
# "baz" => [2.0, 5.5]
|
471
|
+
# },
|
472
|
+
# schema_overrides: {"foo" => Polars::Categorical}
|
473
|
+
# )
|
474
|
+
#
|
475
|
+
# @example Select all categorical columns:
|
476
|
+
# df.select(Polars.cs.categorical)
|
477
|
+
# # =>
|
478
|
+
# # shape: (2, 1)
|
479
|
+
# # ┌─────┐
|
480
|
+
# # │ foo │
|
481
|
+
# # │ --- │
|
482
|
+
# # │ cat │
|
483
|
+
# # ╞═════╡
|
484
|
+
# # │ xx │
|
485
|
+
# # │ yy │
|
486
|
+
# # └─────┘
|
487
|
+
#
|
488
|
+
# @example Select all columns *except* for those that are categorical:
|
489
|
+
# df.select(~Polars.cs.categorical)
|
490
|
+
# # =>
|
491
|
+
# # shape: (2, 2)
|
492
|
+
# # ┌─────┬─────┐
|
493
|
+
# # │ bar ┆ baz │
|
494
|
+
# # │ --- ┆ --- │
|
495
|
+
# # │ i64 ┆ f64 │
|
496
|
+
# # ╞═════╪═════╡
|
497
|
+
# # │ 123 ┆ 2.0 │
|
498
|
+
# # │ 456 ┆ 5.5 │
|
499
|
+
# # └─────┴─────┘
|
500
|
+
def self.categorical
  # Wrap the `Categorical` dtype column expression in a proxy named "categorical".
  categorical_columns = F.col(Categorical)
  _selector_proxy_(categorical_columns, name: "categorical")
end
|
503
|
+
|
504
|
+
# Select columns whose names contain the given literal substring(s).
|
505
|
+
#
|
506
|
+
# @param substring [Object]
|
507
|
+
# Substring(s) that matching column names should contain.
|
508
|
+
#
|
509
|
+
# @return [SelectorProxy]
|
510
|
+
#
|
511
|
+
# @example
|
512
|
+
# df = Polars::DataFrame.new(
|
513
|
+
# {
|
514
|
+
# "foo" => ["x", "y"],
|
515
|
+
# "bar" => [123, 456],
|
516
|
+
# "baz" => [2.0, 5.5],
|
517
|
+
# "zap" => [false, true]
|
518
|
+
# }
|
519
|
+
# )
|
520
|
+
#
|
521
|
+
# @example Select columns that contain the substring 'ba':
|
522
|
+
# df.select(Polars.cs.contains("ba"))
|
523
|
+
# # =>
|
524
|
+
# # shape: (2, 2)
|
525
|
+
# # ┌─────┬─────┐
|
526
|
+
# # │ bar ┆ baz │
|
527
|
+
# # │ --- ┆ --- │
|
528
|
+
# # │ i64 ┆ f64 │
|
529
|
+
# # ╞═════╪═════╡
|
530
|
+
# # │ 123 ┆ 2.0 │
|
531
|
+
# # │ 456 ┆ 5.5 │
|
532
|
+
# # └─────┴─────┘
|
533
|
+
#
|
534
|
+
# @example Select columns that contain the substring 'ba' or the letter 'z':
|
535
|
+
# df.select(Polars.cs.contains("ba", "z"))
|
536
|
+
# # =>
|
537
|
+
# # shape: (2, 3)
|
538
|
+
# # ┌─────┬─────┬───────┐
|
539
|
+
# # │ bar ┆ baz ┆ zap │
|
540
|
+
# # │ --- ┆ --- ┆ --- │
|
541
|
+
# # │ i64 ┆ f64 ┆ bool │
|
542
|
+
# # ╞═════╪═════╪═══════╡
|
543
|
+
# # │ 123 ┆ 2.0 ┆ false │
|
544
|
+
# # │ 456 ┆ 5.5 ┆ true │
|
545
|
+
# # └─────┴─────┴───────┘
|
546
|
+
#
|
547
|
+
# @example Select all columns *except* for those that contain the substring 'ba':
|
548
|
+
# df.select(~Polars.cs.contains("ba"))
|
549
|
+
# # =>
|
550
|
+
# # shape: (2, 2)
|
551
|
+
# # ┌─────┬───────┐
|
552
|
+
# # │ foo ┆ zap │
|
553
|
+
# # │ --- ┆ --- │
|
554
|
+
# # │ str ┆ bool │
|
555
|
+
# # ╞═════╪═══════╡
|
556
|
+
# # │ x ┆ false │
|
557
|
+
# # │ y ┆ true │
|
558
|
+
# # └─────┴───────┘
|
559
|
+
def self.contains(*substring)
  # Regex group of the given substrings, as built by `_re_string`.
  alternation = _re_string(substring)

  # The group may appear anywhere in the column name.
  _selector_proxy_(
    F.col("^.*#{alternation}.*$"),
    name: "contains",
    parameters: {"*substring" => alternation}
  )
end
|
569
|
+
|
570
|
+
# Select all date columns.
|
571
|
+
#
|
572
|
+
# @return [SelectorProxy]
|
573
|
+
#
|
574
|
+
# @example
|
575
|
+
# df = Polars::DataFrame.new(
|
576
|
+
# {
|
577
|
+
# "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
|
578
|
+
# "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
|
579
|
+
# }
|
580
|
+
# )
|
581
|
+
#
|
582
|
+
# @example Select all date columns:
|
583
|
+
# df.select(Polars.cs.date)
|
584
|
+
# # =>
|
585
|
+
# # shape: (2, 1)
|
586
|
+
# # ┌────────────┐
|
587
|
+
# # │ dt │
|
588
|
+
# # │ --- │
|
589
|
+
# # │ date │
|
590
|
+
# # ╞════════════╡
|
591
|
+
# # │ 1999-12-31 │
|
592
|
+
# # │ 2024-08-09 │
|
593
|
+
# # └────────────┘
|
594
|
+
#
|
595
|
+
# @example Select all columns *except* for those that are dates:
|
596
|
+
# df.select(~Polars.cs.date)
|
597
|
+
# # =>
|
598
|
+
# # shape: (2, 1)
|
599
|
+
# # ┌─────────────────────┐
|
600
|
+
# # │ dtm │
|
601
|
+
# # │ --- │
|
602
|
+
# # │ datetime[ns] │
|
603
|
+
# # ╞═════════════════════╡
|
604
|
+
# # │ 2001-05-07 10:25:00 │
|
605
|
+
# # │ 2031-12-31 00:30:00 │
|
606
|
+
# # └─────────────────────┘
|
607
|
+
def self.date
  # Wrap the `Date` dtype column expression in a proxy named "date".
  date_columns = F.col(Date)
  _selector_proxy_(date_columns, name: "date")
end
|
610
|
+
|
611
|
+
# TODO
|
612
|
+
# def datetime
|
613
|
+
# end
|
614
|
+
|
615
|
+
# Select all decimal columns.
|
616
|
+
#
|
617
|
+
# @return [SelectorProxy]
|
618
|
+
#
|
619
|
+
# @example
|
620
|
+
# df = Polars::DataFrame.new(
|
621
|
+
# {
|
622
|
+
# "foo" => ["x", "y"],
|
623
|
+
# "bar" => [BigDecimal("123"), BigDecimal("456")],
|
624
|
+
# "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
|
625
|
+
# },
|
626
|
+
# schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
|
627
|
+
# )
|
628
|
+
#
|
629
|
+
# @example Select all decimal columns:
|
630
|
+
# df.select(Polars.cs.decimal)
|
631
|
+
# # =>
|
632
|
+
# # shape: (2, 2)
|
633
|
+
# # ┌──────────────┬───────────────┐
|
634
|
+
# # │ bar ┆ baz │
|
635
|
+
# # │ --- ┆ --- │
|
636
|
+
# # │ decimal[*,0] ┆ decimal[10,5] │
|
637
|
+
# # ╞══════════════╪═══════════════╡
|
638
|
+
# # │ 123 ┆ 2.00050 │
|
639
|
+
# # │ 456 ┆ -50.55550 │
|
640
|
+
# # └──────────────┴───────────────┘
|
641
|
+
#
|
642
|
+
# @example Select all columns *except* the decimal ones:
|
643
|
+
#
|
644
|
+
# df.select(~Polars.cs.decimal)
|
645
|
+
# # =>
|
646
|
+
# # shape: (2, 1)
|
647
|
+
# # ┌─────┐
|
648
|
+
# # │ foo │
|
649
|
+
# # │ --- │
|
650
|
+
# # │ str │
|
651
|
+
# # ╞═════╡
|
652
|
+
# # │ x │
|
653
|
+
# # │ y │
|
654
|
+
# # └─────┘
|
655
|
+
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  decimal_columns = F.col(Decimal)
  _selector_proxy_(decimal_columns, name: "decimal")
end
|
659
|
+
|
660
|
+
# Select columns that end with the given substring(s).
|
661
|
+
#
|
662
|
+
# @param suffix [Object]
|
663
|
+
# Substring(s) that matching column names should end with.
|
664
|
+
#
|
665
|
+
# @return [SelectorProxy]
|
666
|
+
#
|
667
|
+
# @example
|
668
|
+
# df = Polars::DataFrame.new(
|
669
|
+
# {
|
670
|
+
# "foo" => ["x", "y"],
|
671
|
+
# "bar" => [123, 456],
|
672
|
+
# "baz" => [2.0, 5.5],
|
673
|
+
# "zap" => [false, true]
|
674
|
+
# }
|
675
|
+
# )
|
676
|
+
#
|
677
|
+
# @example Select columns that end with the substring 'z':
|
678
|
+
# df.select(Polars.cs.ends_with("z"))
|
679
|
+
# # =>
|
680
|
+
# # shape: (2, 1)
|
681
|
+
# # ┌─────┐
|
682
|
+
# # │ baz │
|
683
|
+
# # │ --- │
|
684
|
+
# # │ f64 │
|
685
|
+
# # ╞═════╡
|
686
|
+
# # │ 2.0 │
|
687
|
+
# # │ 5.5 │
|
688
|
+
# # └─────┘
|
689
|
+
#
|
690
|
+
# @example Select columns that end with *either* the letter 'z' or 'r':
|
691
|
+
# df.select(Polars.cs.ends_with("z", "r"))
|
692
|
+
# # =>
|
693
|
+
# # shape: (2, 2)
|
694
|
+
# # ┌─────┬─────┐
|
695
|
+
# # │ bar ┆ baz │
|
696
|
+
# # │ --- ┆ --- │
|
697
|
+
# # │ i64 ┆ f64 │
|
698
|
+
# # ╞═════╪═════╡
|
699
|
+
# # │ 123 ┆ 2.0 │
|
700
|
+
# # │ 456 ┆ 5.5 │
|
701
|
+
# # └─────┴─────┘
|
702
|
+
#
|
703
|
+
# @example Select all columns *except* for those that end with the substring 'z':
|
704
|
+
# df.select(~Polars.cs.ends_with("z"))
|
705
|
+
# # =>
|
706
|
+
# # shape: (2, 3)
|
707
|
+
# # ┌─────┬─────┬───────┐
|
708
|
+
# # │ foo ┆ bar ┆ zap │
|
709
|
+
# # │ --- ┆ --- ┆ --- │
|
710
|
+
# # │ str ┆ i64 ┆ bool │
|
711
|
+
# # ╞═════╪═════╪═══════╡
|
712
|
+
# # │ x ┆ 123 ┆ false │
|
713
|
+
# # │ y ┆ 456 ┆ true │
|
714
|
+
# # └─────┴─────┴───────┘
|
715
|
+
def self.ends_with(*suffix)
  # Regex group of the given suffixes, as built by `_re_string`.
  alternation = _re_string(suffix)

  # Anchor the group at the end of the column name.
  _selector_proxy_(
    F.col("^.*#{alternation}$"),
    name: "ends_with",
    parameters: {"*suffix" => alternation}
  )
end
|
725
|
+
|
726
|
+
# Select the first column in the current scope.
|
727
|
+
#
|
728
|
+
# @return [SelectorProxy]
|
729
|
+
#
|
730
|
+
# @example
|
731
|
+
# df = Polars::DataFrame.new(
|
732
|
+
# {
|
733
|
+
# "foo" => ["x", "y"],
|
734
|
+
# "bar" => [123, 456],
|
735
|
+
# "baz" => [2.0, 5.5],
|
736
|
+
# "zap" => [0, 1]
|
737
|
+
# }
|
738
|
+
# )
|
739
|
+
#
|
740
|
+
# @example Select the first column:
|
741
|
+
# df.select(Polars.cs.first)
|
742
|
+
# # =>
|
743
|
+
# # shape: (2, 1)
|
744
|
+
# # ┌─────┐
|
745
|
+
# # │ foo │
|
746
|
+
# # │ --- │
|
747
|
+
# # │ str │
|
748
|
+
# # ╞═════╡
|
749
|
+
# # │ x │
|
750
|
+
# # │ y │
|
751
|
+
# # └─────┘
|
752
|
+
#
|
753
|
+
# @example Select everything *except* for the first column:
|
754
|
+
# df.select(~Polars.cs.first)
|
755
|
+
# # =>
|
756
|
+
# # shape: (2, 3)
|
757
|
+
# # ┌─────┬─────┬─────┐
|
758
|
+
# # │ bar ┆ baz ┆ zap │
|
759
|
+
# # │ --- ┆ --- ┆ --- │
|
760
|
+
# # │ i64 ┆ f64 ┆ i64 │
|
761
|
+
# # ╞═════╪═════╪═════╡
|
762
|
+
# # │ 123 ┆ 2.0 ┆ 0 │
|
763
|
+
# # │ 456 ┆ 5.5 ┆ 1 │
|
764
|
+
# # └─────┴─────┴─────┘
|
765
|
+
def self.first
  # Wrap the `F.first` expression in a selector proxy named "first".
  leading_column = F.first
  _selector_proxy_(leading_column, name: "first")
end
|
768
|
+
|
769
|
+
# Select all float columns.
|
770
|
+
#
|
771
|
+
# @return [SelectorProxy]
|
772
|
+
#
|
773
|
+
# @example
|
774
|
+
# df = Polars::DataFrame.new(
|
775
|
+
# {
|
776
|
+
# "foo" => ["x", "y"],
|
777
|
+
# "bar" => [123, 456],
|
778
|
+
# "baz" => [2.0, 5.5],
|
779
|
+
# "zap" => [0.0, 1.0]
|
780
|
+
# },
|
781
|
+
# schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
|
782
|
+
# )
|
783
|
+
#
|
784
|
+
# @example Select all float columns:
|
785
|
+
# df.select(Polars.cs.float)
|
786
|
+
# # =>
|
787
|
+
# # shape: (2, 2)
|
788
|
+
# # ┌─────┬─────┐
|
789
|
+
# # │ baz ┆ zap │
|
790
|
+
# # │ --- ┆ --- │
|
791
|
+
# # │ f32 ┆ f64 │
|
792
|
+
# # ╞═════╪═════╡
|
793
|
+
# # │ 2.0 ┆ 0.0 │
|
794
|
+
# # │ 5.5 ┆ 1.0 │
|
795
|
+
# # └─────┴─────┘
|
796
|
+
#
|
797
|
+
# @example Select all columns *except* for those that are float:
|
798
|
+
# df.select(~Polars.cs.float)
|
799
|
+
# # =>
|
800
|
+
# # shape: (2, 2)
|
801
|
+
# # ┌─────┬─────┐
|
802
|
+
# # │ foo ┆ bar │
|
803
|
+
# # │ --- ┆ --- │
|
804
|
+
# # │ str ┆ i64 │
|
805
|
+
# # ╞═════╪═════╡
|
806
|
+
# # │ x ┆ 123 │
|
807
|
+
# # │ y ┆ 456 │
|
808
|
+
# # └─────┴─────┘
|
809
|
+
def self.float
  # Match columns whose dtype is in FLOAT_DTYPES.
  float_columns = F.col(FLOAT_DTYPES)
  _selector_proxy_(float_columns, name: "float")
end
|
812
|
+
|
813
|
+
# Select all integer columns.
|
814
|
+
#
|
815
|
+
# @return [SelectorProxy]
|
816
|
+
#
|
817
|
+
# @example
|
818
|
+
# df = Polars::DataFrame.new(
|
819
|
+
# {
|
820
|
+
# "foo" => ["x", "y"],
|
821
|
+
# "bar" => [123, 456],
|
822
|
+
# "baz" => [2.0, 5.5],
|
823
|
+
# "zap" => [0, 1]
|
824
|
+
# }
|
825
|
+
# )
|
826
|
+
#
|
827
|
+
# @example Select all integer columns:
|
828
|
+
# df.select(Polars.cs.integer)
|
829
|
+
# # =>
|
830
|
+
# # shape: (2, 2)
|
831
|
+
# # ┌─────┬─────┐
|
832
|
+
# # │ bar ┆ zap │
|
833
|
+
# # │ --- ┆ --- │
|
834
|
+
# # │ i64 ┆ i64 │
|
835
|
+
# # ╞═════╪═════╡
|
836
|
+
# # │ 123 ┆ 0 │
|
837
|
+
# # │ 456 ┆ 1 │
|
838
|
+
# # └─────┴─────┘
|
839
|
+
#
|
840
|
+
# @example Select all columns *except* for those that are integer:
|
841
|
+
# df.select(~Polars.cs.integer)
|
842
|
+
# # =>
|
843
|
+
# # shape: (2, 2)
|
844
|
+
# # ┌─────┬─────┐
|
845
|
+
# # │ foo ┆ baz │
|
846
|
+
# # │ --- ┆ --- │
|
847
|
+
# # │ str ┆ f64 │
|
848
|
+
# # ╞═════╪═════╡
|
849
|
+
# # │ x ┆ 2.0 │
|
850
|
+
# # │ y ┆ 5.5 │
|
851
|
+
# # └─────┴─────┘
|
852
|
+
def self.integer
  # Match columns whose dtype is in INTEGER_DTYPES.
  integer_columns = F.col(INTEGER_DTYPES)
  _selector_proxy_(integer_columns, name: "integer")
end
|
855
|
+
|
856
|
+
# Select all signed integer columns.
|
857
|
+
#
|
858
|
+
# @return [SelectorProxy]
|
859
|
+
#
|
860
|
+
# @example
|
861
|
+
# df = Polars::DataFrame.new(
|
862
|
+
# {
|
863
|
+
# "foo" => [-123, -456],
|
864
|
+
# "bar" => [3456, 6789],
|
865
|
+
# "baz" => [7654, 4321],
|
866
|
+
# "zap" => ["ab", "cd"]
|
867
|
+
# },
|
868
|
+
# schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
|
869
|
+
# )
|
870
|
+
#
|
871
|
+
# @example Select all signed integer columns:
|
872
|
+
# df.select(Polars.cs.signed_integer)
|
873
|
+
# # =>
|
874
|
+
# # shape: (2, 1)
|
875
|
+
# # ┌──────┐
|
876
|
+
# # │ foo │
|
877
|
+
# # │ --- │
|
878
|
+
# # │ i64 │
|
879
|
+
# # ╞══════╡
|
880
|
+
# # │ -123 │
|
881
|
+
# # │ -456 │
|
882
|
+
# # └──────┘
|
883
|
+
#
|
884
|
+
# @example
|
885
|
+
# df.select(~Polars.cs.signed_integer)
|
886
|
+
# # =>
|
887
|
+
# # shape: (2, 3)
|
888
|
+
# # ┌──────┬──────┬─────┐
|
889
|
+
# # │ bar ┆ baz ┆ zap │
|
890
|
+
# # │ --- ┆ --- ┆ --- │
|
891
|
+
# # │ u32 ┆ u64 ┆ str │
|
892
|
+
# # ╞══════╪══════╪═════╡
|
893
|
+
# # │ 3456 ┆ 7654 ┆ ab │
|
894
|
+
# # │ 6789 ┆ 4321 ┆ cd │
|
895
|
+
# # └──────┴──────┴─────┘
|
896
|
+
#
|
897
|
+
# @example Select all integer columns (both signed and unsigned):
|
898
|
+
# df.select(Polars.cs.integer)
|
899
|
+
# # =>
|
900
|
+
# # shape: (2, 3)
|
901
|
+
# # ┌──────┬──────┬──────┐
|
902
|
+
# # │ foo ┆ bar ┆ baz │
|
903
|
+
# # │ --- ┆ --- ┆ --- │
|
904
|
+
# # │ i64 ┆ u32 ┆ u64 │
|
905
|
+
# # ╞══════╪══════╪══════╡
|
906
|
+
# # │ -123 ┆ 3456 ┆ 7654 │
|
907
|
+
# # │ -456 ┆ 6789 ┆ 4321 │
|
908
|
+
# # └──────┴──────┴──────┘
|
909
|
+
def self.signed_integer
  # Match columns whose dtype is in SIGNED_INTEGER_DTYPES.
  signed_columns = F.col(SIGNED_INTEGER_DTYPES)
  _selector_proxy_(signed_columns, name: "signed_integer")
end
|
912
|
+
|
913
|
+
# Select all unsigned integer columns.
|
914
|
+
#
|
915
|
+
# @return [SelectorProxy]
|
916
|
+
#
|
917
|
+
# @example
|
918
|
+
# df = Polars::DataFrame.new(
|
919
|
+
# {
|
920
|
+
# "foo" => [-123, -456],
|
921
|
+
# "bar" => [3456, 6789],
|
922
|
+
# "baz" => [7654, 4321],
|
923
|
+
# "zap" => ["ab", "cd"]
|
924
|
+
# },
|
925
|
+
# schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
|
926
|
+
# )
|
927
|
+
#
|
928
|
+
# @example Select all unsigned integer columns:
|
929
|
+
# df.select(Polars.cs.unsigned_integer)
|
930
|
+
# # =>
|
931
|
+
# # shape: (2, 2)
|
932
|
+
# # ┌──────┬──────┐
|
933
|
+
# # │ bar ┆ baz │
|
934
|
+
# # │ --- ┆ --- │
|
935
|
+
# # │ u32 ┆ u64 │
|
936
|
+
# # ╞══════╪══════╡
|
937
|
+
# # │ 3456 ┆ 7654 │
|
938
|
+
# # │ 6789 ┆ 4321 │
|
939
|
+
# # └──────┴──────┘
|
940
|
+
#
|
941
|
+
# @example Select all columns *except* for those that are unsigned integers:
|
942
|
+
# df.select(~Polars.cs.unsigned_integer)
|
943
|
+
# # =>
|
944
|
+
# # shape: (2, 2)
|
945
|
+
# # ┌──────┬─────┐
|
946
|
+
# # │ foo ┆ zap │
|
947
|
+
# # │ --- ┆ --- │
|
948
|
+
# # │ i64 ┆ str │
|
949
|
+
# # ╞══════╪═════╡
|
950
|
+
# # │ -123 ┆ ab │
|
951
|
+
# # │ -456 ┆ cd │
|
952
|
+
# # └──────┴─────┘
|
953
|
+
#
|
954
|
+
# @example Select all integer columns (both signed and unsigned):
|
955
|
+
# df.select(Polars.cs.integer)
|
956
|
+
# # =>
|
957
|
+
# # shape: (2, 3)
|
958
|
+
# # ┌──────┬──────┬──────┐
|
959
|
+
# # │ foo ┆ bar ┆ baz │
|
960
|
+
# # │ --- ┆ --- ┆ --- │
|
961
|
+
# # │ i64 ┆ u32 ┆ u64 │
|
962
|
+
# # ╞══════╪══════╪══════╡
|
963
|
+
# # │ -123 ┆ 3456 ┆ 7654 │
|
964
|
+
# # │ -456 ┆ 6789 ┆ 4321 │
|
965
|
+
# # └──────┴──────┴──────┘
|
966
|
+
def self.unsigned_integer
  # Column expression over UNSIGNED_INTEGER_DTYPES, wrapped as the named selector.
  unsigned_expr = F.col(UNSIGNED_INTEGER_DTYPES)
  _selector_proxy_(unsigned_expr, name: "unsigned_integer")
end
|
969
|
+
|
970
|
+
# Select the last column in the current scope.
|
971
|
+
#
|
972
|
+
# @return [SelectorProxy]
|
973
|
+
#
|
974
|
+
# @example
|
975
|
+
# df = Polars::DataFrame.new(
|
976
|
+
# {
|
977
|
+
# "foo" => ["x", "y"],
|
978
|
+
# "bar" => [123, 456],
|
979
|
+
# "baz" => [2.0, 5.5],
|
980
|
+
# "zap" => [0, 1]
|
981
|
+
# }
|
982
|
+
# )
|
983
|
+
#
|
984
|
+
# @example Select the last column:
|
985
|
+
# df.select(Polars.cs.last)
|
986
|
+
# # =>
|
987
|
+
# # shape: (2, 1)
|
988
|
+
# # ┌─────┐
|
989
|
+
# # │ zap │
|
990
|
+
# # │ --- │
|
991
|
+
# # │ i64 │
|
992
|
+
# # ╞═════╡
|
993
|
+
# # │ 0 │
|
994
|
+
# # │ 1 │
|
995
|
+
# # └─────┘
|
996
|
+
#
|
997
|
+
# @example Select everything *except* for the last column:
|
998
|
+
# df.select(~Polars.cs.last)
|
999
|
+
# # =>
|
1000
|
+
# # shape: (2, 3)
|
1001
|
+
# # ┌─────┬─────┬─────┐
|
1002
|
+
# # │ foo ┆ bar ┆ baz │
|
1003
|
+
# # │ --- ┆ --- ┆ --- │
|
1004
|
+
# # │ str ┆ i64 ┆ f64 │
|
1005
|
+
# # ╞═════╪═════╪═════╡
|
1006
|
+
# # │ x ┆ 123 ┆ 2.0 │
|
1007
|
+
# # │ y ┆ 456 ┆ 5.5 │
|
1008
|
+
# # └─────┴─────┴─────┘
|
1009
|
+
def self.last
  # Wrap the `F.last` expression as the selector named "last".
  last_expr = F.last
  _selector_proxy_(last_expr, name: "last")
end
|
1012
|
+
|
1013
|
+
# Select all numeric columns.
|
1014
|
+
#
|
1015
|
+
# @return [SelectorProxy]
|
1016
|
+
#
|
1017
|
+
# @example
|
1018
|
+
# df = Polars::DataFrame.new(
|
1019
|
+
# {
|
1020
|
+
# "foo" => ["x", "y"],
|
1021
|
+
# "bar" => [123, 456],
|
1022
|
+
# "baz" => [2.0, 5.5],
|
1023
|
+
# "zap" => [0, 0]
|
1024
|
+
# },
|
1025
|
+
# schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
|
1026
|
+
# )
|
1027
|
+
#
|
1028
|
+
# @example Match all numeric columns:
|
1029
|
+
# df.select(Polars.cs.numeric)
|
1030
|
+
# # =>
|
1031
|
+
# # shape: (2, 3)
|
1032
|
+
# # ┌─────┬─────┬─────┐
|
1033
|
+
# # │ bar ┆ baz ┆ zap │
|
1034
|
+
# # │ --- ┆ --- ┆ --- │
|
1035
|
+
# # │ i16 ┆ f32 ┆ u8 │
|
1036
|
+
# # ╞═════╪═════╪═════╡
|
1037
|
+
# # │ 123 ┆ 2.0 ┆ 0 │
|
1038
|
+
# # │ 456 ┆ 5.5 ┆ 0 │
|
1039
|
+
# # └─────┴─────┴─────┘
|
1040
|
+
#
|
1041
|
+
# @example Match all columns *except* for those that are numeric:
|
1042
|
+
# df.select(~Polars.cs.numeric)
|
1043
|
+
# # =>
|
1044
|
+
# # shape: (2, 1)
|
1045
|
+
# # ┌─────┐
|
1046
|
+
# # │ foo │
|
1047
|
+
# # │ --- │
|
1048
|
+
# # │ str │
|
1049
|
+
# # ╞═════╡
|
1050
|
+
# # │ x │
|
1051
|
+
# # │ y │
|
1052
|
+
# # └─────┘
|
1053
|
+
def self.numeric
  # Column expression over NUMERIC_DTYPES, wrapped as the named selector.
  numeric_expr = F.col(NUMERIC_DTYPES)
  _selector_proxy_(numeric_expr, name: "numeric")
end
|
1056
|
+
|
1057
|
+
# Select columns that start with the given substring(s).
|
1058
|
+
#
|
1059
|
+
# @param prefix [Object]
|
1060
|
+
# Substring(s) that matching column names should start with.
|
1061
|
+
#
|
1062
|
+
# @return [SelectorProxy]
|
1063
|
+
#
|
1064
|
+
# @example
|
1065
|
+
# df = Polars::DataFrame.new(
|
1066
|
+
# {
|
1067
|
+
# "foo" => [1.0, 2.0],
|
1068
|
+
# "bar" => [3.0, 4.0],
|
1069
|
+
# "baz" => [5, 6],
|
1070
|
+
# "zap" => [7, 8]
|
1071
|
+
# }
|
1072
|
+
# )
|
1073
|
+
#
|
1074
|
+
# @example Match columns starting with a 'b':
|
1075
|
+
# df.select(Polars.cs.starts_with("b"))
|
1076
|
+
# # =>
|
1077
|
+
# # shape: (2, 2)
|
1078
|
+
# # ┌─────┬─────┐
|
1079
|
+
# # │ bar ┆ baz │
|
1080
|
+
# # │ --- ┆ --- │
|
1081
|
+
# # │ f64 ┆ i64 │
|
1082
|
+
# # ╞═════╪═════╡
|
1083
|
+
# # │ 3.0 ┆ 5 │
|
1084
|
+
# # │ 4.0 ┆ 6 │
|
1085
|
+
# # └─────┴─────┘
|
1086
|
+
#
|
1087
|
+
# @example Match columns starting with *either* the letter 'b' or 'z':
|
1088
|
+
# df.select(Polars.cs.starts_with("b", "z"))
|
1089
|
+
# # =>
|
1090
|
+
# # shape: (2, 3)
|
1091
|
+
# # ┌─────┬─────┬─────┐
|
1092
|
+
# # │ bar ┆ baz ┆ zap │
|
1093
|
+
# # │ --- ┆ --- ┆ --- │
|
1094
|
+
# # │ f64 ┆ i64 ┆ i64 │
|
1095
|
+
# # ╞═════╪═════╪═════╡
|
1096
|
+
# # │ 3.0 ┆ 5 ┆ 7 │
|
1097
|
+
# # │ 4.0 ┆ 6 ┆ 8 │
|
1098
|
+
# # └─────┴─────┴─────┘
|
1099
|
+
#
|
1100
|
+
# @example Match all columns *except* for those starting with 'b':
|
1101
|
+
# df.select(~Polars.cs.starts_with("b"))
|
1102
|
+
# # =>
|
1103
|
+
# # shape: (2, 2)
|
1104
|
+
# # ┌─────┬─────┐
|
1105
|
+
# # │ foo ┆ zap │
|
1106
|
+
# # │ --- ┆ --- │
|
1107
|
+
# # │ f64 ┆ i64 │
|
1108
|
+
# # ╞═════╪═════╡
|
1109
|
+
# # │ 1.0 ┆ 7 │
|
1110
|
+
# # │ 2.0 ┆ 8 │
|
1111
|
+
# # └─────┴─────┘
|
1112
|
+
def self.starts_with(*prefix)
  # Anchor the pattern at the start of the name; `.*$` accepts any remainder.
  pattern = "^#{_re_string(prefix)}.*$"

  # The original arguments are forwarded as the selector's recorded parameters.
  _selector_proxy_(F.col(pattern), name: "starts_with", parameters: {"*prefix" => prefix})
end
|
1122
|
+
|
1123
|
+
# Select all String (and, optionally, Categorical) string columns.
|
1124
|
+
#
|
1125
|
+
# @param include_categorical [Boolean]
#   Also select categorical columns.
#
# @return [SelectorProxy]
|
1126
|
+
#
|
1127
|
+
# @example
|
1128
|
+
# df = Polars::DataFrame.new(
|
1129
|
+
# {
|
1130
|
+
# "w" => ["xx", "yy", "xx", "yy", "xx"],
|
1131
|
+
# "x" => [1, 2, 1, 4, -2],
|
1132
|
+
# "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
|
1133
|
+
# "z" => ["a", "b", "a", "b", "b"]
|
1134
|
+
# },
|
1135
|
+
# ).with_columns(
|
1136
|
+
# z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
|
1137
|
+
# )
|
1138
|
+
#
|
1139
|
+
# @example Group by all string columns, sum the numeric columns, then sort by the string cols:
|
1140
|
+
# df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
# # =>
|
1141
|
+
# # shape: (2, 3)
|
1142
|
+
# # ┌─────┬─────┬─────┐
|
1143
|
+
# # │ w ┆ x ┆ y │
|
1144
|
+
# # │ --- ┆ --- ┆ --- │
|
1145
|
+
# # │ str ┆ i64 ┆ f64 │
|
1146
|
+
# # ╞═════╪═════╪═════╡
|
1147
|
+
# # │ xx ┆ 0 ┆ 2.0 │
|
1148
|
+
# # │ yy ┆ 6 ┆ 7.0 │
|
1149
|
+
# # └─────┴─────┴─────┘
|
1150
|
+
#
|
1151
|
+
# @example Group by all string *and* categorical columns:
|
1152
|
+
# df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
|
1153
|
+
# Polars.cs.string(include_categorical: true)
|
1154
|
+
# )
|
1155
|
+
# # =>
|
1156
|
+
# # shape: (3, 4)
|
1157
|
+
# # ┌─────┬─────┬─────┬──────┐
|
1158
|
+
# # │ w ┆ z ┆ x ┆ y │
|
1159
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1160
|
+
# # │ str ┆ cat ┆ i64 ┆ f64 │
|
1161
|
+
# # ╞═════╪═════╪═════╪══════╡
|
1162
|
+
# # │ xx ┆ a ┆ 2 ┆ 4.0 │
|
1163
|
+
# # │ xx ┆ b ┆ -2 ┆ -2.0 │
|
1164
|
+
# # │ yy ┆ b ┆ 6 ┆ 7.0 │
|
1165
|
+
# # └─────┴─────┴─────┴──────┘
|
1166
|
+
def self.string(include_categorical: false)
  # String is always matched; Categorical joins the list only on request.
  dtypes = include_categorical ? [String, Categorical] : [String]

  _selector_proxy_(F.col(dtypes), name: "string", parameters: {"include_categorical" => include_categorical})
end
|
1178
|
+
|
1179
|
+
# Select all time columns.
|
1180
|
+
#
|
1181
|
+
# @return [SelectorProxy]
|
1182
|
+
#
|
1183
|
+
# @example
|
1184
|
+
# df = Polars::DataFrame.new(
|
1185
|
+
# {
|
1186
|
+
# "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
|
1187
|
+
# "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
|
1188
|
+
# "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
|
1189
|
+
# },
|
1190
|
+
# schema_overrides: {"tm" => Polars::Time}
|
1191
|
+
# )
|
1192
|
+
#
|
1193
|
+
# @example Select all time columns:
|
1194
|
+
# df.select(Polars.cs.time)
|
1195
|
+
# # =>
|
1196
|
+
# # shape: (2, 1)
|
1197
|
+
# # ┌──────────┐
|
1198
|
+
# # │ tm │
|
1199
|
+
# # │ --- │
|
1200
|
+
# # │ time │
|
1201
|
+
# # ╞══════════╡
|
1202
|
+
# # │ 00:00:00 │
|
1203
|
+
# # │ 23:59:59 │
|
1204
|
+
# # └──────────┘
|
1205
|
+
#
|
1206
|
+
# @example Select all columns *except* for those that are times:
|
1207
|
+
# df.select(~Polars.cs.time)
|
1208
|
+
# # =>
|
1209
|
+
# # shape: (2, 2)
|
1210
|
+
# # ┌─────────────────────┬────────────┐
|
1211
|
+
# # │ dtm ┆ dt │
|
1212
|
+
# # │ --- ┆ --- │
|
1213
|
+
# # │ datetime[ns] ┆ date │
|
1214
|
+
# # ╞═════════════════════╪════════════╡
|
1215
|
+
# # │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
|
1216
|
+
# # │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
|
1217
|
+
# # └─────────────────────┴────────────┘
|
1218
|
+
def self.time
  # Column expression for the Time dtype, wrapped as the selector named "time".
  time_expr = F.col(Time)
  _selector_proxy_(time_expr, name: "time")
end
|
1221
|
+
end
|
1222
|
+
|
1223
|
+
# Shorthand accessor for the selectors module, used as `Polars.cs.<selector>`.
#
# @return [Module]
def self.cs = Polars::Selectors
|
1226
|
+
end
|