polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +1431 -1810
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +34 -31
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +204 -7
- data/lib/polars/list_name_space.rb +120 -1
- data/lib/polars/meta_expr.rb +7 -22
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +34 -11
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +11 -0
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
data/lib/polars/selectors.rb
CHANGED
@@ -1,118 +1,5 @@
|
|
1
1
|
module Polars
|
2
2
|
module Selectors
|
3
|
-
# @private
|
4
|
-
class SelectorProxy < Expr
|
5
|
-
attr_accessor :_attrs
|
6
|
-
attr_accessor :_repr_override
|
7
|
-
|
8
|
-
def initialize(
|
9
|
-
expr,
|
10
|
-
name:,
|
11
|
-
parameters: nil
|
12
|
-
)
|
13
|
-
self._rbexpr = expr._rbexpr
|
14
|
-
self._attrs = {
|
15
|
-
name: name,
|
16
|
-
params: parameters
|
17
|
-
}
|
18
|
-
end
|
19
|
-
|
20
|
-
def inspect
|
21
|
-
if !_attrs
|
22
|
-
as_expr.inspect
|
23
|
-
elsif _repr_override
|
24
|
-
_repr_override
|
25
|
-
else
|
26
|
-
selector_name = _attrs[:name]
|
27
|
-
params = _attrs[:params] || {}
|
28
|
-
set_ops = {"and" => "&", "or" => "|", "sub" => "-", "xor" => "^"}
|
29
|
-
if set_ops.include?(selector_name)
|
30
|
-
op = set_ops[selector_name]
|
31
|
-
"(#{params.values.map(&:inspect).join(" #{op} ")})"
|
32
|
-
else
|
33
|
-
str_params = params.map { |k, v| k.start_with?("*") ? v.inspect[1..-2] : "#{k}=#{v.inspect}" }.join(", ")
|
34
|
-
"Polars.cs.#{selector_name}(#{str_params})"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def ~
|
40
|
-
if Utils.is_selector(self)
|
41
|
-
inverted = Selectors.all - self
|
42
|
-
inverted._repr_override = "~#{inspect}"
|
43
|
-
else
|
44
|
-
inverted = ~as_expr
|
45
|
-
end
|
46
|
-
inverted
|
47
|
-
end
|
48
|
-
|
49
|
-
def -(other)
|
50
|
-
if Utils.is_selector(other)
|
51
|
-
SelectorProxy.new(
|
52
|
-
meta._as_selector.meta._selector_sub(other),
|
53
|
-
parameters: {"self" => self, "other" => other},
|
54
|
-
name: "sub"
|
55
|
-
)
|
56
|
-
else
|
57
|
-
as_expr - other
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def &(other)
|
62
|
-
if Utils.is_column(other)
|
63
|
-
raise Todo
|
64
|
-
end
|
65
|
-
if Utils.is_selector(other)
|
66
|
-
SelectorProxy.new(
|
67
|
-
meta._as_selector.meta._selector_and(other),
|
68
|
-
parameters: {"self" => self, "other" => other},
|
69
|
-
name: "and"
|
70
|
-
)
|
71
|
-
else
|
72
|
-
as_expr & other
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def |(other)
|
77
|
-
if Utils.is_column(other)
|
78
|
-
raise Todo
|
79
|
-
end
|
80
|
-
if Utils.is_selector(other)
|
81
|
-
SelectorProxy.new(
|
82
|
-
meta._as_selector.meta._selector_and(other),
|
83
|
-
parameters: {"self" => self, "other" => other},
|
84
|
-
name: "or"
|
85
|
-
)
|
86
|
-
else
|
87
|
-
as_expr | other
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
def ^(other)
|
92
|
-
if Utils.is_column(other)
|
93
|
-
raise Todo
|
94
|
-
end
|
95
|
-
if Utils.is_selector(other)
|
96
|
-
SelectorProxy.new(
|
97
|
-
meta._as_selector.meta._selector_and(other),
|
98
|
-
parameters: {"self" => self, "other" => other},
|
99
|
-
name: "xor"
|
100
|
-
)
|
101
|
-
else
|
102
|
-
as_expr ^ other
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
def as_expr
|
107
|
-
Expr._from_rbexpr(_rbexpr)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
# @private
|
112
|
-
def self._selector_proxy_(...)
|
113
|
-
SelectorProxy.new(...)
|
114
|
-
end
|
115
|
-
|
116
3
|
# @private
|
117
4
|
def self._re_string(string, escape: true)
|
118
5
|
if string.is_a?(::String)
|
@@ -131,9 +18,26 @@ module Polars
|
|
131
18
|
"(#{rx})"
|
132
19
|
end
|
133
20
|
|
21
|
+
# Select no columns.
|
22
|
+
#
|
23
|
+
# This is useful for composition with other selectors.
|
24
|
+
#
|
25
|
+
# @return [Selector]
|
26
|
+
#
|
27
|
+
# @example
|
28
|
+
# Polars::DataFrame.new({"a" => 1, "b" => 2}).select(Polars.cs.empty)
|
29
|
+
# # =>
|
30
|
+
# # shape: (0, 0)
|
31
|
+
# # ┌┐
|
32
|
+
# # ╞╡
|
33
|
+
# # └┘
|
34
|
+
def self.empty
|
35
|
+
Selector._from_rbselector(RbSelector.empty)
|
36
|
+
end
|
37
|
+
|
134
38
|
# Select all columns.
|
135
39
|
#
|
136
|
-
# @return [
|
40
|
+
# @return [Selector]
|
137
41
|
#
|
138
42
|
# @example
|
139
43
|
# df = Polars::DataFrame.new(
|
@@ -170,7 +74,7 @@ module Polars
|
|
170
74
|
# # │ 2024-01-01 │
|
171
75
|
# # └────────────┘
|
172
76
|
def self.all
|
173
|
-
|
77
|
+
Selector._from_rbselector(RbSelector.all)
|
174
78
|
end
|
175
79
|
|
176
80
|
# Select all columns with alphabetic names (eg: only letters).
|
@@ -182,7 +86,7 @@ module Polars
|
|
182
86
|
# Indicate whether to ignore the presence of spaces in column names; if so,
|
183
87
|
# only the other (non-space) characters are considered.
|
184
88
|
#
|
185
|
-
# @return [
|
89
|
+
# @return [Selector]
|
186
90
|
#
|
187
91
|
# @note
|
188
92
|
# Matching column names cannot contain *any* non-alphabetic characters. Note
|
@@ -274,20 +178,102 @@ module Polars
|
|
274
178
|
# note that we need to supply a pattern compatible with the *rust* regex crate
|
275
179
|
re_alpha = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
|
276
180
|
re_space = ignore_spaces ? " " : ""
|
277
|
-
|
278
|
-
F.col("^[#{re_alpha}#{re_space}]+$"),
|
279
|
-
name: "alpha",
|
280
|
-
parameters: {"ascii_only" => ascii_only, "ignore_spaces" => ignore_spaces},
|
281
|
-
)
|
181
|
+
Selector._from_rbselector(RbSelector.matches("^[#{re_alpha}#{re_space}]+$"))
|
282
182
|
end
|
283
183
|
|
284
|
-
#
|
285
|
-
#
|
286
|
-
#
|
184
|
+
# Select all columns with alphanumeric names (eg: only letters and the digits 0-9).
|
185
|
+
#
|
186
|
+
# @param ascii_only [Boolean]
|
187
|
+
# Indicate whether to consider only ASCII alphabetic characters, or the full
|
188
|
+
# Unicode range of valid letters (accented, ideographic, etc).
|
189
|
+
# @param ignore_spaces [Boolean]
|
190
|
+
# Indicate whether to ignore the presence of spaces in column names; if so,
|
191
|
+
# only the other (non-space) characters are considered.
|
192
|
+
#
|
193
|
+
# @return [Selector]
|
194
|
+
#
|
195
|
+
# @note
|
196
|
+
# Matching column names cannot contain *any* characters other than letters and digits.
|
197
|
+
# Note that the definition of "alphanumeric" consists of all valid Unicode alphabetic
|
198
|
+
# characters (`\p{Alphabetic}`) and digit characters (`\d`) by default; this
|
199
|
+
# can be changed by setting `ascii_only: true`.
|
200
|
+
#
|
201
|
+
# @example Select columns with alphanumeric names:
|
202
|
+
# df = Polars::DataFrame.new(
|
203
|
+
# {
|
204
|
+
# "1st_col" => [100, 200, 300],
|
205
|
+
# "flagged" => [true, false, true],
|
206
|
+
# "00prefix" => ["01:aa", "02:bb", "03:cc"],
|
207
|
+
# "last col" => ["x", "y", "z"]
|
208
|
+
# }
|
209
|
+
# )
|
210
|
+
# df.select(Polars.cs.alphanumeric)
|
211
|
+
# # =>
|
212
|
+
# # shape: (3, 2)
|
213
|
+
# # ┌─────────┬──────────┐
|
214
|
+
# # │ flagged ┆ 00prefix │
|
215
|
+
# # │ --- ┆ --- │
|
216
|
+
# # │ bool ┆ str │
|
217
|
+
# # ╞═════════╪══════════╡
|
218
|
+
# # │ true ┆ 01:aa │
|
219
|
+
# # │ false ┆ 02:bb │
|
220
|
+
# # │ true ┆ 03:cc │
|
221
|
+
# # └─────────┴──────────┘
|
222
|
+
#
|
223
|
+
# @example
|
224
|
+
# df.select(Polars.cs.alphanumeric(ignore_spaces: true))
|
225
|
+
# # =>
|
226
|
+
# # shape: (3, 3)
|
227
|
+
# # ┌─────────┬──────────┬──────────┐
|
228
|
+
# # │ flagged ┆ 00prefix ┆ last col │
|
229
|
+
# # │ --- ┆ --- ┆ --- │
|
230
|
+
# # │ bool ┆ str ┆ str │
|
231
|
+
# # ╞═════════╪══════════╪══════════╡
|
232
|
+
# # │ true ┆ 01:aa ┆ x │
|
233
|
+
# # │ false ┆ 02:bb ┆ y │
|
234
|
+
# # │ true ┆ 03:cc ┆ z │
|
235
|
+
# # └─────────┴──────────┴──────────┘
|
236
|
+
#
|
237
|
+
# @example Select all columns *except* for those with alphanumeric names:
|
238
|
+
# df.select(~Polars.cs.alphanumeric)
|
239
|
+
# # =>
|
240
|
+
# # shape: (3, 2)
|
241
|
+
# # ┌─────────┬──────────┐
|
242
|
+
# # │ 1st_col ┆ last col │
|
243
|
+
# # │ --- ┆ --- │
|
244
|
+
# # │ i64 ┆ str │
|
245
|
+
# # ╞═════════╪══════════╡
|
246
|
+
# # │ 100 ┆ x │
|
247
|
+
# # │ 200 ┆ y │
|
248
|
+
# # │ 300 ┆ z │
|
249
|
+
# # └─────────┴──────────┘
|
250
|
+
#
|
251
|
+
# @example
|
252
|
+
# df.select(~Polars.cs.alphanumeric(ignore_spaces: true))
|
253
|
+
# # =>
|
254
|
+
# # shape: (3, 1)
|
255
|
+
# # ┌─────────┐
|
256
|
+
# # │ 1st_col │
|
257
|
+
# # │ --- │
|
258
|
+
# # │ i64 │
|
259
|
+
# # ╞═════════╡
|
260
|
+
# # │ 100 │
|
261
|
+
# # │ 200 │
|
262
|
+
# # │ 300 │
|
263
|
+
# # └─────────┘
|
264
|
+
def self.alphanumeric(ascii_only: false, ignore_spaces: false)
|
265
|
+
# note that we need to supply patterns compatible with the *rust* regex crate
|
266
|
+
re_alpha = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
|
267
|
+
re_digit = ascii_only ? "0-9" : "\\d"
|
268
|
+
re_space = ignore_spaces ? " " : ""
|
269
|
+
return Selector._from_rbselector(
|
270
|
+
RbSelector.matches("^[#{re_alpha}#{re_digit}#{re_space}]+$")
|
271
|
+
)
|
272
|
+
end
|
287
273
|
|
288
274
|
# Select all binary columns.
|
289
275
|
#
|
290
|
-
# @return [
|
276
|
+
# @return [Selector]
|
291
277
|
#
|
292
278
|
# @example
|
293
279
|
# df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
|
@@ -309,12 +295,12 @@ module Polars
|
|
309
295
|
# df.select(~Polars.cs.binary).to_h(as_series: false)
|
310
296
|
# # => {"b"=>["world"], "d"=>[":)"]}
|
311
297
|
def self.binary
|
312
|
-
|
298
|
+
by_dtype([Binary])
|
313
299
|
end
|
314
300
|
|
315
301
|
# Select all boolean columns.
|
316
302
|
#
|
317
|
-
# @return [
|
303
|
+
# @return [Selector]
|
318
304
|
#
|
319
305
|
# @example
|
320
306
|
# df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
|
@@ -361,16 +347,173 @@ module Polars
|
|
361
347
|
# # │ 4 │
|
362
348
|
# # └─────┘
|
363
349
|
def self.boolean
|
364
|
-
|
350
|
+
by_dtype([Boolean])
|
351
|
+
end
|
352
|
+
|
353
|
+
# Select all columns matching the given dtypes.
|
354
|
+
#
|
355
|
+
# @return [Selector]
|
356
|
+
#
|
357
|
+
# @example Select all columns with date or string dtypes:
|
358
|
+
# df = Polars::DataFrame.new(
|
359
|
+
# {
|
360
|
+
# "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1), Date.new(2010, 7, 5)],
|
361
|
+
# "value" => [1_234_500, 5_000_555, -4_500_000],
|
362
|
+
# "other" => ["foo", "bar", "foo"]
|
363
|
+
# }
|
364
|
+
# )
|
365
|
+
# df.select(Polars.cs.by_dtype(Polars::Date, Polars::String))
|
366
|
+
# # =>
|
367
|
+
# # shape: (3, 2)
|
368
|
+
# # ┌────────────┬───────┐
|
369
|
+
# # │ dt ┆ other │
|
370
|
+
# # │ --- ┆ --- │
|
371
|
+
# # │ date ┆ str │
|
372
|
+
# # ╞════════════╪═══════╡
|
373
|
+
# # │ 1999-12-31 ┆ foo │
|
374
|
+
# # │ 2024-01-01 ┆ bar │
|
375
|
+
# # │ 2010-07-05 ┆ foo │
|
376
|
+
# # └────────────┴───────┘
|
377
|
+
#
|
378
|
+
# @example Select all columns that are not of date or string dtype:
|
379
|
+
# df.select(~Polars.cs.by_dtype(Polars::Date, Polars::String))
|
380
|
+
# # =>
|
381
|
+
# # shape: (3, 1)
|
382
|
+
# # ┌──────────┐
|
383
|
+
# # │ value │
|
384
|
+
# # │ --- │
|
385
|
+
# # │ i64 │
|
386
|
+
# # ╞══════════╡
|
387
|
+
# # │ 1234500 │
|
388
|
+
# # │ 5000555 │
|
389
|
+
# # │ -4500000 │
|
390
|
+
# # └──────────┘
|
391
|
+
#
|
392
|
+
# @example Group by string columns and sum the numeric columns:
|
393
|
+
# df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort("other")
|
394
|
+
# # =>
|
395
|
+
# # shape: (2, 2)
|
396
|
+
# # ┌───────┬──────────┐
|
397
|
+
# # │ other ┆ value │
|
398
|
+
# # │ --- ┆ --- │
|
399
|
+
# # │ str ┆ i64 │
|
400
|
+
# # ╞═══════╪══════════╡
|
401
|
+
# # │ bar ┆ 5000555 │
|
402
|
+
# # │ foo ┆ -3265500 │
|
403
|
+
# # └───────┴──────────┘
|
404
|
+
def self.by_dtype(*dtypes)
|
405
|
+
all_dtypes = []
|
406
|
+
dtypes.each do |tp|
|
407
|
+
if Utils.is_polars_dtype(tp) || tp.is_a?(Class)
|
408
|
+
all_dtypes << tp
|
409
|
+
elsif tp.is_a?(::Array)
|
410
|
+
tp.each do |t|
|
411
|
+
if !(Utils.is_polars_dtype(t) || t.is_a?(Class))
|
412
|
+
msg = "invalid dtype: #{t.inspect}"
|
413
|
+
raise TypeError, msg
|
414
|
+
end
|
415
|
+
all_dtypes << t
|
416
|
+
end
|
417
|
+
else
|
418
|
+
msg = "invalid dtype: #{tp.inspect}"
|
419
|
+
raise TypeError, msg
|
420
|
+
end
|
421
|
+
end
|
422
|
+
|
423
|
+
Selector._by_dtype(all_dtypes)
|
365
424
|
end
|
366
425
|
|
367
|
-
#
|
368
|
-
#
|
369
|
-
#
|
426
|
+
# Select all columns matching the given indices (or range objects).
|
427
|
+
#
|
428
|
+
# @param indices [Array]
|
429
|
+
# One or more column indices (or range objects).
|
430
|
+
# Negative indexing is supported.
|
431
|
+
#
|
432
|
+
# @return [Selector]
|
433
|
+
#
|
434
|
+
# @note
|
435
|
+
# Matching columns are returned in the order in which their indexes
|
436
|
+
# appear in the selector, not the underlying schema order.
|
437
|
+
#
|
438
|
+
# @example
|
439
|
+
# df = Polars::DataFrame.new(
|
440
|
+
# {
|
441
|
+
# "key" => ["abc"],
|
442
|
+
# **100.times.to_h { |i| ["c%02d" % i, 0.5 * i] }
|
443
|
+
# }
|
444
|
+
# )
|
445
|
+
# # =>
|
446
|
+
# # shape: (1, 101)
|
447
|
+
# # ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
|
448
|
+
# # │ key ┆ c00 ┆ c01 ┆ c02 ┆ … ┆ c96 ┆ c97 ┆ c98 ┆ c99 │
|
449
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
|
450
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
|
451
|
+
# # ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
|
452
|
+
# # │ abc ┆ 0.0 ┆ 0.5 ┆ 1.0 ┆ … ┆ 48.0 ┆ 48.5 ┆ 49.0 ┆ 49.5 │
|
453
|
+
# # └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘
|
454
|
+
#
|
455
|
+
# @example Select columns by index ("key" column and the two first/last columns):
|
456
|
+
# df.select(Polars.cs.by_index(0, 1, 2, -2, -1))
|
457
|
+
# # =>
|
458
|
+
# # shape: (1, 5)
|
459
|
+
# # ┌─────┬─────┬─────┬──────┬──────┐
|
460
|
+
# # │ key ┆ c00 ┆ c01 ┆ c98 ┆ c99 │
|
461
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
462
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
|
463
|
+
# # ╞═════╪═════╪═════╪══════╪══════╡
|
464
|
+
# # │ abc ┆ 0.0 ┆ 0.5 ┆ 49.0 ┆ 49.5 │
|
465
|
+
# # └─────┴─────┴─────┴──────┴──────┘
|
466
|
+
#
|
467
|
+
# @example Select the "key" column and use a `range` object to select various columns.
|
468
|
+
# df.select(Polars.cs.by_index(0, (1...101).step(20)))
|
469
|
+
# # =>
|
470
|
+
# # shape: (1, 6)
|
471
|
+
# # ┌─────┬─────┬──────┬──────┬──────┬──────┐
|
472
|
+
# # │ key ┆ c00 ┆ c20 ┆ c40 ┆ c60 ┆ c80 │
|
473
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
474
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
|
475
|
+
# # ╞═════╪═════╪══════╪══════╪══════╪══════╡
|
476
|
+
# # │ abc ┆ 0.0 ┆ 10.0 ┆ 20.0 ┆ 30.0 ┆ 40.0 │
|
477
|
+
# # └─────┴─────┴──────┴──────┴──────┴──────┘
|
478
|
+
#
|
479
|
+
# @example
|
480
|
+
# df.select(Polars.cs.by_index(0, (101...0).step(-25), require_all: false))
|
481
|
+
# # =>
|
482
|
+
# # shape: (1, 5)
|
483
|
+
# # ┌─────┬──────┬──────┬──────┬─────┐
|
484
|
+
# # │ key ┆ c75 ┆ c50 ┆ c25 ┆ c00 │
|
485
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
486
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
|
487
|
+
# # ╞═════╪══════╪══════╪══════╪═════╡
|
488
|
+
# # │ abc ┆ 37.5 ┆ 25.0 ┆ 12.5 ┆ 0.0 │
|
489
|
+
# # └─────┴──────┴──────┴──────┴─────┘
|
490
|
+
#
|
491
|
+
# @example Select all columns *except* for the odd-indexed ones:
|
492
|
+
# df.select(~Polars.cs.by_index((1...100).step(2)))
|
493
|
+
# # =>
|
494
|
+
# # shape: (1, 51)
|
495
|
+
# # ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
|
496
|
+
# # │ key ┆ c01 ┆ c03 ┆ c05 ┆ … ┆ c93 ┆ c95 ┆ c97 ┆ c99 │
|
497
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
|
498
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
|
499
|
+
# # ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
|
500
|
+
# # │ abc ┆ 0.5 ┆ 1.5 ┆ 2.5 ┆ … ┆ 46.5 ┆ 47.5 ┆ 48.5 ┆ 49.5 │
|
501
|
+
# # └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘
|
502
|
+
def self.by_index(*indices, require_all: true)
|
503
|
+
all_indices = []
|
504
|
+
indices.each do |idx|
|
505
|
+
if idx.is_a?(Enumerable)
|
506
|
+
all_indices.concat(idx.to_a)
|
507
|
+
elsif idx.is_a?(Integer)
|
508
|
+
all_indices << idx
|
509
|
+
else
|
510
|
+
msg = "invalid index value: #{idx.inspect}"
|
511
|
+
raise TypeError, msg
|
512
|
+
end
|
513
|
+
end
|
370
514
|
|
371
|
-
|
372
|
-
|
373
|
-
# end
|
515
|
+
Selector._from_rbselector(RbSelector.by_index(all_indices, require_all))
|
516
|
+
end
|
374
517
|
|
375
518
|
# Select all columns matching the given names.
|
376
519
|
#
|
@@ -379,7 +522,7 @@ module Polars
|
|
379
522
|
# @param require_all [Boolean]
|
380
523
|
# Whether to match *all* names (the default) or *any* of the names.
|
381
524
|
#
|
382
|
-
# @return [
|
525
|
+
# @return [Selector]
|
383
526
|
#
|
384
527
|
# @note
|
385
528
|
# Matching columns are returned in the order in which they are declared in
|
@@ -413,12 +556,12 @@ module Polars
|
|
413
556
|
# # =>
|
414
557
|
# # shape: (2, 2)
|
415
558
|
# # ┌─────┬─────┐
|
416
|
-
# # │
|
559
|
+
# # │ baz ┆ foo │
|
417
560
|
# # │ --- ┆ --- │
|
418
|
-
# # │
|
561
|
+
# # │ f64 ┆ str │
|
419
562
|
# # ╞═════╪═════╡
|
420
|
-
# # │
|
421
|
-
# # │
|
563
|
+
# # │ 2.0 ┆ x │
|
564
|
+
# # │ 5.5 ┆ y │
|
422
565
|
# # └─────┴─────┘
|
423
566
|
#
|
424
567
|
# @example Match all columns *except* for those given:
|
@@ -438,29 +581,316 @@ module Polars
|
|
438
581
|
names.each do |nm|
|
439
582
|
if nm.is_a?(::String)
|
440
583
|
all_names << nm
|
584
|
+
elsif nm.is_a?(::Array)
|
585
|
+
nm.each do |n|
|
586
|
+
if !n.is_a?(::String)
|
587
|
+
msg = "invalid name: #{n.inspect}"
|
588
|
+
raise TypeError, msg
|
589
|
+
end
|
590
|
+
all_names << n
|
591
|
+
end
|
441
592
|
else
|
442
593
|
msg = "invalid name: #{nm.inspect}"
|
443
594
|
raise TypeError, msg
|
444
595
|
end
|
445
596
|
end
|
446
597
|
|
447
|
-
|
448
|
-
|
449
|
-
if !require_all
|
450
|
-
match_cols = "^(#{all_names.map { |nm| Utils.re_escape(nm) }.join("|")})$"
|
451
|
-
selector_params["require_all"] = require_all
|
452
|
-
end
|
598
|
+
Selector._by_name(all_names, strict: require_all)
|
599
|
+
end
|
453
600
|
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
601
|
+
# Select all enum columns.
|
602
|
+
#
|
603
|
+
# @return [Selector]
|
604
|
+
#
|
605
|
+
# @note
|
606
|
+
# This functionality is considered **unstable**. It may be changed
|
607
|
+
# at any point without it being considered a breaking change.
|
608
|
+
#
|
609
|
+
# @example Select all enum columns:
|
610
|
+
# df = Polars::DataFrame.new(
|
611
|
+
# {
|
612
|
+
# "foo" => ["xx", "yy"],
|
613
|
+
# "bar" => [123, 456],
|
614
|
+
# "baz" => [2.0, 5.5],
|
615
|
+
# },
|
616
|
+
# schema_overrides: {"foo" => Polars::Enum.new(["xx", "yy"])}
|
617
|
+
# )
|
618
|
+
# df.select(Polars.cs.enum)
|
619
|
+
# # =>
|
620
|
+
# # shape: (2, 1)
|
621
|
+
# # ┌──────┐
|
622
|
+
# # │ foo │
|
623
|
+
# # │ --- │
|
624
|
+
# # │ enum │
|
625
|
+
# # ╞══════╡
|
626
|
+
# # │ xx │
|
627
|
+
# # │ yy │
|
628
|
+
# # └──────┘
|
629
|
+
#
|
630
|
+
# @example Select all columns *except* for those that are enum:
|
631
|
+
# df.select(~Polars.cs.enum())
|
632
|
+
# # =>
|
633
|
+
# # shape: (2, 2)
|
634
|
+
# # ┌─────┬─────┐
|
635
|
+
# # │ bar ┆ baz │
|
636
|
+
# # │ --- ┆ --- │
|
637
|
+
# # │ i64 ┆ f64 │
|
638
|
+
# # ╞═════╪═════╡
|
639
|
+
# # │ 123 ┆ 2.0 │
|
640
|
+
# # │ 456 ┆ 5.5 │
|
641
|
+
# # └─────┴─────┘
|
642
|
+
def self.enum
|
643
|
+
Selector._from_rbselector(RbSelector.enum_)
|
644
|
+
end
|
645
|
+
|
646
|
+
# Select all list columns.
|
647
|
+
#
|
648
|
+
# @return [Selector]
|
649
|
+
#
|
650
|
+
# @note
|
651
|
+
# This functionality is considered **unstable**. It may be changed
|
652
|
+
# at any point without it being considered a breaking change.
|
653
|
+
#
|
654
|
+
# @example Select all list columns:
|
655
|
+
# df = Polars::DataFrame.new(
|
656
|
+
# {
|
657
|
+
# "foo" => [["xx", "yy"], ["x"]],
|
658
|
+
# "bar" => [123, 456],
|
659
|
+
# "baz" => [2.0, 5.5]
|
660
|
+
# }
|
661
|
+
# )
|
662
|
+
# df.select(Polars.cs.list)
|
663
|
+
# # =>
|
664
|
+
# # shape: (2, 1)
|
665
|
+
# # ┌──────────────┐
|
666
|
+
# # │ foo │
|
667
|
+
# # │ --- │
|
668
|
+
# # │ list[str] │
|
669
|
+
# # ╞══════════════╡
|
670
|
+
# # │ ["xx", "yy"] │
|
671
|
+
# # │ ["x"] │
|
672
|
+
# # └──────────────┘
|
673
|
+
#
|
674
|
+
# @example Select all columns *except* for those that are list:
|
675
|
+
# df.select(~Polars.cs.list)
|
676
|
+
# # =>
|
677
|
+
# # shape: (2, 2)
|
678
|
+
# # ┌─────┬─────┐
|
679
|
+
# # │ bar ┆ baz │
|
680
|
+
# # │ --- ┆ --- │
|
681
|
+
# # │ i64 ┆ f64 │
|
682
|
+
# # ╞═════╪═════╡
|
683
|
+
# # │ 123 ┆ 2.0 │
|
684
|
+
# # │ 456 ┆ 5.5 │
|
685
|
+
# # └─────┴─────┘
|
686
|
+
#
|
687
|
+
# @example Select all list columns with a certain matching inner type:
|
688
|
+
# df.select(Polars.cs.list(Polars.cs.string))
|
689
|
+
# # =>
|
690
|
+
# # shape: (2, 1)
|
691
|
+
# # ┌──────────────┐
|
692
|
+
# # │ foo │
|
693
|
+
# # │ --- │
|
694
|
+
# # │ list[str] │
|
695
|
+
# # ╞══════════════╡
|
696
|
+
# # │ ["xx", "yy"] │
|
697
|
+
# # │ ["x"] │
|
698
|
+
# # └──────────────┘
|
699
|
+
#
|
700
|
+
# @example
|
701
|
+
# df.select(Polars.cs.list(Polars.cs.integer))
|
702
|
+
# # =>
|
703
|
+
# # shape: (0, 0)
|
704
|
+
# # ┌┐
|
705
|
+
# # ╞╡
|
706
|
+
# # └┘
|
707
|
+
def self.list(inner = nil)
|
708
|
+
inner_s = !inner.nil? ? inner._rbselector : nil
|
709
|
+
Selector._from_rbselector(RbSelector.list(inner_s))
|
710
|
+
end
|
711
|
+
|
712
|
+
# Select all array columns.
|
713
|
+
#
|
714
|
+
# @return [Selector]
|
715
|
+
#
|
716
|
+
# @note
|
717
|
+
# This functionality is considered **unstable**. It may be changed
|
718
|
+
# at any point without it being considered a breaking change.
|
719
|
+
#
|
720
|
+
# @example Select all array columns:
|
721
|
+
# df = Polars::DataFrame.new(
|
722
|
+
# {
|
723
|
+
# "foo" => [["xx", "yy"], ["x", "y"]],
|
724
|
+
# "bar" => [123, 456],
|
725
|
+
# "baz" => [2.0, 5.5]
|
726
|
+
# },
|
727
|
+
# schema_overrides: {"foo" => Polars::Array.new(Polars::String, 2)}
|
728
|
+
# )
|
729
|
+
# df.select(Polars.cs.array)
|
730
|
+
# # =>
|
731
|
+
# # shape: (2, 1)
|
732
|
+
# # ┌───────────────┐
|
733
|
+
# # │ foo │
|
734
|
+
# # │ --- │
|
735
|
+
# # │ array[str, 2] │
|
736
|
+
# # ╞═══════════════╡
|
737
|
+
# # │ ["xx", "yy"] │
|
738
|
+
# # │ ["x", "y"] │
|
739
|
+
# # └───────────────┘
|
740
|
+
#
|
741
|
+
# @example Select all columns *except* for those that are array:
|
742
|
+
# df.select(~Polars.cs.array)
|
743
|
+
# # =>
|
744
|
+
# # shape: (2, 2)
|
745
|
+
# # ┌─────┬─────┐
|
746
|
+
# # │ bar ┆ baz │
|
747
|
+
# # │ --- ┆ --- │
|
748
|
+
# # │ i64 ┆ f64 │
|
749
|
+
# # ╞═════╪═════╡
|
750
|
+
# # │ 123 ┆ 2.0 │
|
751
|
+
# # │ 456 ┆ 5.5 │
|
752
|
+
# # └─────┴─────┘
|
753
|
+
#
|
754
|
+
# @example Select all array columns with a certain matching inner type:
|
755
|
+
# df.select(Polars.cs.array(Polars.cs.string))
|
756
|
+
# # =>
|
757
|
+
# # shape: (2, 1)
|
758
|
+
# # ┌───────────────┐
|
759
|
+
# # │ foo │
|
760
|
+
# # │ --- │
|
761
|
+
# # │ array[str, 2] │
|
762
|
+
# # ╞═══════════════╡
|
763
|
+
# # │ ["xx", "yy"] │
|
764
|
+
# # │ ["x", "y"] │
|
765
|
+
# # └───────────────┘
|
766
|
+
#
|
767
|
+
# @example
|
768
|
+
# df.select(Polars.cs.array(Polars.cs.integer))
|
769
|
+
# # =>
|
770
|
+
# # shape: (0, 0)
|
771
|
+
# # ┌┐
|
772
|
+
# # ╞╡
|
773
|
+
# # └┘
|
774
|
+
#
|
775
|
+
# @example
|
776
|
+
# df.select(Polars.cs.array(width: 2))
|
777
|
+
# # =>
|
778
|
+
# # shape: (2, 1)
|
779
|
+
# # ┌───────────────┐
|
780
|
+
# # │ foo │
|
781
|
+
# # │ --- │
|
782
|
+
# # │ array[str, 2] │
|
783
|
+
# # ╞═══════════════╡
|
784
|
+
# # │ ["xx", "yy"] │
|
785
|
+
# # │ ["x", "y"] │
|
786
|
+
# # └───────────────┘
|
787
|
+
#
|
788
|
+
# @example
|
789
|
+
# df.select(Polars.cs.array(width: 3))
|
790
|
+
# # =>
|
791
|
+
# # shape: (0, 0)
|
792
|
+
# # ┌┐
|
793
|
+
# # ╞╡
|
794
|
+
# # └┘
|
795
|
+
def self.array(inner = nil, width: nil)
|
796
|
+
inner_s = !inner.nil? ? inner._rbselector : nil
|
797
|
+
Selector._from_rbselector(RbSelector.array(inner_s, width))
|
798
|
+
end
|
799
|
+
|
800
|
+
# Select all struct columns.
|
801
|
+
#
|
802
|
+
# @return [Selector]
|
803
|
+
#
|
804
|
+
# @note
|
805
|
+
# This functionality is considered **unstable**. It may be changed
|
806
|
+
# at any point without it being considered a breaking change.
|
807
|
+
#
|
808
|
+
# @example Select all struct columns:
|
809
|
+
# df = Polars::DataFrame.new(
|
810
|
+
# {
|
811
|
+
# "foo" => [{"a": "xx", "b": "z"}, {"a": "x", "b": "y"}],
|
812
|
+
# "bar" => [123, 456],
|
813
|
+
# "baz" => [2.0, 5.5]
|
814
|
+
# }
|
815
|
+
# )
|
816
|
+
# df.select(Polars.cs.struct)
|
817
|
+
# # =>
|
818
|
+
# # shape: (2, 1)
|
819
|
+
# # ┌────────────┐
|
820
|
+
# # │ foo │
|
821
|
+
# # │ --- │
|
822
|
+
# # │ struct[2] │
|
823
|
+
# # ╞════════════╡
|
824
|
+
# # │ {"xx","z"} │
|
825
|
+
# # │ {"x","y"} │
|
826
|
+
# # └────────────┘
|
827
|
+
#
|
828
|
+
# @example Select all columns *except* for those that are struct:
|
829
|
+
# df.select(~Polars.cs.struct)
|
830
|
+
# # =>
|
831
|
+
# # shape: (2, 2)
|
832
|
+
# # ┌─────┬─────┐
|
833
|
+
# # │ bar ┆ baz │
|
834
|
+
# # │ --- ┆ --- │
|
835
|
+
# # │ i64 ┆ f64 │
|
836
|
+
# # ╞═════╪═════╡
|
837
|
+
# # │ 123 ┆ 2.0 │
|
838
|
+
# # │ 456 ┆ 5.5 │
|
839
|
+
# # └─────┴─────┘
|
840
|
+
def self.struct
|
841
|
+
Selector._from_rbselector(RbSelector.struct_)
|
842
|
+
end
|
843
|
+
|
844
|
+
# Select all nested columns.
|
845
|
+
#
|
846
|
+
# A nested column is a list, array or struct.
|
847
|
+
#
|
848
|
+
# @return [Selector]
|
849
|
+
#
|
850
|
+
# @note
|
851
|
+
# This functionality is considered **unstable**. It may be changed
|
852
|
+
# at any point without it being considered a breaking change.
|
853
|
+
#
|
854
|
+
# @example Select all nested columns:
|
855
|
+
# df = Polars::DataFrame.new(
|
856
|
+
# {
|
857
|
+
# "foo" => [{"a" => "xx", "b" => "z"}, {"a" => "x", "b" => "y"}],
|
858
|
+
# "bar" => [123, 456],
|
859
|
+
# "baz" => [2.0, 5.5],
|
860
|
+
# "wow" => [[1, 2], [3]]
|
861
|
+
# }
|
862
|
+
# )
|
863
|
+
# df.select(Polars.cs.nested)
|
864
|
+
# # =>
|
865
|
+
# # shape: (2, 2)
|
866
|
+
# # ┌────────────┬───────────┐
|
867
|
+
# # │ foo ┆ wow │
|
868
|
+
# # │ --- ┆ --- │
|
869
|
+
# # │ struct[2] ┆ list[i64] │
|
870
|
+
# # ╞════════════╪═══════════╡
|
871
|
+
# # │ {"xx","z"} ┆ [1, 2] │
|
872
|
+
# # │ {"x","y"} ┆ [3] │
|
873
|
+
# # └────────────┴───────────┘
|
874
|
+
#
|
875
|
+
# @example Select all columns *except* for those that are nested:
|
876
|
+
# df.select(~Polars.cs.nested)
|
877
|
+
# # =>
|
878
|
+
# # shape: (2, 2)
|
879
|
+
# # ┌─────┬─────┐
|
880
|
+
# # │ bar ┆ baz │
|
881
|
+
# # │ --- ┆ --- │
|
882
|
+
# # │ i64 ┆ f64 │
|
883
|
+
# # ╞═════╪═════╡
|
884
|
+
# # │ 123 ┆ 2.0 │
|
885
|
+
# # │ 456 ┆ 5.5 │
|
886
|
+
# # └─────┴─────┘
|
887
|
+
def self.nested
|
888
|
+
Selector._from_rbselector(RbSelector.nested)
|
459
889
|
end
|
460
890
|
|
461
891
|
# Select all categorical columns.
|
462
892
|
#
|
463
|
-
# @return [
|
893
|
+
# @return [Selector]
|
464
894
|
#
|
465
895
|
# @example
|
466
896
|
# df = Polars::DataFrame.new(
|
@@ -498,7 +928,7 @@ module Polars
|
|
498
928
|
# # │ 456 ┆ 5.5 │
|
499
929
|
# # └─────┴─────┘
|
500
930
|
def self.categorical
|
501
|
-
|
931
|
+
Selector._from_rbselector(RbSelector.categorical())
|
502
932
|
end
|
503
933
|
|
504
934
|
# Select columns whose names contain the given literal substring(s).
|
@@ -506,7 +936,7 @@ module Polars
|
|
506
936
|
# @param substring [Object]
|
507
937
|
# Substring(s) that matching column names should contain.
|
508
938
|
#
|
509
|
-
# @return [
|
939
|
+
# @return [Selector]
|
510
940
|
#
|
511
941
|
# @example
|
512
942
|
# df = Polars::DataFrame.new(
|
@@ -560,16 +990,12 @@ module Polars
|
|
560
990
|
escaped_substring = _re_string(substring)
|
561
991
|
raw_params = "^.*#{escaped_substring}.*$"
|
562
992
|
|
563
|
-
|
564
|
-
F.col(raw_params),
|
565
|
-
name: "contains",
|
566
|
-
parameters: {"*substring" => escaped_substring}
|
567
|
-
)
|
993
|
+
Selector._from_rbselector(RbSelector.matches(raw_params))
|
568
994
|
end
|
569
995
|
|
570
996
|
# Select all date columns.
|
571
997
|
#
|
572
|
-
# @return [
|
998
|
+
# @return [Selector]
|
573
999
|
#
|
574
1000
|
# @example
|
575
1001
|
# df = Polars::DataFrame.new(
|
@@ -605,16 +1031,23 @@ module Polars
|
|
605
1031
|
# # │ 2031-12-31 00:30:00 │
|
606
1032
|
# # └─────────────────────┘
|
607
1033
|
def self.date
|
608
|
-
|
1034
|
+
by_dtype([Date])
|
609
1035
|
end
|
610
1036
|
|
611
|
-
#
|
612
|
-
#
|
613
|
-
#
|
1037
|
+
# Select all datetime columns (this method accepts no time unit/zone filter arguments).
|
1038
|
+
#
|
1039
|
+
# @return [Selector]
|
1040
|
+
def self.datetime
|
1041
|
+
time_unit = ["ms", "us", "ns"]
|
1042
|
+
|
1043
|
+
time_zone = [nil]
|
1044
|
+
|
1045
|
+
Selector._from_rbselector(RbSelector.datetime(time_unit, time_zone))
|
1046
|
+
end
|
614
1047
|
|
615
1048
|
# Select all decimal columns.
|
616
1049
|
#
|
617
|
-
# @return [
|
1050
|
+
# @return [Selector]
|
618
1051
|
#
|
619
1052
|
# @example
|
620
1053
|
# df = Polars::DataFrame.new(
|
@@ -654,7 +1087,104 @@ module Polars
|
|
654
1087
|
# # └─────┘
|
655
1088
|
def self.decimal
|
656
1089
|
# TODO: allow explicit selection by scale/precision?
|
657
|
-
|
1090
|
+
Selector._from_rbselector(RbSelector.decimal)
|
1091
|
+
end
|
1092
|
+
|
1093
|
+
# Select all columns having names consisting only of digits.
|
1094
|
+
#
|
1095
|
+
# @return [Selector]
|
1096
|
+
#
|
1097
|
+
# @note
|
1098
|
+
# Matching column names cannot contain *any* non-digit characters. Note that the
|
1099
|
+
# definition of "digit" consists of all valid Unicode digit characters (`\d`)
|
1100
|
+
# by default; this can be changed by setting `ascii_only: true`.
|
1101
|
+
#
|
1102
|
+
# @example
|
1103
|
+
# df = Polars::DataFrame.new(
|
1104
|
+
# {
|
1105
|
+
# "key" => ["aaa", "bbb", "aaa", "bbb", "bbb"],
|
1106
|
+
# "year" => [2001, 2001, 2025, 2025, 2001],
|
1107
|
+
# "value" => [-25, 100, 75, -15, -5]
|
1108
|
+
# }
|
1109
|
+
# ).pivot(
|
1110
|
+
# "year",
|
1111
|
+
# values: "value",
|
1112
|
+
# index: "key",
|
1113
|
+
# aggregate_function: "sum"
|
1114
|
+
# )
|
1115
|
+
# # =>
|
1116
|
+
# # shape: (2, 3)
|
1117
|
+
# # ┌─────┬──────┬──────┐
|
1118
|
+
# # │ key ┆ 2001 ┆ 2025 │
|
1119
|
+
# # │ --- ┆ --- ┆ --- │
|
1120
|
+
# # │ str ┆ i64 ┆ i64 │
|
1121
|
+
# # ╞═════╪══════╪══════╡
|
1122
|
+
# # │ aaa ┆ -25 ┆ 75 │
|
1123
|
+
# # │ bbb ┆ 95 ┆ -15 │
|
1124
|
+
# # └─────┴──────┴──────┘
|
1125
|
+
#
|
1126
|
+
# @example Select columns with digit names:
|
1127
|
+
# df.select(Polars.cs.digit)
|
1128
|
+
# # =>
|
1129
|
+
# # shape: (2, 2)
|
1130
|
+
# # ┌──────┬──────┐
|
1131
|
+
# # │ 2001 ┆ 2025 │
|
1132
|
+
# # │ --- ┆ --- │
|
1133
|
+
# # │ i64 ┆ i64 │
|
1134
|
+
# # ╞══════╪══════╡
|
1135
|
+
# # │ -25 ┆ 75 │
|
1136
|
+
# # │ 95 ┆ -15 │
|
1137
|
+
# # └──────┴──────┘
|
1138
|
+
#
|
1139
|
+
# @example Select all columns *except* for those with digit names:
|
1140
|
+
# df.select(~Polars.cs.digit)
|
1141
|
+
# # =>
|
1142
|
+
# # shape: (2, 1)
|
1143
|
+
# # ┌─────┐
|
1144
|
+
# # │ key │
|
1145
|
+
# # │ --- │
|
1146
|
+
# # │ str │
|
1147
|
+
# # ╞═════╡
|
1148
|
+
# # │ aaa │
|
1149
|
+
# # │ bbb │
|
1150
|
+
# # └─────┘
|
1151
|
+
#
|
1152
|
+
# @example Demonstrate use of `ascii_only` flag (by default all valid unicode digits are considered, but this can be constrained to ascii 0-9):
|
1153
|
+
# df = Polars::DataFrame.new({"१९९९" => [1999], "२०७७" => [2077], "3000" => [3000]})
|
1154
|
+
# df.select(Polars.cs.digit)
|
1155
|
+
# # =>
|
1156
|
+
# # shape: (1, 3)
|
1157
|
+
# # ┌──────┬──────┬──────┐
|
1158
|
+
# # │ १९९९ ┆ २०७७ ┆ 3000 │
|
1159
|
+
# # │ --- ┆ --- ┆ --- │
|
1160
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1161
|
+
# # ╞══════╪══════╪══════╡
|
1162
|
+
# # │ 1999 ┆ 2077 ┆ 3000 │
|
1163
|
+
# # └──────┴──────┴──────┘
|
1164
|
+
#
|
1165
|
+
# @example
|
1166
|
+
# df.select(Polars.cs.digit(ascii_only: true))
|
1167
|
+
# # =>
|
1168
|
+
# # shape: (1, 1)
|
1169
|
+
# # ┌──────┐
|
1170
|
+
# # │ 3000 │
|
1171
|
+
# # │ --- │
|
1172
|
+
# # │ i64 │
|
1173
|
+
# # ╞══════╡
|
1174
|
+
# # │ 3000 │
|
1175
|
+
# # └──────┘
|
1176
|
+
def self.digit(ascii_only: false)
|
1177
|
+
re_digit = ascii_only ? "[0-9]" : "\\d"
|
1178
|
+
Selector._from_rbselector(RbSelector.matches("^#{re_digit}+$"))
|
1179
|
+
end
|
1180
|
+
|
1181
|
+
# Select all duration columns (the time unit filter is fixed internally, not caller-selectable).
|
1182
|
+
#
|
1183
|
+
# @return [Selector]
|
1184
|
+
def self.duration
|
1185
|
+
time_unit = ["ms", "us", "ns"]
|
1186
|
+
|
1187
|
+
Selector._from_rbselector(RbSelector.duration(time_unit))
|
658
1188
|
end
|
659
1189
|
|
660
1190
|
# Select columns that end with the given substring(s).
|
@@ -662,7 +1192,7 @@ module Polars
|
|
662
1192
|
# @param suffix [Object]
|
663
1193
|
# Substring(s) that matching column names should end with.
|
664
1194
|
#
|
665
|
-
# @return [
|
1195
|
+
# @return [Selector]
|
666
1196
|
#
|
667
1197
|
# @example
|
668
1198
|
# df = Polars::DataFrame.new(
|
@@ -716,16 +1246,64 @@ module Polars
|
|
716
1246
|
escaped_suffix = _re_string(suffix)
|
717
1247
|
raw_params = "^.*#{escaped_suffix}$"
|
718
1248
|
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
1249
|
+
Selector._from_rbselector(RbSelector.matches(raw_params))
|
1250
|
+
end
|
1251
|
+
|
1252
|
+
# Select all columns except those matching the given columns, datatypes, or selectors.
|
1253
|
+
#
|
1254
|
+
# @param columns [Object]
|
1255
|
+
# One or more columns (col or name), datatypes, or selectors representing
|
1256
|
+
# the columns to exclude.
|
1257
|
+
# @param more_columns [Array]
|
1258
|
+
# Additional columns, datatypes, or selectors to exclude, specified as positional
|
1259
|
+
# arguments.
|
1260
|
+
#
|
1261
|
+
# @return [Selector]
|
1262
|
+
#
|
1263
|
+
# @note
|
1264
|
+
# If excluding a single selector it is simpler to write as `~selector` instead.
|
1265
|
+
#
|
1266
|
+
# @example Exclude by column name(s):
|
1267
|
+
# df = Polars::DataFrame.new(
|
1268
|
+
# {
|
1269
|
+
# "aa" => [1, 2, 3],
|
1270
|
+
# "ba" => ["a", "b", nil],
|
1271
|
+
# "cc" => [nil, 2.5, 1.5]
|
1272
|
+
# }
|
1273
|
+
# )
|
1274
|
+
# df.select(Polars.cs.exclude("ba", "xx"))
|
1275
|
+
# # =>
|
1276
|
+
# # shape: (3, 2)
|
1277
|
+
# # ┌─────┬──────┐
|
1278
|
+
# # │ aa ┆ cc │
|
1279
|
+
# # │ --- ┆ --- │
|
1280
|
+
# # │ i64 ┆ f64 │
|
1281
|
+
# # ╞═════╪══════╡
|
1282
|
+
# # │ 1 ┆ null │
|
1283
|
+
# # │ 2 ┆ 2.5 │
|
1284
|
+
# # │ 3 ┆ 1.5 │
|
1285
|
+
# # └─────┴──────┘
|
1286
|
+
#
|
1287
|
+
# @example Exclude using a column name, a selector, and a dtype:
|
1288
|
+
# df.select(Polars.cs.exclude("aa", Polars.cs.string, Polars::UInt32))
|
1289
|
+
# # =>
|
1290
|
+
# # shape: (3, 1)
|
1291
|
+
# # ┌──────┐
|
1292
|
+
# # │ cc │
|
1293
|
+
# # │ --- │
|
1294
|
+
# # │ f64 │
|
1295
|
+
# # ╞══════╡
|
1296
|
+
# # │ null │
|
1297
|
+
# # │ 2.5 │
|
1298
|
+
# # │ 1.5 │
|
1299
|
+
# # └──────┘
|
1300
|
+
def self.exclude(columns, *more_columns)
|
1301
|
+
~_combine_as_selector(columns, *more_columns)
|
724
1302
|
end
|
725
1303
|
|
726
1304
|
# Select the first column in the current scope.
|
727
1305
|
#
|
728
|
-
# @return [
|
1306
|
+
# @return [Selector]
|
729
1307
|
#
|
730
1308
|
# @example
|
731
1309
|
# df = Polars::DataFrame.new(
|
@@ -762,13 +1340,13 @@ module Polars
|
|
762
1340
|
# # │ 123 ┆ 2.0 ┆ 0 │
|
763
1341
|
# # │ 456 ┆ 5.5 ┆ 1 │
|
764
1342
|
# # └─────┴─────┴─────┘
|
765
|
-
def self.first
|
766
|
-
|
1343
|
+
def self.first(strict: true)
|
1344
|
+
Selector._from_rbselector(RbSelector.first(strict))
|
767
1345
|
end
|
768
1346
|
|
769
1347
|
# Select all float columns.
|
770
1348
|
#
|
771
|
-
# @return [
|
1349
|
+
# @return [Selector]
|
772
1350
|
#
|
773
1351
|
# @example
|
774
1352
|
# df = Polars::DataFrame.new(
|
@@ -807,12 +1385,12 @@ module Polars
|
|
807
1385
|
# # │ y ┆ 456 │
|
808
1386
|
# # └─────┴─────┘
|
809
1387
|
def self.float
|
810
|
-
|
1388
|
+
Selector._from_rbselector(RbSelector.float)
|
811
1389
|
end
|
812
1390
|
|
813
1391
|
# Select all integer columns.
|
814
1392
|
#
|
815
|
-
# @return [
|
1393
|
+
# @return [Selector]
|
816
1394
|
#
|
817
1395
|
# @example
|
818
1396
|
# df = Polars::DataFrame.new(
|
@@ -850,12 +1428,12 @@ module Polars
|
|
850
1428
|
# # │ y ┆ 5.5 │
|
851
1429
|
# # └─────┴─────┘
|
852
1430
|
def self.integer
|
853
|
-
|
1431
|
+
Selector._from_rbselector(RbSelector.integer)
|
854
1432
|
end
|
855
1433
|
|
856
1434
|
# Select all signed integer columns.
|
857
1435
|
#
|
858
|
-
# @return [
|
1436
|
+
# @return [Selector]
|
859
1437
|
#
|
860
1438
|
# @example
|
861
1439
|
# df = Polars::DataFrame.new(
|
@@ -907,12 +1485,12 @@ module Polars
|
|
907
1485
|
# # │ -456 ┆ 6789 ┆ 4321 │
|
908
1486
|
# # └──────┴──────┴──────┘
|
909
1487
|
def self.signed_integer
|
910
|
-
|
1488
|
+
Selector._from_rbselector(RbSelector.signed_integer)
|
911
1489
|
end
|
912
1490
|
|
913
1491
|
# Select all unsigned integer columns.
|
914
1492
|
#
|
915
|
-
# @return [
|
1493
|
+
# @return [Selector]
|
916
1494
|
#
|
917
1495
|
# @example
|
918
1496
|
# df = Polars::DataFrame.new(
|
@@ -964,12 +1542,12 @@ module Polars
|
|
964
1542
|
# # │ -456 ┆ 6789 ┆ 4321 │
|
965
1543
|
# # └──────┴──────┴──────┘
|
966
1544
|
def self.unsigned_integer
|
967
|
-
|
1545
|
+
Selector._from_rbselector(RbSelector.unsigned_integer)
|
968
1546
|
end
|
969
1547
|
|
970
1548
|
# Select the last column in the current scope.
|
971
1549
|
#
|
972
|
-
# @return [
|
1550
|
+
# @return [Selector]
|
973
1551
|
#
|
974
1552
|
# @example
|
975
1553
|
# df = Polars::DataFrame.new(
|
@@ -1006,13 +1584,71 @@ module Polars
|
|
1006
1584
|
# # │ x ┆ 123 ┆ 2.0 │
|
1007
1585
|
# # │ y ┆ 456 ┆ 5.5 │
|
1008
1586
|
# # └─────┴─────┴─────┘
|
1009
|
-
def self.last
|
1010
|
-
|
1587
|
+
def self.last(strict: true)
|
1588
|
+
Selector._from_rbselector(RbSelector.last(strict))
|
1589
|
+
end
|
1590
|
+
|
1591
|
+
# Select all columns that match the given regex pattern.
|
1592
|
+
#
|
1593
|
+
# @param pattern [String]
|
1594
|
+
# A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
1595
|
+
#
|
1596
|
+
# @return [Selector]
|
1597
|
+
#
|
1598
|
+
# @example Match column names containing an 'a', preceded by a character that is not 'z':
|
1599
|
+
# df = Polars::DataFrame.new(
|
1600
|
+
# {
|
1601
|
+
# "foo" => ["x", "y"],
|
1602
|
+
# "bar" => [123, 456],
|
1603
|
+
# "baz" => [2.0, 5.5],
|
1604
|
+
# "zap" => [0, 1]
|
1605
|
+
# }
|
1606
|
+
# )
|
1607
|
+
# df.select(Polars.cs.matches("[^z]a"))
|
1608
|
+
# # =>
|
1609
|
+
# # shape: (2, 2)
|
1610
|
+
# # ┌─────┬─────┐
|
1611
|
+
# # │ bar ┆ baz │
|
1612
|
+
# # │ --- ┆ --- │
|
1613
|
+
# # │ i64 ┆ f64 │
|
1614
|
+
# # ╞═════╪═════╡
|
1615
|
+
# # │ 123 ┆ 2.0 │
|
1616
|
+
# # │ 456 ┆ 5.5 │
|
1617
|
+
# # └─────┴─────┘
|
1618
|
+
#
|
1619
|
+
# @example Do not match column names ending in 'R' or 'z' (case-insensitively):
|
1620
|
+
# df.select(~Polars.cs.matches("(?i)R|z$"))
|
1621
|
+
# # =>
|
1622
|
+
# # shape: (2, 2)
|
1623
|
+
# # ┌─────┬─────┐
|
1624
|
+
# # │ foo ┆ zap │
|
1625
|
+
# # │ --- ┆ --- │
|
1626
|
+
# # │ str ┆ i64 │
|
1627
|
+
# # ╞═════╪═════╡
|
1628
|
+
# # │ x ┆ 0 │
|
1629
|
+
# # │ y ┆ 1 │
|
1630
|
+
# # └─────┴─────┘
|
1631
|
+
def self.matches(pattern)
|
1632
|
+
if pattern == ".*"
|
1633
|
+
all
|
1634
|
+
else
|
1635
|
+
if pattern.start_with?(".*")
|
1636
|
+
pattern = pattern[2..]
|
1637
|
+
elsif pattern.end_with?(".*")
|
1638
|
+
pattern = pattern[..-3]
|
1639
|
+
end
|
1640
|
+
|
1641
|
+
pfx = !pattern.start_with?("^") ? "^.*" : ""
|
1642
|
+
sfx = !pattern.end_with?("$") ? ".*$" : ""
|
1643
|
+
raw_params = "#{pfx}#{pattern}#{sfx}"
|
1644
|
+
|
1645
|
+
Selector._from_rbselector(RbSelector.matches(raw_params))
|
1646
|
+
end
|
1011
1647
|
end
|
1012
1648
|
|
1013
1649
|
# Select all numeric columns.
|
1014
1650
|
#
|
1015
|
-
# @return [
|
1651
|
+
# @return [Selector]
|
1016
1652
|
#
|
1017
1653
|
# @example
|
1018
1654
|
# df = Polars::DataFrame.new(
|
@@ -1051,7 +1687,14 @@ module Polars
|
|
1051
1687
|
# # │ y │
|
1052
1688
|
# # └─────┘
|
1053
1689
|
def self.numeric
|
1054
|
-
|
1690
|
+
Selector._from_rbselector(RbSelector.numeric)
|
1691
|
+
end
|
1692
|
+
|
1693
|
+
# Select all object columns.
|
1694
|
+
#
|
1695
|
+
# @return [Selector]
|
1696
|
+
def self.object
|
1697
|
+
Selector._from_rbselector(RbSelector.object)
|
1055
1698
|
end
|
1056
1699
|
|
1057
1700
|
# Select columns that start with the given substring(s).
|
@@ -1059,7 +1702,7 @@ module Polars
|
|
1059
1702
|
# @param prefix [Object]
|
1060
1703
|
# Substring(s) that matching column names should start with.
|
1061
1704
|
#
|
1062
|
-
# @return [
|
1705
|
+
# @return [Selector]
|
1063
1706
|
#
|
1064
1707
|
# @example
|
1065
1708
|
# df = Polars::DataFrame.new(
|
@@ -1113,16 +1756,12 @@ module Polars
|
|
1113
1756
|
escaped_prefix = _re_string(prefix)
|
1114
1757
|
raw_params = "^#{escaped_prefix}.*$"
|
1115
1758
|
|
1116
|
-
|
1117
|
-
F.col(raw_params),
|
1118
|
-
name: "starts_with",
|
1119
|
-
parameters: {"*prefix" => prefix}
|
1120
|
-
)
|
1759
|
+
Selector._from_rbselector(RbSelector.matches(raw_params))
|
1121
1760
|
end
|
1122
1761
|
|
1123
1762
|
# Select all String (and, optionally, Categorical) string columns.
|
1124
1763
|
#
|
1125
|
-
# @return [
|
1764
|
+
# @return [Selector]
|
1126
1765
|
#
|
1127
1766
|
# @example
|
1128
1767
|
# df = Polars::DataFrame.new(
|
@@ -1169,16 +1808,66 @@ module Polars
|
|
1169
1808
|
string_dtypes << Categorical
|
1170
1809
|
end
|
1171
1810
|
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1811
|
+
by_dtype(string_dtypes)
|
1812
|
+
end
|
1813
|
+
|
1814
|
+
# Select all temporal columns.
|
1815
|
+
#
|
1816
|
+
# @return [Selector]
|
1817
|
+
#
|
1818
|
+
# @example Match all temporal columns:
|
1819
|
+
# df = Polars::DataFrame.new(
|
1820
|
+
# {
|
1821
|
+
# "dt" => [Date.new(2021, 1, 1), Date.new(2021, 1, 2)],
|
1822
|
+
# "tm" => [DateTime.new(2000, 1, 1, 12, 0, 0), DateTime.new(2000, 1, 1, 20, 30, 45)],
|
1823
|
+
# "value" => [1.2345, 2.3456],
|
1824
|
+
# },
|
1825
|
+
# schema_overrides: {"tm" => Polars::Time}
|
1826
|
+
# )
|
1827
|
+
# df.select(Polars.cs.temporal)
|
1828
|
+
# # =>
|
1829
|
+
# # shape: (2, 2)
|
1830
|
+
# # ┌────────────┬──────────┐
|
1831
|
+
# # │ dt ┆ tm │
|
1832
|
+
# # │ --- ┆ --- │
|
1833
|
+
# # │ date ┆ time │
|
1834
|
+
# # ╞════════════╪══════════╡
|
1835
|
+
# # │ 2021-01-01 ┆ 12:00:00 │
|
1836
|
+
# # │ 2021-01-02 ┆ 20:30:45 │
|
1837
|
+
# # └────────────┴──────────┘
|
1838
|
+
#
|
1839
|
+
# @example Match all temporal columns *except* for time columns:
|
1840
|
+
# df.select(Polars.cs.temporal - Polars.cs.time)
|
1841
|
+
# # =>
|
1842
|
+
# # shape: (2, 1)
|
1843
|
+
# # ┌────────────┐
|
1844
|
+
# # │ dt │
|
1845
|
+
# # │ --- │
|
1846
|
+
# # │ date │
|
1847
|
+
# # ╞════════════╡
|
1848
|
+
# # │ 2021-01-01 │
|
1849
|
+
# # │ 2021-01-02 │
|
1850
|
+
# # └────────────┘
|
1851
|
+
#
|
1852
|
+
# @example Match all columns *except* for temporal columns:
|
1853
|
+
# df.select(~Polars.cs.temporal)
|
1854
|
+
# # =>
|
1855
|
+
# # shape: (2, 1)
|
1856
|
+
# # ┌────────┐
|
1857
|
+
# # │ value │
|
1858
|
+
# # │ --- │
|
1859
|
+
# # │ f64 │
|
1860
|
+
# # ╞════════╡
|
1861
|
+
# # │ 1.2345 │
|
1862
|
+
# # │ 2.3456 │
|
1863
|
+
# # └────────┘
|
1864
|
+
def self.temporal
|
1865
|
+
Selector._from_rbselector(RbSelector.temporal)
|
1177
1866
|
end
|
1178
1867
|
|
1179
1868
|
# Select all time columns.
|
1180
1869
|
#
|
1181
|
-
# @return [
|
1870
|
+
# @return [Selector]
|
1182
1871
|
#
|
1183
1872
|
# @example
|
1184
1873
|
# df = Polars::DataFrame.new(
|
@@ -1216,11 +1905,51 @@ module Polars
|
|
1216
1905
|
# # │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
|
1217
1906
|
# # └─────────────────────┴────────────┘
|
1218
1907
|
def self.time
|
1219
|
-
|
1908
|
+
by_dtype([Time])
|
1909
|
+
end
|
1910
|
+
|
1911
|
+
# @private
|
1912
|
+
def self._combine_as_selector(items, *more_items)
|
1913
|
+
names, regexes, dtypes = [], [], []
|
1914
|
+
selectors = []
|
1915
|
+
((items.is_a?(::Array) ? items : [items]) + more_items).each do |item|
|
1916
|
+
if Utils.is_selector(item)
|
1917
|
+
selectors << item
|
1918
|
+
elsif item.is_a?(::String)
|
1919
|
+
if item.start_with?("^") && item.end_with?("$")
|
1920
|
+
regexes << item
|
1921
|
+
else
|
1922
|
+
names << item
|
1923
|
+
end
|
1924
|
+
elsif Utils.is_polars_dtype(item)
|
1925
|
+
dtypes << item
|
1926
|
+
elsif Utils.is_column(item)
|
1927
|
+
names << item.meta.output_name
|
1928
|
+
else
|
1929
|
+
msg = "expected one or more `str`, `DataType` or selector; found #{item.inspect} instead."
|
1930
|
+
raise TypeError, msg
|
1931
|
+
end
|
1932
|
+
end
|
1933
|
+
|
1934
|
+
selected = []
|
1935
|
+
if names.any?
|
1936
|
+
selected << by_name(*names, require_all: false)
|
1937
|
+
end
|
1938
|
+
if dtypes.any?
|
1939
|
+
selected << by_dtype(*dtypes)
|
1940
|
+
end
|
1941
|
+
if regexes.any?
|
1942
|
+
raise Todo
|
1943
|
+
end
|
1944
|
+
if selectors.any?
|
1945
|
+
selected.concat(selectors)
|
1946
|
+
end
|
1947
|
+
|
1948
|
+
selected.reduce(empty, :|)
|
1220
1949
|
end
|
1221
1950
|
end
|
1222
1951
|
|
1223
1952
|
def self.cs
|
1224
|
-
|
1953
|
+
Selectors
|
1225
1954
|
end
|
1226
1955
|
end
|