polars-df 0.19.0-x64-mingw-ucrt → 0.21.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE-THIRD-PARTY.txt +1376 -2634
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/3.4/polars.so +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +48 -39
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/eager.rb +1 -1
  22. data/lib/polars/functions/lazy.rb +114 -15
  23. data/lib/polars/functions/repeat.rb +4 -0
  24. data/lib/polars/io/csv.rb +18 -0
  25. data/lib/polars/io/json.rb +16 -0
  26. data/lib/polars/io/ndjson.rb +13 -0
  27. data/lib/polars/io/parquet.rb +45 -63
  28. data/lib/polars/io/scan_options.rb +47 -0
  29. data/lib/polars/lazy_frame.rb +163 -75
  30. data/lib/polars/list_expr.rb +213 -17
  31. data/lib/polars/list_name_space.rb +121 -8
  32. data/lib/polars/meta_expr.rb +14 -29
  33. data/lib/polars/scan_cast_options.rb +64 -0
  34. data/lib/polars/schema.rb +6 -1
  35. data/lib/polars/selector.rb +138 -0
  36. data/lib/polars/selectors.rb +931 -202
  37. data/lib/polars/series.rb +46 -19
  38. data/lib/polars/string_expr.rb +24 -3
  39. data/lib/polars/string_name_space.rb +12 -1
  40. data/lib/polars/utils/parse.rb +40 -0
  41. data/lib/polars/utils.rb +5 -1
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +8 -0
  44. metadata +10 -2
@@ -1,118 +1,5 @@
1
1
  module Polars
2
2
  module Selectors
3
- # @private
4
- class SelectorProxy < Expr
5
- attr_accessor :_attrs
6
- attr_accessor :_repr_override
7
-
8
- def initialize(
9
- expr,
10
- name:,
11
- parameters: nil
12
- )
13
- self._rbexpr = expr._rbexpr
14
- self._attrs = {
15
- name: name,
16
- params: parameters
17
- }
18
- end
19
-
20
- def inspect
21
- if !_attrs
22
- as_expr.inspect
23
- elsif _repr_override
24
- _repr_override
25
- else
26
- selector_name = _attrs[:name]
27
- params = _attrs[:params] || {}
28
- set_ops = {"and" => "&", "or" => "|", "sub" => "-", "xor" => "^"}
29
- if set_ops.include?(selector_name)
30
- op = set_ops[selector_name]
31
- "(#{params.values.map(&:inspect).join(" #{op} ")})"
32
- else
33
- str_params = params.map { |k, v| k.start_with?("*") ? v.inspect[1..-2] : "#{k}=#{v.inspect}" }.join(", ")
34
- "Polars.cs.#{selector_name}(#{str_params})"
35
- end
36
- end
37
- end
38
-
39
- def ~
40
- if Utils.is_selector(self)
41
- inverted = Selectors.all - self
42
- inverted._repr_override = "~#{inspect}"
43
- else
44
- inverted = ~as_expr
45
- end
46
- inverted
47
- end
48
-
49
- def -(other)
50
- if Utils.is_selector(other)
51
- SelectorProxy.new(
52
- meta._as_selector.meta._selector_sub(other),
53
- parameters: {"self" => self, "other" => other},
54
- name: "sub"
55
- )
56
- else
57
- as_expr - other
58
- end
59
- end
60
-
61
- def &(other)
62
- if Utils.is_column(other)
63
- raise Todo
64
- end
65
- if Utils.is_selector(other)
66
- SelectorProxy.new(
67
- meta._as_selector.meta._selector_and(other),
68
- parameters: {"self" => self, "other" => other},
69
- name: "and"
70
- )
71
- else
72
- as_expr & other
73
- end
74
- end
75
-
76
- def |(other)
77
- if Utils.is_column(other)
78
- raise Todo
79
- end
80
- if Utils.is_selector(other)
81
- SelectorProxy.new(
82
- meta._as_selector.meta._selector_and(other),
83
- parameters: {"self" => self, "other" => other},
84
- name: "or"
85
- )
86
- else
87
- as_expr | other
88
- end
89
- end
90
-
91
- def ^(other)
92
- if Utils.is_column(other)
93
- raise Todo
94
- end
95
- if Utils.is_selector(other)
96
- SelectorProxy.new(
97
- meta._as_selector.meta._selector_and(other),
98
- parameters: {"self" => self, "other" => other},
99
- name: "xor"
100
- )
101
- else
102
- as_expr ^ other
103
- end
104
- end
105
-
106
- def as_expr
107
- Expr._from_rbexpr(_rbexpr)
108
- end
109
- end
110
-
111
- # @private
112
- def self._selector_proxy_(...)
113
- SelectorProxy.new(...)
114
- end
115
-
116
3
  # @private
117
4
  def self._re_string(string, escape: true)
118
5
  if string.is_a?(::String)
@@ -131,9 +18,26 @@ module Polars
131
18
  "(#{rx})"
132
19
  end
133
20
 
21
+ # Select no columns.
22
+ #
23
+ # This is useful for composition with other selectors.
24
+ #
25
+ # @return [Selector]
26
+ #
27
+ # @example
28
+ # Polars::DataFrame.new({"a" => 1, "b" => 2}).select(Polars.cs.empty)
29
+ # # =>
30
+ # # shape: (0, 0)
31
+ # # ┌┐
32
+ # # ╞╡
33
+ # # └┘
34
+ def self.empty
35
+ Selector._from_rbselector(RbSelector.empty)
36
+ end
37
+
134
38
  # Select all columns.
135
39
  #
136
- # @return [SelectorProxy]
40
+ # @return [Selector]
137
41
  #
138
42
  # @example
139
43
  # df = Polars::DataFrame.new(
@@ -170,7 +74,7 @@ module Polars
170
74
  # # │ 2024-01-01 │
171
75
  # # └────────────┘
172
76
  def self.all
173
- _selector_proxy_(F.all, name: "all")
77
+ Selector._from_rbselector(RbSelector.all)
174
78
  end
175
79
 
176
80
  # Select all columns with alphabetic names (eg: only letters).
@@ -182,7 +86,7 @@ module Polars
182
86
  # Indicate whether to ignore the presence of spaces in column names; if so,
183
87
  # only the other (non-space) characters are considered.
184
88
  #
185
- # @return [SelectorProxy]
89
+ # @return [Selector]
186
90
  #
187
91
  # @note
188
92
  # Matching column names cannot contain *any* non-alphabetic characters. Note
@@ -274,20 +178,102 @@ module Polars
274
178
  # note that we need to supply a pattern compatible with the *rust* regex crate
275
179
  re_alpha = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
276
180
  re_space = ignore_spaces ? " " : ""
277
- _selector_proxy_(
278
- F.col("^[#{re_alpha}#{re_space}]+$"),
279
- name: "alpha",
280
- parameters: {"ascii_only" => ascii_only, "ignore_spaces" => ignore_spaces},
281
- )
181
+ Selector._from_rbselector(RbSelector.matches("^[#{re_alpha}#{re_space}]+$"))
282
182
  end
283
183
 
284
- # TODO
285
- # def alphanumeric
286
- # end
184
+ # Select all columns with alphanumeric names (eg: only letters and the digits 0-9).
185
+ #
186
+ # @param ascii_only [Boolean]
187
+ # Indicate whether to consider only ASCII alphabetic characters, or the full
188
+ # Unicode range of valid letters (accented, idiographic, etc).
189
+ # @param ignore_spaces [Boolean]
190
+ # Indicate whether to ignore the presence of spaces in column names; if so,
191
+ # only the other (non-space) characters are considered.
192
+ #
193
+ # @return [Selector]
194
+ #
195
+ # @note
196
+ # Matching column names cannot contain *any* non-alphabetic or integer characters.
197
+ # Note that the definition of "alphabetic" consists of all valid Unicode alphabetic
198
+ # characters (`\p{Alphabetic}`) and digit characters (`\d`) by default; this
199
+ # can be changed by setting `ascii_only: true`.
200
+ #
201
+ # @example Select columns with alphanumeric names:
202
+ # df = Polars::DataFrame.new(
203
+ # {
204
+ # "1st_col" => [100, 200, 300],
205
+ # "flagged" => [true, false, true],
206
+ # "00prefix" => ["01:aa", "02:bb", "03:cc"],
207
+ # "last col" => ["x", "y", "z"]
208
+ # }
209
+ # )
210
+ # df.select(Polars.cs.alphanumeric)
211
+ # # =>
212
+ # # shape: (3, 2)
213
+ # # ┌─────────┬──────────┐
214
+ # # │ flagged ┆ 00prefix │
215
+ # # │ --- ┆ --- │
216
+ # # │ bool ┆ str │
217
+ # # ╞═════════╪══════════╡
218
+ # # │ true ┆ 01:aa │
219
+ # # │ false ┆ 02:bb │
220
+ # # │ true ┆ 03:cc │
221
+ # # └─────────┴──────────┘
222
+ #
223
+ # @example
224
+ # df.select(Polars.cs.alphanumeric(ignore_spaces: true))
225
+ # # =>
226
+ # # shape: (3, 3)
227
+ # # ┌─────────┬──────────┬──────────┐
228
+ # # │ flagged ┆ 00prefix ┆ last col │
229
+ # # │ --- ┆ --- ┆ --- │
230
+ # # │ bool ┆ str ┆ str │
231
+ # # ╞═════════╪══════════╪══════════╡
232
+ # # │ true ┆ 01:aa ┆ x │
233
+ # # │ false ┆ 02:bb ┆ y │
234
+ # # │ true ┆ 03:cc ┆ z │
235
+ # # └─────────┴──────────┴──────────┘
236
+ #
237
+ # @example Select all columns *except* for those with alphanumeric names:
238
+ # df.select(~Polars.cs.alphanumeric)
239
+ # # =>
240
+ # # shape: (3, 2)
241
+ # # ┌─────────┬──────────┐
242
+ # # │ 1st_col ┆ last col │
243
+ # # │ --- ┆ --- │
244
+ # # │ i64 ┆ str │
245
+ # # ╞═════════╪══════════╡
246
+ # # │ 100 ┆ x │
247
+ # # │ 200 ┆ y │
248
+ # # │ 300 ┆ z │
249
+ # # └─────────┴──────────┘
250
+ #
251
+ # @example
252
+ # df.select(~Polars.cs.alphanumeric(ignore_spaces: true))
253
+ # # =>
254
+ # # shape: (3, 1)
255
+ # # ┌─────────┐
256
+ # # │ 1st_col │
257
+ # # │ --- │
258
+ # # │ i64 │
259
+ # # ╞═════════╡
260
+ # # │ 100 │
261
+ # # │ 200 │
262
+ # # │ 300 │
263
+ # # └─────────┘
264
+ def self.alphanumeric(ascii_only: false, ignore_spaces: false)
265
+ # note that we need to supply patterns compatible with the *rust* regex crate
266
+ re_alpha = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
267
+ re_digit = ascii_only ? "0-9" : "\\d"
268
+ re_space = ignore_spaces ? " " : ""
269
+ return Selector._from_rbselector(
270
+ RbSelector.matches("^[#{re_alpha}#{re_digit}#{re_space}]+$")
271
+ )
272
+ end
287
273
 
288
274
  # Select all binary columns.
289
275
  #
290
- # @return [SelectorProxy]
276
+ # @return [Selector]
291
277
  #
292
278
  # @example
293
279
  # df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
@@ -309,12 +295,12 @@ module Polars
309
295
  # df.select(~Polars.cs.binary).to_h(as_series: false)
310
296
  # # => {"b"=>["world"], "d"=>[":)"]}
311
297
  def self.binary
312
- _selector_proxy_(F.col(Binary), name: "binary")
298
+ by_dtype([Binary])
313
299
  end
314
300
 
315
301
  # Select all boolean columns.
316
302
  #
317
- # @return [SelectorProxy]
303
+ # @return [Selector]
318
304
  #
319
305
  # @example
320
306
  # df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
@@ -361,16 +347,173 @@ module Polars
361
347
  # # │ 4 │
362
348
  # # └─────┘
363
349
  def self.boolean
364
- _selector_proxy_(F.col(Boolean), name: "boolean")
350
+ by_dtype([Boolean])
351
+ end
352
+
353
+ # Select all columns matching the given dtypes.
354
+ #
355
+ # @return [Selector]
356
+ #
357
+ # @example Select all columns with date or string dtypes:
358
+ # df = Polars::DataFrame.new(
359
+ # {
360
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1), Date.new(2010, 7, 5)],
361
+ # "value" => [1_234_500, 5_000_555, -4_500_000],
362
+ # "other" => ["foo", "bar", "foo"]
363
+ # }
364
+ # )
365
+ # df.select(Polars.cs.by_dtype(Polars::Date, Polars::String))
366
+ # # =>
367
+ # # shape: (3, 2)
368
+ # # ┌────────────┬───────┐
369
+ # # │ dt ┆ other │
370
+ # # │ --- ┆ --- │
371
+ # # │ date ┆ str │
372
+ # # ╞════════════╪═══════╡
373
+ # # │ 1999-12-31 ┆ foo │
374
+ # # │ 2024-01-01 ┆ bar │
375
+ # # │ 2010-07-05 ┆ foo │
376
+ # # └────────────┴───────┘
377
+ #
378
+ # @example Select all columns that are not of date or string dtype:
379
+ # df.select(~Polars.cs.by_dtype(Polars::Date, Polars::String))
380
+ # # =>
381
+ # # shape: (3, 1)
382
+ # # ┌──────────┐
383
+ # # │ value │
384
+ # # │ --- │
385
+ # # │ i64 │
386
+ # # ╞══════════╡
387
+ # # │ 1234500 │
388
+ # # │ 5000555 │
389
+ # # │ -4500000 │
390
+ # # └──────────┘
391
+ #
392
+ # Group by string columns and sum the numeric columns:
393
+ # df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort("other")
394
+ # # =>
395
+ # # shape: (2, 2)
396
+ # # ┌───────┬──────────┐
397
+ # # │ other ┆ value │
398
+ # # │ --- ┆ --- │
399
+ # # │ str ┆ i64 │
400
+ # # ╞═══════╪══════════╡
401
+ # # │ bar ┆ 5000555 │
402
+ # # │ foo ┆ -3265500 │
403
+ # # └───────┴──────────┘
404
+ def self.by_dtype(*dtypes)
405
+ all_dtypes = []
406
+ dtypes.each do |tp|
407
+ if Utils.is_polars_dtype(tp) || tp.is_a?(Class)
408
+ all_dtypes << tp
409
+ elsif tp.is_a?(::Array)
410
+ tp.each do |t|
411
+ if !(Utils.is_polars_dtype(t) || t.is_a?(Class))
412
+ msg = "invalid dtype: #{t.inspect}"
413
+ raise TypeError, msg
414
+ end
415
+ all_dtypes << t
416
+ end
417
+ else
418
+ msg = "invalid dtype: #{tp.inspect}"
419
+ raise TypeError, msg
420
+ end
421
+ end
422
+
423
+ Selector._by_dtype(all_dtypes)
365
424
  end
366
425
 
367
- # TODO
368
- # def by_dtype
369
- # end
426
+ # Select all columns matching the given indices (or range objects).
427
+ #
428
+ # @param indices [Array]
429
+ # One or more column indices (or range objects).
430
+ # Negative indexing is supported.
431
+ #
432
+ # @return [Selector]
433
+ #
434
+ # @note
435
+ # Matching columns are returned in the order in which their indexes
436
+ # appear in the selector, not the underlying schema order.
437
+ #
438
+ # @example
439
+ # df = Polars::DataFrame.new(
440
+ # {
441
+ # "key" => ["abc"],
442
+ # **100.times.to_h { |i| ["c%02d" % i, 0.5 * i] }
443
+ # }
444
+ # )
445
+ # # =>
446
+ # # shape: (1, 101)
447
+ # # ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
448
+ # # │ key ┆ c00 ┆ c01 ┆ c02 ┆ … ┆ c96 ┆ c97 ┆ c98 ┆ c99 │
449
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
450
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
451
+ # # ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
452
+ # # │ abc ┆ 0.0 ┆ 0.5 ┆ 1.0 ┆ … ┆ 48.0 ┆ 48.5 ┆ 49.0 ┆ 49.5 │
453
+ # # └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘
454
+ #
455
+ # @example Select columns by index ("key" column and the two first/last columns):
456
+ # df.select(Polars.cs.by_index(0, 1, 2, -2, -1))
457
+ # # =>
458
+ # # shape: (1, 5)
459
+ # # ┌─────┬─────┬─────┬──────┬──────┐
460
+ # # │ key ┆ c00 ┆ c01 ┆ c98 ┆ c99 │
461
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
462
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
463
+ # # ╞═════╪═════╪═════╪══════╪══════╡
464
+ # # │ abc ┆ 0.0 ┆ 0.5 ┆ 49.0 ┆ 49.5 │
465
+ # # └─────┴─────┴─────┴──────┴──────┘
466
+ #
467
+ # @example Select the "key" column and use a `range` object to select various columns.
468
+ # df.select(Polars.cs.by_index(0, (1...101).step(20)))
469
+ # # =>
470
+ # # shape: (1, 6)
471
+ # # ┌─────┬─────┬──────┬──────┬──────┬──────┐
472
+ # # │ key ┆ c00 ┆ c20 ┆ c40 ┆ c60 ┆ c80 │
473
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
474
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
475
+ # # ╞═════╪═════╪══════╪══════╪══════╪══════╡
476
+ # # │ abc ┆ 0.0 ┆ 10.0 ┆ 20.0 ┆ 30.0 ┆ 40.0 │
477
+ # # └─────┴─────┴──────┴──────┴──────┴──────┘
478
+ #
479
+ # @example
480
+ # df.select(Polars.cs.by_index(0, (101...0).step(-25), require_all: false))
481
+ # # =>
482
+ # # shape: (1, 5)
483
+ # # ┌─────┬──────┬──────┬──────┬─────┐
484
+ # # │ key ┆ c75 ┆ c50 ┆ c25 ┆ c00 │
485
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
486
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
487
+ # # ╞═════╪══════╪══════╪══════╪═════╡
488
+ # # │ abc ┆ 37.5 ┆ 25.0 ┆ 12.5 ┆ 0.0 │
489
+ # # └─────┴──────┴──────┴──────┴─────┘
490
+ #
491
+ # @example Select all columns *except* for the even-indexed ones:
492
+ # df.select(~Polars.cs.by_index((1...100).step(2)))
493
+ # # =>
494
+ # # shape: (1, 51)
495
+ # # ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
496
+ # # │ key ┆ c01 ┆ c03 ┆ c05 ┆ … ┆ c93 ┆ c95 ┆ c97 ┆ c99 │
497
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
498
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
499
+ # # ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
500
+ # # │ abc ┆ 0.5 ┆ 1.5 ┆ 2.5 ┆ … ┆ 46.5 ┆ 47.5 ┆ 48.5 ┆ 49.5 │
501
+ # # └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘
502
+ def self.by_index(*indices, require_all: true)
503
+ all_indices = []
504
+ indices.each do |idx|
505
+ if idx.is_a?(Enumerable)
506
+ all_indices.concat(idx.to_a)
507
+ elsif idx.is_a?(Integer)
508
+ all_indices << idx
509
+ else
510
+ msg = "invalid index value: #{idx.inspect}"
511
+ raise TypeError, msg
512
+ end
513
+ end
370
514
 
371
- # TODO
372
- # def by_index
373
- # end
515
+ Selector._from_rbselector(RbSelector.by_index(all_indices, require_all))
516
+ end
374
517
 
375
518
  # Select all columns matching the given names.
376
519
  #
@@ -379,7 +522,7 @@ module Polars
379
522
  # @param require_all [Boolean]
380
523
  # Whether to match *all* names (the default) or *any* of the names.
381
524
  #
382
- # @return [SelectorProxy]
525
+ # @return [Selector]
383
526
  #
384
527
  # @note
385
528
  # Matching columns are returned in the order in which they are declared in
@@ -413,12 +556,12 @@ module Polars
413
556
  # # =>
414
557
  # # shape: (2, 2)
415
558
  # # ┌─────┬─────┐
416
- # # │ foobaz
559
+ # # │ bazfoo
417
560
  # # │ --- ┆ --- │
418
- # # │ strf64
561
+ # # │ f64str
419
562
  # # ╞═════╪═════╡
420
- # # │ x ┆ 2.0 │
421
- # # │ y ┆ 5.5 │
563
+ # # │ 2.0 ┆ x
564
+ # # │ 5.5 ┆ y
422
565
  # # └─────┴─────┘
423
566
  #
424
567
  # @example Match all columns *except* for those given:
@@ -438,29 +581,316 @@ module Polars
438
581
  names.each do |nm|
439
582
  if nm.is_a?(::String)
440
583
  all_names << nm
584
+ elsif nm.is_a?(::Array)
585
+ nm.each do |n|
586
+ if !n.is_a?(::String)
587
+ msg = "invalid name: #{n.inspect}"
588
+ raise TypeError, msg
589
+ end
590
+ all_names << n
591
+ end
441
592
  else
442
593
  msg = "invalid name: #{nm.inspect}"
443
594
  raise TypeError, msg
444
595
  end
445
596
  end
446
597
 
447
- selector_params = {"*names" => all_names}
448
- match_cols = all_names
449
- if !require_all
450
- match_cols = "^(#{all_names.map { |nm| Utils.re_escape(nm) }.join("|")})$"
451
- selector_params["require_all"] = require_all
452
- end
598
+ Selector._by_name(all_names, strict: require_all)
599
+ end
453
600
 
454
- _selector_proxy_(
455
- F.col(match_cols),
456
- name: "by_name",
457
- parameters: selector_params
458
- )
601
+ # Select all enum columns.
602
+ #
603
+ # @return [Selector]
604
+ #
605
+ # @note
606
+ # This functionality is considered **unstable**. It may be changed
607
+ # at any point without it being considered a breaking change.
608
+ #
609
+ # @example Select all enum columns:
610
+ # df = Polars::DataFrame.new(
611
+ # {
612
+ # "foo" => ["xx", "yy"],
613
+ # "bar" => [123, 456],
614
+ # "baz" => [2.0, 5.5],
615
+ # },
616
+ # schema_overrides: {"foo" => Polars::Enum.new(["xx", "yy"])}
617
+ # )
618
+ # df.select(Polars.cs.enum)
619
+ # # =>
620
+ # # shape: (2, 1)
621
+ # # ┌──────┐
622
+ # # │ foo │
623
+ # # │ --- │
624
+ # # │ enum │
625
+ # # ╞══════╡
626
+ # # │ xx │
627
+ # # │ yy │
628
+ # # └──────┘
629
+ #
630
+ # @example Select all columns *except* for those that are enum:
631
+ # df.select(~Polars.cs.enum())
632
+ # # =>
633
+ # # shape: (2, 2)
634
+ # # ┌─────┬─────┐
635
+ # # │ bar ┆ baz │
636
+ # # │ --- ┆ --- │
637
+ # # │ i64 ┆ f64 │
638
+ # # ╞═════╪═════╡
639
+ # # │ 123 ┆ 2.0 │
640
+ # # │ 456 ┆ 5.5 │
641
+ # # └─────┴─────┘
642
+ def self.enum
643
+ Selector._from_rbselector(RbSelector.enum_)
644
+ end
645
+
646
+ # Select all list columns.
647
+ #
648
+ # @return [Selector]
649
+ #
650
+ # @note
651
+ # This functionality is considered **unstable**. It may be changed
652
+ # at any point without it being considered a breaking change.
653
+ #
654
+ # @example Select all list columns:
655
+ # df = Polars::DataFrame.new(
656
+ # {
657
+ # "foo" => [["xx", "yy"], ["x"]],
658
+ # "bar" => [123, 456],
659
+ # "baz" => [2.0, 5.5]
660
+ # }
661
+ # )
662
+ # df.select(Polars.cs.list)
663
+ # # =>
664
+ # # shape: (2, 1)
665
+ # # ┌──────────────┐
666
+ # # │ foo │
667
+ # # │ --- │
668
+ # # │ list[str] │
669
+ # # ╞══════════════╡
670
+ # # │ ["xx", "yy"] │
671
+ # # │ ["x"] │
672
+ # # └──────────────┘
673
+ #
674
+ # @example Select all columns *except* for those that are list:
675
+ # df.select(~Polars.cs.list)
676
+ # # =>
677
+ # # shape: (2, 2)
678
+ # # ┌─────┬─────┐
679
+ # # │ bar ┆ baz │
680
+ # # │ --- ┆ --- │
681
+ # # │ i64 ┆ f64 │
682
+ # # ╞═════╪═════╡
683
+ # # │ 123 ┆ 2.0 │
684
+ # # │ 456 ┆ 5.5 │
685
+ # # └─────┴─────┘
686
+ #
687
+ # @example Select all list columns with a certain matching inner type:
688
+ # df.select(Polars.cs.list(Polars.cs.string))
689
+ # # =>
690
+ # # shape: (2, 1)
691
+ # # ┌──────────────┐
692
+ # # │ foo │
693
+ # # │ --- │
694
+ # # │ list[str] │
695
+ # # ╞══════════════╡
696
+ # # │ ["xx", "yy"] │
697
+ # # │ ["x"] │
698
+ # # └──────────────┘
699
+ #
700
+ # @example
701
+ # df.select(Polars.cs.list(Polars.cs.integer))
702
+ # # =>
703
+ # # shape: (0, 0)
704
+ # # ┌┐
705
+ # # ╞╡
706
+ # # └┘
707
+ def self.list(inner = nil)
708
+ inner_s = !inner.nil? ? inner._rbselector : nil
709
+ Selector._from_rbselector(RbSelector.list(inner_s))
710
+ end
711
+
712
+ # Select all array columns.
713
+ #
714
+ # @return [Selector]
715
+ #
716
+ # @note
717
+ # This functionality is considered **unstable**. It may be changed
718
+ # at any point without it being considered a breaking change.
719
+ #
720
+ # @example Select all array columns:
721
+ # df = Polars::DataFrame.new(
722
+ # {
723
+ # "foo" => [["xx", "yy"], ["x", "y"]],
724
+ # "bar" => [123, 456],
725
+ # "baz" => [2.0, 5.5]
726
+ # },
727
+ # schema_overrides: {"foo" => Polars::Array.new(Polars::String, 2)}
728
+ # )
729
+ # df.select(Polars.cs.array)
730
+ # # =>
731
+ # # shape: (2, 1)
732
+ # # ┌───────────────┐
733
+ # # │ foo │
734
+ # # │ --- │
735
+ # # │ array[str, 2] │
736
+ # # ╞═══════════════╡
737
+ # # │ ["xx", "yy"] │
738
+ # # │ ["x", "y"] │
739
+ # # └───────────────┘
740
+ #
741
+ # @example Select all columns *except* for those that are array:
742
+ # df.select(~Polars.cs.array)
743
+ # # =>
744
+ # # shape: (2, 2)
745
+ # # ┌─────┬─────┐
746
+ # # │ bar ┆ baz │
747
+ # # │ --- ┆ --- │
748
+ # # │ i64 ┆ f64 │
749
+ # # ╞═════╪═════╡
750
+ # # │ 123 ┆ 2.0 │
751
+ # # │ 456 ┆ 5.5 │
752
+ # # └─────┴─────┘
753
+ #
754
+ # @example Select all array columns with a certain matching inner type:
755
+ # df.select(Polars.cs.array(Polars.cs.string))
756
+ # # =>
757
+ # # shape: (2, 1)
758
+ # # ┌───────────────┐
759
+ # # │ foo │
760
+ # # │ --- │
761
+ # # │ array[str, 2] │
762
+ # # ╞═══════════════╡
763
+ # # │ ["xx", "yy"] │
764
+ # # │ ["x", "y"] │
765
+ # # └───────────────┘
766
+ #
767
+ # @example
768
+ # df.select(Polars.cs.array(Polars.cs.integer))
769
+ # # =>
770
+ # # shape: (0, 0)
771
+ # # ┌┐
772
+ # # ╞╡
773
+ # # └┘
774
+ #
775
+ # @example
776
+ # df.select(Polars.cs.array(width: 2))
777
+ # # =>
778
+ # # shape: (2, 1)
779
+ # # ┌───────────────┐
780
+ # # │ foo │
781
+ # # │ --- │
782
+ # # │ array[str, 2] │
783
+ # # ╞═══════════════╡
784
+ # # │ ["xx", "yy"] │
785
+ # # │ ["x", "y"] │
786
+ # # └───────────────┘
787
+ #
788
+ # @example
789
+ # df.select(Polars.cs.array(width: 3))
790
+ # # =>
791
+ # # shape: (0, 0)
792
+ # # ┌┐
793
+ # # ╞╡
794
+ # # └┘
795
+ def self.array(inner = nil, width: nil)
796
+ inner_s = !inner.nil? ? inner._rbselector : nil
797
+ Selector._from_rbselector(RbSelector.array(inner_s, width))
798
+ end
799
+
800
+ # Select all struct columns.
801
+ #
802
+ # @return [Selector]
803
+ #
804
+ # @note
805
+ # This functionality is considered **unstable**. It may be changed
806
+ # at any point without it being considered a breaking change.
807
+ #
808
+ # @example Select all struct columns:
809
+ # df = Polars::DataFrame.new(
810
+ # {
811
+ # "foo" => [{"a": "xx", "b": "z"}, {"a": "x", "b": "y"}],
812
+ # "bar" => [123, 456],
813
+ # "baz" => [2.0, 5.5]
814
+ # }
815
+ # )
816
+ # df.select(Polars.cs.struct)
817
+ # # =>
818
+ # # shape: (2, 1)
819
+ # # ┌────────────┐
820
+ # # │ foo │
821
+ # # │ --- │
822
+ # # │ struct[2] │
823
+ # # ╞════════════╡
824
+ # # │ {"xx","z"} │
825
+ # # │ {"x","y"} │
826
+ # # └────────────┘
827
+ #
828
+ # @example Select all columns *except* for those that are struct:
829
+ # df.select(~Polars.cs.struct)
830
+ # # =>
831
+ # # shape: (2, 2)
832
+ # # ┌─────┬─────┐
833
+ # # │ bar ┆ baz │
834
+ # # │ --- ┆ --- │
835
+ # # │ i64 ┆ f64 │
836
+ # # ╞═════╪═════╡
837
+ # # │ 123 ┆ 2.0 │
838
+ # # │ 456 ┆ 5.5 │
839
+ # # └─────┴─────┘
840
+ def self.struct
841
+ Selector._from_rbselector(RbSelector.struct_)
842
+ end
843
+
844
+ # Select all nested columns.
845
+ #
846
+ # A nested column is a list, array or struct.
847
+ #
848
+ # @return [Selector]
849
+ #
850
+ # @note
851
+ # This functionality is considered **unstable**. It may be changed
852
+ # at any point without it being considered a breaking change.
853
+ #
854
+ # @example Select all nested columns:
855
+ # df = Polars::DataFrame.new(
856
+ # {
857
+ # "foo" => [{"a" => "xx", "b" => "z"}, {"a" => "x", "b" => "y"}],
858
+ # "bar" => [123, 456],
859
+ # "baz" => [2.0, 5.5],
860
+ # "wow" => [[1, 2], [3]]
861
+ # }
862
+ # )
863
+ # df.select(Polars.cs.nested)
864
+ # # =>
865
+ # # shape: (2, 2)
866
+ # # ┌────────────┬───────────┐
867
+ # # │ foo ┆ wow │
868
+ # # │ --- ┆ --- │
869
+ # # │ struct[2] ┆ list[i64] │
870
+ # # ╞════════════╪═══════════╡
871
+ # # │ {"xx","z"} ┆ [1, 2] │
872
+ # # │ {"x","y"} ┆ [3] │
873
+ # # └────────────┴───────────┘
874
+ #
875
+ # @example Select all columns *except* for those that are nested:
876
+ # df.select(~Polars.cs.nested)
877
+ # # =>
878
+ # # shape: (2, 2)
879
+ # # ┌─────┬─────┐
880
+ # # │ bar ┆ baz │
881
+ # # │ --- ┆ --- │
882
+ # # │ i64 ┆ f64 │
883
+ # # ╞═════╪═════╡
884
+ # # │ 123 ┆ 2.0 │
885
+ # # │ 456 ┆ 5.5 │
886
+ # # └─────┴─────┘
887
+ def self.nested
888
+ Selector._from_rbselector(RbSelector.nested)
459
889
  end
460
890
 
461
891
  # Select all categorical columns.
462
892
  #
463
- # @return [SelectorProxy]
893
+ # @return [Selector]
464
894
  #
465
895
  # @example
466
896
  # df = Polars::DataFrame.new(
@@ -498,7 +928,7 @@ module Polars
498
928
  # # │ 456 ┆ 5.5 │
499
929
  # # └─────┴─────┘
500
930
  def self.categorical
501
- _selector_proxy_(F.col(Categorical), name: "categorical")
931
+ Selector._from_rbselector(RbSelector.categorical())
502
932
  end
503
933
 
504
934
  # Select columns whose names contain the given literal substring(s).
@@ -506,7 +936,7 @@ module Polars
506
936
  # @param substring [Object]
507
937
  # Substring(s) that matching column names should contain.
508
938
  #
509
- # @return [SelectorProxy]
939
+ # @return [Selector]
510
940
  #
511
941
  # @example
512
942
  # df = Polars::DataFrame.new(
@@ -560,16 +990,12 @@ module Polars
560
990
  escaped_substring = _re_string(substring)
561
991
  raw_params = "^.*#{escaped_substring}.*$"
562
992
 
563
- _selector_proxy_(
564
- F.col(raw_params),
565
- name: "contains",
566
- parameters: {"*substring" => escaped_substring}
567
- )
993
+ Selector._from_rbselector(RbSelector.matches(raw_params))
568
994
  end
569
995
 
570
996
  # Select all date columns.
571
997
  #
572
- # @return [SelectorProxy]
998
+ # @return [Selector]
573
999
  #
574
1000
  # @example
575
1001
  # df = Polars::DataFrame.new(
@@ -605,16 +1031,23 @@ module Polars
605
1031
  # # │ 2031-12-31 00:30:00 │
606
1032
  # # └─────────────────────┘
607
1033
  def self.date
608
- _selector_proxy_(F.col(Date), name: "date")
1034
+ by_dtype([Date])
609
1035
  end
610
1036
 
611
- # TODO
612
- # def datetime
613
- # end
1037
+ # Select all datetime columns, optionally filtering by time unit/zone.
1038
+ #
1039
+ # @return [Selector]
1040
+ def self.datetime
1041
+ time_unit = ["ms", "us", "ns"]
1042
+
1043
+ time_zone = [nil]
1044
+
1045
+ Selector._from_rbselector(RbSelector.datetime(time_unit, time_zone))
1046
+ end
614
1047
 
615
1048
  # Select all decimal columns.
616
1049
  #
617
- # @return [SelectorProxy]
1050
+ # @return [Selector]
618
1051
  #
619
1052
  # @example
620
1053
  # df = Polars::DataFrame.new(
@@ -654,7 +1087,104 @@ module Polars
654
1087
  # # └─────┘
655
1088
  def self.decimal
656
1089
  # TODO: allow explicit selection by scale/precision?
657
- _selector_proxy_(F.col(Decimal), name: "decimal")
1090
+ Selector._from_rbselector(RbSelector.decimal)
1091
+ end
1092
+
1093
+ # Select all columns having names consisting only of digits.
1094
+ #
1095
+ # @return [Selector]
1096
+ #
1097
+ # @note
1098
+ # Matching column names cannot contain *any* non-digit characters. Note that the
1099
+ # definition of "digit" consists of all valid Unicode digit characters (`\d`)
1100
+ # by default; this can be changed by setting `ascii_only: true`.
1101
+ #
1102
+ # @example
1103
+ # df = Polars::DataFrame.new(
1104
+ # {
1105
+ # "key" => ["aaa", "bbb", "aaa", "bbb", "bbb"],
1106
+ # "year" => [2001, 2001, 2025, 2025, 2001],
1107
+ # "value" => [-25, 100, 75, -15, -5]
1108
+ # }
1109
+ # ).pivot(
1110
+ # "year",
1111
+ # values: "value",
1112
+ # index: "key",
1113
+ # aggregate_function: "sum"
1114
+ # )
1115
+ # # =>
1116
+ # # shape: (2, 3)
1117
+ # # ┌─────┬──────┬──────┐
1118
+ # # │ key ┆ 2001 ┆ 2025 │
1119
+ # # │ --- ┆ --- ┆ --- │
1120
+ # # │ str ┆ i64 ┆ i64 │
1121
+ # # ╞═════╪══════╪══════╡
1122
+ # # │ aaa ┆ -25 ┆ 75 │
1123
+ # # │ bbb ┆ 95 ┆ -15 │
1124
+ # # └─────┴──────┴──────┘
1125
+ #
1126
+ # @example Select columns with digit names:
1127
+ # df.select(Polars.cs.digit)
1128
+ # # =>
1129
+ # # shape: (2, 2)
1130
+ # # ┌──────┬──────┐
1131
+ # # │ 2001 ┆ 2025 │
1132
+ # # │ --- ┆ --- │
1133
+ # # │ i64 ┆ i64 │
1134
+ # # ╞══════╪══════╡
1135
+ # # │ -25 ┆ 75 │
1136
+ # # │ 95 ┆ -15 │
1137
+ # # └──────┴──────┘
1138
+ #
1139
+ # @example Select all columns *except* for those with digit names:
1140
+ # df.select(~Polars.cs.digit)
1141
+ # # =>
1142
+ # # shape: (2, 1)
1143
+ # # ┌─────┐
1144
+ # # │ key │
1145
+ # # │ --- │
1146
+ # # │ str │
1147
+ # # ╞═════╡
1148
+ # # │ aaa │
1149
+ # # │ bbb │
1150
+ # # └─────┘
1151
+ #
1152
+ # @example Demonstrate use of `ascii_only` flag (by default all valid unicode digits are considered, but this can be constrained to ascii 0-9):
1153
+ # df = Polars::DataFrame.new({"१९९९" => [1999], "२०७७" => [2077], "3000": [3000]})
1154
+ # df.select(Polars.cs.digit)
1155
+ # # =>
1156
+ # # shape: (1, 3)
1157
+ # # ┌──────┬──────┬──────┐
1158
+ # # │ १९९९ ┆ २०७७ ┆ 3000 │
1159
+ # # │ --- ┆ --- ┆ --- │
1160
+ # # │ i64 ┆ i64 ┆ i64 │
1161
+ # # ╞══════╪══════╪══════╡
1162
+ # # │ 1999 ┆ 2077 ┆ 3000 │
1163
+ # # └──────┴──────┴──────┘
1164
+ #
1165
+ # @example
1166
+ # df.select(Polars.cs.digit(ascii_only: true))
1167
+ # # =>
1168
+ # # shape: (1, 1)
1169
+ # # ┌──────┐
1170
+ # # │ 3000 │
1171
+ # # │ --- │
1172
+ # # │ i64 │
1173
+ # # ╞══════╡
1174
+ # # │ 3000 │
1175
+ # # └──────┘
1176
+ def self.digit(ascii_only: false)
1177
+ re_digit = ascii_only ? "[0-9]" : "\\d"
1178
+ Selector._from_rbselector(RbSelector.matches("^#{re_digit}+$"))
1179
+ end
1180
+
1181
+ # Select all duration columns, optionally filtering by time unit.
1182
+ #
1183
+ # @return [Selector]
1184
+ def self.duration
1185
+ time_unit = ["ms", "us", "ns"]
1186
+
1187
+ Selector._from_rbselector(RbSelector.duration(time_unit))
658
1188
  end
659
1189
 
660
1190
  # Select columns that end with the given substring(s).
@@ -662,7 +1192,7 @@ module Polars
662
1192
  # @param suffix [Object]
663
1193
  # Substring(s) that matching column names should end with.
664
1194
  #
665
- # @return [SelectorProxy]
1195
+ # @return [Selector]
666
1196
  #
667
1197
  # @example
668
1198
  # df = Polars::DataFrame.new(
@@ -716,16 +1246,64 @@ module Polars
716
1246
  escaped_suffix = _re_string(suffix)
717
1247
  raw_params = "^.*#{escaped_suffix}$"
718
1248
 
719
- _selector_proxy_(
720
- F.col(raw_params),
721
- name: "ends_with",
722
- parameters: {"*suffix" => escaped_suffix},
723
- )
1249
+ Selector._from_rbselector(RbSelector.matches(raw_params))
1250
+ end
1251
+
1252
+ # Select all columns except those matching the given columns, datatypes, or selectors.
1253
+ #
1254
+ # @param columns [Object]
1255
+ # One or more columns (col or name), datatypes, or selectors representing
1256
+ # the columns to exclude.
1257
+ # @param more_columns [Array]
1258
+ # Additional columns, datatypes, or selectors to exclude, specified as positional
1259
+ # arguments.
1260
+ #
1261
+ # @return [Selector]
1262
+ #
1263
+ # @note
1264
+ # If excluding a single selector it is simpler to write as `~selector` instead.
1265
+ #
1266
+ # @example Exclude by column name(s):
1267
+ # df = Polars::DataFrame.new(
1268
+ # {
1269
+ # "aa" => [1, 2, 3],
1270
+ # "ba" => ["a", "b", nil],
1271
+ # "cc" => [nil, 2.5, 1.5]
1272
+ # }
1273
+ # )
1274
+ # df.select(Polars.cs.exclude("ba", "xx"))
1275
+ # # =>
1276
+ # # shape: (3, 2)
1277
+ # # ┌─────┬──────┐
1278
+ # # │ aa ┆ cc │
1279
+ # # │ --- ┆ --- │
1280
+ # # │ i64 ┆ f64 │
1281
+ # # ╞═════╪══════╡
1282
+ # # │ 1 ┆ null │
1283
+ # # │ 2 ┆ 2.5 │
1284
+ # # │ 3 ┆ 1.5 │
1285
+ # # └─────┴──────┘
1286
+ #
1287
+ # @example Exclude using a column name, a selector, and a dtype:
1288
+ # df.select(Polars.cs.exclude("aa", Polars.cs.string, Polars::UInt32))
1289
+ # # =>
1290
+ # # shape: (3, 1)
1291
+ # # ┌──────┐
1292
+ # # │ cc │
1293
+ # # │ --- │
1294
+ # # │ f64 │
1295
+ # # ╞══════╡
1296
+ # # │ null │
1297
+ # # │ 2.5 │
1298
+ # # │ 1.5 │
1299
+ # # └──────┘
1300
def self.exclude(columns, *more_columns)
  # Build the union of everything to drop, then invert it.
  to_drop = _combine_as_selector(columns, *more_columns)
  ~to_drop
end
725
1303
 
726
1304
  # Select the first column in the current scope.
727
1305
  #
728
- # @return [SelectorProxy]
1306
+ # @return [Selector]
729
1307
  #
730
1308
  # @example
731
1309
  # df = Polars::DataFrame.new(
@@ -762,13 +1340,13 @@ module Polars
762
1340
  # # │ 123 ┆ 2.0 ┆ 0 │
763
1341
  # # │ 456 ┆ 5.5 ┆ 1 │
764
1342
  # # └─────┴─────┴─────┘
765
def self.first(strict: true)
  # `strict` is handed to the native selector unchanged.
  rb_first = RbSelector.first(strict)
  Selector._from_rbselector(rb_first)
end
768
1346
 
769
1347
  # Select all float columns.
770
1348
  #
771
- # @return [SelectorProxy]
1349
+ # @return [Selector]
772
1350
  #
773
1351
  # @example
774
1352
  # df = Polars::DataFrame.new(
@@ -807,12 +1385,12 @@ module Polars
807
1385
  # # │ y ┆ 456 │
808
1386
  # # └─────┴─────┘
809
1387
  def self.float
810
- _selector_proxy_(F.col(FLOAT_DTYPES), name: "float")
1388
+ Selector._from_rbselector(RbSelector.float)
811
1389
  end
812
1390
 
813
1391
  # Select all integer columns.
814
1392
  #
815
- # @return [SelectorProxy]
1393
+ # @return [Selector]
816
1394
  #
817
1395
  # @example
818
1396
  # df = Polars::DataFrame.new(
@@ -850,12 +1428,12 @@ module Polars
850
1428
  # # │ y ┆ 5.5 │
851
1429
  # # └─────┴─────┘
852
1430
  def self.integer
853
- _selector_proxy_(F.col(INTEGER_DTYPES), name: "integer")
1431
+ Selector._from_rbselector(RbSelector.integer)
854
1432
  end
855
1433
 
856
1434
  # Select all signed integer columns.
857
1435
  #
858
- # @return [SelectorProxy]
1436
+ # @return [Selector]
859
1437
  #
860
1438
  # @example
861
1439
  # df = Polars::DataFrame.new(
@@ -907,12 +1485,12 @@ module Polars
907
1485
  # # │ -456 ┆ 6789 ┆ 4321 │
908
1486
  # # └──────┴──────┴──────┘
909
1487
  def self.signed_integer
910
- _selector_proxy_(F.col(SIGNED_INTEGER_DTYPES), name: "signed_integer")
1488
+ Selector._from_rbselector(RbSelector.signed_integer)
911
1489
  end
912
1490
 
913
1491
  # Select all unsigned integer columns.
914
1492
  #
915
- # @return [SelectorProxy]
1493
+ # @return [Selector]
916
1494
  #
917
1495
  # @example
918
1496
  # df = Polars::DataFrame.new(
@@ -964,12 +1542,12 @@ module Polars
964
1542
  # # │ -456 ┆ 6789 ┆ 4321 │
965
1543
  # # └──────┴──────┴──────┘
966
1544
  def self.unsigned_integer
967
- _selector_proxy_(F.col(UNSIGNED_INTEGER_DTYPES), name: "unsigned_integer")
1545
+ Selector._from_rbselector(RbSelector.unsigned_integer)
968
1546
  end
969
1547
 
970
1548
  # Select the last column in the current scope.
971
1549
  #
972
- # @return [SelectorProxy]
1550
+ # @return [Selector]
973
1551
  #
974
1552
  # @example
975
1553
  # df = Polars::DataFrame.new(
@@ -1006,13 +1584,71 @@ module Polars
1006
1584
  # # │ x ┆ 123 ┆ 2.0 │
1007
1585
  # # │ y ┆ 456 ┆ 5.5 │
1008
1586
  # # └─────┴─────┴─────┘
1009
def self.last(strict: true)
  # `strict` is handed to the native selector unchanged.
  rb_last = RbSelector.last(strict)
  Selector._from_rbselector(rb_last)
end
1590
+
1591
+ # Select all columns that match the given regex pattern.
1592
+ #
1593
+ # @param pattern [String]
1594
+ # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
1595
+ #
1596
+ # @return [Selector]
1597
+ #
1598
+ # @example Match column names containing an 'a', preceded by a character that is not 'z':
1599
+ # df = Polars::DataFrame.new(
1600
+ # {
1601
+ # "foo" => ["x", "y"],
1602
+ # "bar" => [123, 456],
1603
+ # "baz" => [2.0, 5.5],
1604
+ # "zap" => [0, 1]
1605
+ # }
1606
+ # )
1607
+ # df.select(Polars.cs.matches("[^z]a"))
1608
+ # # =>
1609
+ # # shape: (2, 2)
1610
+ # # ┌─────┬─────┐
1611
+ # # │ bar ┆ baz │
1612
+ # # │ --- ┆ --- │
1613
+ # # │ i64 ┆ f64 │
1614
+ # # ╞═════╪═════╡
1615
+ # # │ 123 ┆ 2.0 │
1616
+ # # │ 456 ┆ 5.5 │
1617
+ # # └─────┴─────┘
1618
+ #
1619
+ # @example Do not match column names ending in 'R' or 'z' (case-insensitively):
1620
+ # df.select(~Polars.cs.matches("(?i)R|z$"))
1621
+ # # =>
1622
+ # # shape: (2, 2)
1623
+ # # ┌─────┬─────┐
1624
+ # # │ foo ┆ zap │
1625
+ # # │ --- ┆ --- │
1626
+ # # │ str ┆ i64 │
1627
+ # # ╞═════╪═════╡
1628
+ # # │ x ┆ 0 │
1629
+ # # │ y ┆ 1 │
1630
+ # # └─────┴─────┘
1631
def self.matches(pattern)
  # A bare ".*" is handled by the `all` selector instead of a regex.
  return all if pattern == ".*"

  # Strip one redundant ".*": the leading one if present, otherwise a
  # trailing one (never both).
  core =
    if pattern.start_with?(".*")
      pattern.delete_prefix(".*")
    elsif pattern.end_with?(".*")
      pattern.delete_suffix(".*")
    else
      pattern
    end

  # Pad with ".*" and an anchor on every side that is not anchored already,
  # so the final pattern always spans "^" to "$".
  head = core.start_with?("^") ? "" : "^.*"
  tail = core.end_with?("$") ? "" : ".*$"

  Selector._from_rbselector(RbSelector.matches("#{head}#{core}#{tail}"))
end
1012
1648
 
1013
1649
  # Select all numeric columns.
1014
1650
  #
1015
- # @return [SelectorProxy]
1651
+ # @return [Selector]
1016
1652
  #
1017
1653
  # @example
1018
1654
  # df = Polars::DataFrame.new(
@@ -1051,7 +1687,14 @@ module Polars
1051
1687
  # # │ y │
1052
1688
  # # └─────┘
1053
1689
  def self.numeric
1054
- _selector_proxy_(F.col(NUMERIC_DTYPES), name: "numeric")
1690
+ Selector._from_rbselector(RbSelector.numeric)
1691
+ end
1692
+
1693
+ # Select all object columns.
1694
+ #
1695
+ # @return [Selector]
1696
def self.object
  # Wrap the native object selector in the Ruby-level Selector.
  rb_object = RbSelector.object
  Selector._from_rbselector(rb_object)
end
1056
1699
 
1057
1700
  # Select columns that start with the given substring(s).
@@ -1059,7 +1702,7 @@ module Polars
1059
1702
  # @param prefix [Object]
1060
1703
  # Substring(s) that matching column names should start with.
1061
1704
  #
1062
- # @return [SelectorProxy]
1705
+ # @return [Selector]
1063
1706
  #
1064
1707
  # @example
1065
1708
  # df = Polars::DataFrame.new(
@@ -1113,16 +1756,12 @@ module Polars
1113
1756
  escaped_prefix = _re_string(prefix)
1114
1757
  raw_params = "^#{escaped_prefix}.*$"
1115
1758
 
1116
- _selector_proxy_(
1117
- F.col(raw_params),
1118
- name: "starts_with",
1119
- parameters: {"*prefix" => prefix}
1120
- )
1759
+ Selector._from_rbselector(RbSelector.matches(raw_params))
1121
1760
  end
1122
1761
 
1123
1762
  # Select all String (and, optionally, Categorical) string columns.
1124
1763
  #
1125
- # @return [SelectorProxy]
1764
+ # @return [Selector]
1126
1765
  #
1127
1766
  # @example
1128
1767
  # df = Polars::DataFrame.new(
@@ -1169,16 +1808,66 @@ module Polars
1169
1808
  string_dtypes << Categorical
1170
1809
  end
1171
1810
 
1172
- _selector_proxy_(
1173
- F.col(string_dtypes),
1174
- name: "string",
1175
- parameters: {"include_categorical" => include_categorical},
1176
- )
1811
+ by_dtype(string_dtypes)
1812
+ end
1813
+
1814
+ # Select all temporal columns.
1815
+ #
1816
+ # @return [Selector]
1817
+ #
1818
+ # @example Match all temporal columns:
1819
+ # df = Polars::DataFrame.new(
1820
+ # {
1821
+ # "dt" => [Date.new(2021, 1, 1), Date.new(2021, 1, 2)],
1822
+ # "tm" => [DateTime.new(2000, 1, 1, 12, 0, 0), DateTime.new(2000, 1, 1, 20, 30, 45)],
1823
+ # "value" => [1.2345, 2.3456],
1824
+ # },
1825
+ # schema_overrides: {"tm" => Polars::Time}
1826
+ # )
1827
+ # df.select(Polars.cs.temporal)
1828
+ # # =>
1829
+ # # shape: (2, 2)
1830
+ # # ┌────────────┬──────────┐
1831
+ # # │ dt ┆ tm │
1832
+ # # │ --- ┆ --- │
1833
+ # # │ date ┆ time │
1834
+ # # ╞════════════╪══════════╡
1835
+ # # │ 2021-01-01 ┆ 12:00:00 │
1836
+ # # │ 2021-01-02 ┆ 20:30:45 │
1837
+ # # └────────────┴──────────┘
1838
+ #
1839
+ # @example Match all temporal columns *except* for time columns:
1840
+ # df.select(Polars.cs.temporal - Polars.cs.time)
1841
+ # # =>
1842
+ # # shape: (2, 1)
1843
+ # # ┌────────────┐
1844
+ # # │ dt │
1845
+ # # │ --- │
1846
+ # # │ date │
1847
+ # # ╞════════════╡
1848
+ # # │ 2021-01-01 │
1849
+ # # │ 2021-01-02 │
1850
+ # # └────────────┘
1851
+ #
1852
+ # @example Match all columns *except* for temporal columns:
1853
+ # df.select(~Polars.cs.temporal)
1854
+ # # =>
1855
+ # # shape: (2, 1)
1856
+ # # ┌────────┐
1857
+ # # │ value │
1858
+ # # │ --- │
1859
+ # # │ f64 │
1860
+ # # ╞════════╡
1861
+ # # │ 1.2345 │
1862
+ # # │ 2.3456 │
1863
+ # # └────────┘
1864
def self.temporal
  # Wrap the native temporal selector in the Ruby-level Selector.
  rb_temporal = RbSelector.temporal
  Selector._from_rbselector(rb_temporal)
end
1178
1867
 
1179
1868
  # Select all time columns.
1180
1869
  #
1181
- # @return [SelectorProxy]
1870
+ # @return [Selector]
1182
1871
  #
1183
1872
  # @example
1184
1873
  # df = Polars::DataFrame.new(
@@ -1216,11 +1905,51 @@ module Polars
1216
1905
  # # │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
1217
1906
  # # └─────────────────────┴────────────┘
1218
1907
  def self.time
1219
- _selector_proxy_(F.col(Time), name: "time")
1908
+ by_dtype([Time])
1909
+ end
1910
+
1911
# Combine column names, regex patterns, dtypes, column expressions, and
# selectors into one selector that is the union of all of them.
#
# @private
#
# @param items [Object]
#   A single item or an array of items. Each item is a selector, a string
#   (a column name, or a regex when it starts with "^" and ends with "$"),
#   a Polars dtype, or a column expression.
# @param more_items [Array]
#   Further items, classified the same way.
#
# @return [Selector]
#
# @raise [TypeError] if an item is none of the supported kinds.
def self._combine_as_selector(items, *more_items)
  names, regexes, dtypes, selectors = [], [], [], []

  candidates = (items.is_a?(::Array) ? items : [items]) + more_items
  candidates.each do |item|
    if Utils.is_selector(item)
      selectors << item
    elsif item.is_a?(::String)
      # Only fully anchored strings count as regexes; anything else is a
      # literal column name.
      if item.start_with?("^") && item.end_with?("$")
        regexes << item
      else
        names << item
      end
    elsif Utils.is_polars_dtype(item)
      dtypes << item
    elsif Utils.is_column(item)
      names << item.meta.output_name
    else
      msg = "expected one or more `str`, `DataType` or selector; found #{item.inspect} instead."
      raise TypeError, msg
    end
  end

  selected = []
  # Names are matched leniently: a listed name need not exist.
  selected << by_name(*names, require_all: false) if names.any?
  selected << by_dtype(*dtypes) if dtypes.any?
  if regexes.any?
    # Several patterns become one alternation; each is parenthesised so its
    # own anchors and alternations stay local to it.
    combined = regexes.size > 1 ? regexes.map { |rx| "(#{rx})" }.join("|") : regexes.first
    selected << matches(combined)
  end
  selected.concat(selectors)

  # Union everything, starting from the empty selector so that no input
  # still yields a valid selector.
  selected.reduce(empty, :|)
end
1221
1950
  end
1222
1951
 
1223
1952
  def self.cs
1224
- Polars::Selectors
1953
+ Selectors
1225
1954
  end
1226
1955
  end