polars-df 0.14.0-aarch64-linux → 0.15.0-aarch64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/Cargo.lock +1296 -283
  4. data/LICENSE-THIRD-PARTY.txt +24793 -13160
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.so +0 -0
  8. data/lib/polars/3.2/polars.so +0 -0
  9. data/lib/polars/3.3/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +275 -52
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -4
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +465 -35
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,1144 @@
1
+ module Polars
2
+ module Selectors
3
+ # @private
4
class SelectorProxy < Expr
  # Expression wrapper that remembers which selector produced it (name and
  # parameters) so that set operations between selectors can be expressed
  # with `-`, `&`, `|`, `^` and `~`, and so `inspect` can print the selector
  # call instead of the raw expression.

  # Infix symbol printed by #inspect for each set-operation selector name.
  SET_OP_SYMBOLS = {"and" => "&", "or" => "|", "sub" => "-", "xor" => "^"}.freeze

  attr_accessor :_attrs
  attr_accessor :_repr_override

  # @param expr [Expr]
  #   Expression whose `_rbexpr` backs this selector.
  # @param name [String]
  #   Selector name; for set operations one of "and", "or", "sub", "xor".
  # @param parameters [Hash, nil]
  #   Parameters the selector was built with (used only for `inspect`).
  def initialize(expr, name:, parameters: nil)
    self._rbexpr = expr._rbexpr
    self._attrs = {name: name, params: parameters}
  end

  # Human-readable form of the selector.
  #
  # @return [String]
  def inspect
    return as_expr.inspect unless _attrs
    return _repr_override if _repr_override

    selector_name = _attrs[:name]
    params = _attrs[:params] || {}
    if SET_OP_SYMBOLS.include?(selector_name)
      op = SET_OP_SYMBOLS[selector_name]
      "(#{params.values.map(&:inspect).join(" #{op} ")})"
    else
      # keys starting with "*" are splatted arguments: print the inspected
      # value without its first and last character, and without the key
      str_params = params.map { |k, v| k.start_with?("*") ? v.inspect[1..-2] : "#{k}=#{v.inspect}" }.join(", ")
      "Polars.cs.#{selector_name}(#{str_params})"
    end
  end

  # Complement of the selector (all columns minus this selection).
  #
  # @return [SelectorProxy, Expr]
  def ~
    if Utils.is_selector(self)
      inverted = Selectors.all - self
      inverted._repr_override = "~#{inspect}"
    else
      inverted = ~as_expr
    end
    inverted
  end

  # Set difference when `other` is a selector; otherwise plain expression
  # subtraction.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  def -(other)
    return as_expr - other unless Utils.is_selector(other)

    _set_operation("sub", other, meta._as_selector.meta._selector_sub(other))
  end

  # Set intersection when `other` is a selector; otherwise expression `&`.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  # @raise [Todo] if `other` is a plain column (not supported here)
  def &(other)
    raise Todo if Utils.is_column(other)
    return as_expr & other unless Utils.is_selector(other)

    _set_operation("and", other, meta._as_selector.meta._selector_and(other))
  end

  # Set union when `other` is a selector; otherwise expression `|`.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  # @raise [Todo] if `other` is a plain column (not supported here)
  def |(other)
    raise Todo if Utils.is_column(other)
    return as_expr | other unless Utils.is_selector(other)

    # A union must not go through `_selector_and` (that is the intersection).
    # NOTE(review): assumes the meta namespace exposes `_selector_add` as the
    # union primitive next to `_selector_sub` / `_selector_and` - confirm.
    _set_operation("or", other, meta._as_selector.meta._selector_add(other))
  end

  # Symmetric difference when `other` is a selector; otherwise expression `^`.
  #
  # @param other [Object]
  # @return [SelectorProxy, Expr]
  # @raise [Todo] if `other` is a plain column (not supported here)
  def ^(other)
    raise Todo if Utils.is_column(other)
    return as_expr ^ other unless Utils.is_selector(other)

    # Columns in exactly one side: (self | other) - (self & other).
    # Composed from the operators above so the result is not an intersection.
    in_either = self | other
    in_both = self & other
    _set_operation("xor", other, in_either - in_both)
  end

  # Plain expression view of the selector (drops the selector metadata).
  #
  # @return [Expr]
  def as_expr
    Expr._from_rbexpr(_rbexpr)
  end

  private

  # Wraps the combined expression in a new proxy labelled with the set
  # operation name and both operands, which `inspect` renders as infix.
  def _set_operation(op_name, other, combined)
    SelectorProxy.new(
      combined,
      parameters: {"self" => self, "other" => other},
      name: op_name
    )
  end
end
110
+
111
+ # @private
112
# Forwards every argument unchanged to SelectorProxy.new and returns the
# resulting proxy.
def self._selector_proxy_(...)
  proxy = SelectorProxy.new(...)
  proxy
end
115
+
116
+ # @private
117
# Builds a parenthesised regex alternation from one string or a collection
# of strings.
#
# @param string [::String, #each]
#   A single string, or a collection whose elements are strings or arrays
#   of strings (arrays are flattened one level).
# @param escape [Boolean]
#   When true each string is passed through `Utils.re_escape` first.
#
# @return [::String] e.g. "(a|b|c)"
def self._re_string(string, escape: true)
  quote = ->(text) { escape ? Utils.re_escape(text) : text }
  return "(#{quote.call(string)})" if string.is_a?(::String)

  parts = []
  string.each do |item|
    if item.is_a?(Array)
      parts.concat(item)
    else
      parts << item
    end
  end
  alternation = parts.map { |part| quote.call(part) }.join("|")
  "(#{alternation})"
end
133
+
134
+ # Select all columns.
135
+ #
136
+ # @return [SelectorProxy]
137
+ #
138
+ # @example
139
+ # df = Polars::DataFrame.new(
140
+ # {
141
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
142
+ # "value" => [1_234_500, 5_000_555]
143
+ # },
144
+ # schema_overrides: {"value" => Polars::Int32}
145
+ # )
146
+ #
147
+ # @example Select all columns, casting them to string:
148
+ # df.select(Polars.cs.all.cast(Polars::String))
149
+ # # =>
150
+ # # shape: (2, 2)
151
+ # # ┌────────────┬─────────┐
152
+ # # │ dt ┆ value │
153
+ # # │ --- ┆ --- │
154
+ # # │ str ┆ str │
155
+ # # ╞════════════╪═════════╡
156
+ # # │ 1999-12-31 ┆ 1234500 │
157
+ # # │ 2024-01-01 ┆ 5000555 │
158
+ # # └────────────┴─────────┘
159
+ #
160
+ # @example Select all columns *except* for those matching the given dtypes:
161
+ # df.select(Polars.cs.all - Polars.cs.numeric)
162
+ # # =>
163
+ # # shape: (2, 1)
164
+ # # ┌────────────┐
165
+ # # │ dt │
166
+ # # │ --- │
167
+ # # │ date │
168
+ # # ╞════════════╡
169
+ # # │ 1999-12-31 │
170
+ # # │ 2024-01-01 │
171
+ # # └────────────┘
172
def self.all
  # wrap `F.all` in a proxy labelled "all"
  every_column = F.all
  _selector_proxy_(every_column, name: "all")
end
175
+
176
+ # Select all columns with alphabetic names (eg: only letters).
177
+ #
178
+ # @param ascii_only [Boolean]
179
+ # Indicate whether to consider only ASCII alphabetic characters, or the full
180
+ # Unicode range of valid letters (accented, ideographic, etc).
181
+ # @param ignore_spaces [Boolean]
182
+ # Indicate whether to ignore the presence of spaces in column names; if so,
183
+ # only the other (non-space) characters are considered.
184
+ #
185
+ # @return [SelectorProxy]
186
+ #
187
+ # @note
188
+ # Matching column names cannot contain *any* non-alphabetic characters. Note
189
+ # that the definition of "alphabetic" consists of all valid Unicode alphabetic
190
+ # characters (`\p{Alphabetic}`) by default; this can be changed by setting
191
+ # `ascii_only: true`.
192
+ #
193
+ # @example
194
+ # df = Polars::DataFrame.new(
195
+ # {
196
+ # "no1" => [100, 200, 300],
197
+ # "café" => ["espresso", "latte", "mocha"],
198
+ # "t or f" => [true, false, nil],
199
+ # "hmm" => ["aaa", "bbb", "ccc"],
200
+ # "都市" => ["東京", "大阪", "京都"]
201
+ # }
202
+ # )
203
+ #
204
+ # @example Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:
205
+ # df.select(Polars.cs.alpha)
206
+ # # =>
207
+ # # shape: (3, 3)
208
+ # # ┌──────────┬─────┬──────┐
209
+ # # │ café ┆ hmm ┆ 都市 │
210
+ # # │ --- ┆ --- ┆ --- │
211
+ # # │ str ┆ str ┆ str │
212
+ # # ╞══════════╪═════╪══════╡
213
+ # # │ espresso ┆ aaa ┆ 東京 │
214
+ # # │ latte ┆ bbb ┆ 大阪 │
215
+ # # │ mocha ┆ ccc ┆ 京都 │
216
+ # # └──────────┴─────┴──────┘
217
+ #
218
+ # @example Constrain the definition of "alphabetic" to ASCII characters only:
219
+ # df.select(Polars.cs.alpha(ascii_only: true))
220
+ # # =>
221
+ # # shape: (3, 1)
222
+ # # ┌─────┐
223
+ # # │ hmm │
224
+ # # │ --- │
225
+ # # │ str │
226
+ # # ╞═════╡
227
+ # # │ aaa │
228
+ # # │ bbb │
229
+ # # │ ccc │
230
+ # # └─────┘
231
+ #
232
+ # @example
233
+ # df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
234
+ # # =>
235
+ # # shape: (3, 2)
236
+ # # ┌────────┬─────┐
237
+ # # │ t or f ┆ hmm │
238
+ # # │ --- ┆ --- │
239
+ # # │ bool ┆ str │
240
+ # # ╞════════╪═════╡
241
+ # # │ true ┆ aaa │
242
+ # # │ false ┆ bbb │
243
+ # # │ null ┆ ccc │
244
+ # # └────────┴─────┘
245
+ #
246
+ # @example Select all columns *except* for those with alphabetic names:
247
+ # df.select(~Polars.cs.alpha)
248
+ # # =>
249
+ # # shape: (3, 2)
250
+ # # ┌─────┬────────┐
251
+ # # │ no1 ┆ t or f │
252
+ # # │ --- ┆ --- │
253
+ # # │ i64 ┆ bool │
254
+ # # ╞═════╪════════╡
255
+ # # │ 100 ┆ true │
256
+ # # │ 200 ┆ false │
257
+ # # │ 300 ┆ null │
258
+ # # └─────┴────────┘
259
+ #
260
+ # @example
261
+ # df.select(~Polars.cs.alpha(ignore_spaces: true))
262
+ # # =>
263
+ # # shape: (3, 1)
264
+ # # ┌─────┐
265
+ # # │ no1 │
266
+ # # │ --- │
267
+ # # │ i64 │
268
+ # # ╞═════╡
269
+ # # │ 100 │
270
+ # # │ 200 │
271
+ # # │ 300 │
272
+ # # └─────┘
273
def self.alpha(ascii_only: false, ignore_spaces: false)
  # The pattern must be valid for the *rust* regex crate, not Ruby's Regexp.
  letters =
    if ascii_only
      "a-zA-Z"
    else
      "\\p{Alphabetic}"
    end
  spaces =
    if ignore_spaces
      " "
    else
      ""
    end
  # whole-name match: one or more characters from the allowed class
  name_pattern = "^[#{letters}#{spaces}]+$"

  _selector_proxy_(
    F.col(name_pattern),
    name: "alpha",
    parameters: {"ascii_only" => ascii_only, "ignore_spaces" => ignore_spaces}
  )
end
283
+
284
+ # TODO
285
+ # def alphanumeric
286
+ # end
287
+
288
+ # Select all binary columns.
289
+ #
290
+ # @return [SelectorProxy]
291
+ #
292
+ # @example
293
+ # df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
294
+ # # =>
295
+ # # shape: (1, 4)
296
+ # # ┌──────────┬───────┬────────┬─────┐
297
+ # # │ a ┆ b ┆ c ┆ d │
298
+ # # │ --- ┆ --- ┆ --- ┆ --- │
299
+ # # │ binary ┆ str ┆ binary ┆ str │
300
+ # # ╞══════════╪═══════╪════════╪═════╡
301
+ # # │ b"hello" ┆ world ┆ b"!" ┆ :) │
302
+ # # └──────────┴───────┴────────┴─────┘
303
+ #
304
+ # @example Select binary columns and export as a hash:
305
+ # df.select(Polars.cs.binary).to_h(as_series: false)
306
+ # # => {"a"=>["hello"], "c"=>["!"]}
307
+ #
308
+ # @example Select all columns *except* for those that are binary:
309
+ # df.select(~Polars.cs.binary).to_h(as_series: false)
310
+ # # => {"b"=>["world"], "d"=>[":)"]}
311
def self.binary
  # proxy over `F.col(Binary)`, labelled "binary"
  binary_cols = F.col(Binary)
  _selector_proxy_(binary_cols, name: "binary")
end
314
+
315
+ # Select all boolean columns.
316
+ #
317
+ # @return [SelectorProxy]
318
+ #
319
+ # @example
320
+ # df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
321
+ # # =>
322
+ # # shape: (4, 2)
323
+ # # ┌─────┬────────┐
324
+ # # │ n ┆ n_even │
325
+ # # │ --- ┆ --- │
326
+ # # │ i64 ┆ bool │
327
+ # # ╞═════╪════════╡
328
+ # # │ 1 ┆ false │
329
+ # # │ 2 ┆ true │
330
+ # # │ 3 ┆ false │
331
+ # # │ 4 ┆ true │
332
+ # # └─────┴────────┘
333
+ #
334
+ # @example Select and invert boolean columns:
335
+ # df.with_columns(is_odd: Polars.cs.boolean.not_)
336
+ # # =>
337
+ # # shape: (4, 3)
338
+ # # ┌─────┬────────┬────────┐
339
+ # # │ n ┆ n_even ┆ is_odd │
340
+ # # │ --- ┆ --- ┆ --- │
341
+ # # │ i64 ┆ bool ┆ bool │
342
+ # # ╞═════╪════════╪════════╡
343
+ # # │ 1 ┆ false ┆ true │
344
+ # # │ 2 ┆ true ┆ false │
345
+ # # │ 3 ┆ false ┆ true │
346
+ # # │ 4 ┆ true ┆ false │
347
+ # # └─────┴────────┴────────┘
348
+ #
349
+ # @example Select all columns *except* for those that are boolean:
350
+ # df.select(~Polars.cs.boolean)
351
+ # # =>
352
+ # # shape: (4, 1)
353
+ # # ┌─────┐
354
+ # # │ n │
355
+ # # │ --- │
356
+ # # │ i64 │
357
+ # # ╞═════╡
358
+ # # │ 1 │
359
+ # # │ 2 │
360
+ # # │ 3 │
361
+ # # │ 4 │
362
+ # # └─────┘
363
def self.boolean
  # proxy over `F.col(Boolean)`, labelled "boolean"
  boolean_cols = F.col(Boolean)
  _selector_proxy_(boolean_cols, name: "boolean")
end
366
+
367
+ # TODO
368
+ # def by_dtype
369
+ # end
370
+
371
+ # TODO
372
+ # def by_index
373
+ # end
374
+
375
+ # TODO
376
+ # def by_name
377
+ # end
378
+
379
+ # Select all categorical columns.
380
+ #
381
+ # @return [SelectorProxy]
382
+ #
383
+ # @example
384
+ # df = Polars::DataFrame.new(
385
+ # {
386
+ # "foo" => ["xx", "yy"],
387
+ # "bar" => [123, 456],
388
+ # "baz" => [2.0, 5.5]
389
+ # },
390
+ # schema_overrides: {"foo" => Polars::Categorical}
391
+ # )
392
+ #
393
+ # @example Select all categorical columns:
394
+ # df.select(Polars.cs.categorical)
395
+ # # =>
396
+ # # shape: (2, 1)
397
+ # # ┌─────┐
398
+ # # │ foo │
399
+ # # │ --- │
400
+ # # │ cat │
401
+ # # ╞═════╡
402
+ # # │ xx │
403
+ # # │ yy │
404
+ # # └─────┘
405
+ #
406
+ # @example Select all columns *except* for those that are categorical:
407
+ # df.select(~Polars.cs.categorical)
408
+ # # =>
409
+ # # shape: (2, 2)
410
+ # # ┌─────┬─────┐
411
+ # # │ bar ┆ baz │
412
+ # # │ --- ┆ --- │
413
+ # # │ i64 ┆ f64 │
414
+ # # ╞═════╪═════╡
415
+ # # │ 123 ┆ 2.0 │
416
+ # # │ 456 ┆ 5.5 │
417
+ # # └─────┴─────┘
418
def self.categorical
  # proxy over `F.col(Categorical)`, labelled "categorical"
  categorical_cols = F.col(Categorical)
  _selector_proxy_(categorical_cols, name: "categorical")
end
421
+
422
+ # Select columns whose names contain the given literal substring(s).
423
+ #
424
+ # @param substring [Object]
425
+ # Substring(s) that matching column names should contain.
426
+ #
427
+ # @return [SelectorProxy]
428
+ #
429
+ # @example
430
+ # df = Polars::DataFrame.new(
431
+ # {
432
+ # "foo" => ["x", "y"],
433
+ # "bar" => [123, 456],
434
+ # "baz" => [2.0, 5.5],
435
+ # "zap" => [false, true]
436
+ # }
437
+ # )
438
+ #
439
+ # @example Select columns that contain the substring 'ba':
440
+ # df.select(Polars.cs.contains("ba"))
441
+ # # =>
442
+ # # shape: (2, 2)
443
+ # # ┌─────┬─────┐
444
+ # # │ bar ┆ baz │
445
+ # # │ --- ┆ --- │
446
+ # # │ i64 ┆ f64 │
447
+ # # ╞═════╪═════╡
448
+ # # │ 123 ┆ 2.0 │
449
+ # # │ 456 ┆ 5.5 │
450
+ # # └─────┴─────┘
451
+ #
452
+ # @example Select columns that contain the substring 'ba' or the letter 'z':
453
+ # df.select(Polars.cs.contains("ba", "z"))
454
+ # # =>
455
+ # # shape: (2, 3)
456
+ # # ┌─────┬─────┬───────┐
457
+ # # │ bar ┆ baz ┆ zap │
458
+ # # │ --- ┆ --- ┆ --- │
459
+ # # │ i64 ┆ f64 ┆ bool │
460
+ # # ╞═════╪═════╪═══════╡
461
+ # # │ 123 ┆ 2.0 ┆ false │
462
+ # # │ 456 ┆ 5.5 ┆ true │
463
+ # # └─────┴─────┴───────┘
464
+ #
465
+ # @example Select all columns *except* for those that contain the substring 'ba':
466
+ # df.select(~Polars.cs.contains("ba"))
467
+ # # =>
468
+ # # shape: (2, 2)
469
+ # # ┌─────┬───────┐
470
+ # # │ foo ┆ zap │
471
+ # # │ --- ┆ --- │
472
+ # # │ str ┆ bool │
473
+ # # ╞═════╪═══════╡
474
+ # # │ x ┆ false │
475
+ # # │ y ┆ true │
476
+ # # └─────┴───────┘
477
def self.contains(*substring)
  # escaped alternation of every requested substring, e.g. "(ba|z)"
  alternation = _re_string(substring)
  # allow anything on either side of the substring within the column name
  name_pattern = "^.*#{alternation}.*$"

  _selector_proxy_(
    F.col(name_pattern),
    name: "contains",
    parameters: {"*substring" => alternation}
  )
end
487
+
488
+ # Select all date columns.
489
+ #
490
+ # @return [SelectorProxy]
491
+ #
492
+ # @example
493
+ # df = Polars::DataFrame.new(
494
+ # {
495
+ # "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
496
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
497
+ # }
498
+ # )
499
+ #
500
+ # @example Select all date columns:
501
+ # df.select(Polars.cs.date)
502
+ # # =>
503
+ # # shape: (2, 1)
504
+ # # ┌────────────┐
505
+ # # │ dt │
506
+ # # │ --- │
507
+ # # │ date │
508
+ # # ╞════════════╡
509
+ # # │ 1999-12-31 │
510
+ # # │ 2024-08-09 │
511
+ # # └────────────┘
512
+ #
513
+ # @example Select all columns *except* for those that are dates:
514
+ # df.select(~Polars.cs.date)
515
+ # # =>
516
+ # # shape: (2, 1)
517
+ # # ┌─────────────────────┐
518
+ # # │ dtm │
519
+ # # │ --- │
520
+ # # │ datetime[ns] │
521
+ # # ╞═════════════════════╡
522
+ # # │ 2001-05-07 10:25:00 │
523
+ # # │ 2031-12-31 00:30:00 │
524
+ # # └─────────────────────┘
525
def self.date
  # proxy over `F.col(Date)`, labelled "date"
  date_cols = F.col(Date)
  _selector_proxy_(date_cols, name: "date")
end
528
+
529
+ # TODO
530
+ # def datetime
531
+ # end
532
+
533
+ # Select all decimal columns.
534
+ #
535
+ # @return [SelectorProxy]
536
+ #
537
+ # @example
538
+ # df = Polars::DataFrame.new(
539
+ # {
540
+ # "foo" => ["x", "y"],
541
+ # "bar" => [BigDecimal("123"), BigDecimal("456")],
542
+ # "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
543
+ # },
544
+ # schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
545
+ # )
546
+ #
547
+ # @example Select all decimal columns:
548
+ # df.select(Polars.cs.decimal)
549
+ # # =>
550
+ # # shape: (2, 2)
551
+ # # ┌──────────────┬───────────────┐
552
+ # # │ bar ┆ baz │
553
+ # # │ --- ┆ --- │
554
+ # # │ decimal[*,0] ┆ decimal[10,5] │
555
+ # # ╞══════════════╪═══════════════╡
556
+ # # │ 123 ┆ 2.00050 │
557
+ # # │ 456 ┆ -50.55550 │
558
+ # # └──────────────┴───────────────┘
559
+ #
560
+ # @example Select all columns *except* the decimal ones:
561
+ #
562
+ # df.select(~Polars.cs.decimal)
563
+ # # =>
564
+ # # shape: (2, 1)
565
+ # # ┌─────┐
566
+ # # │ foo │
567
+ # # │ --- │
568
+ # # │ str │
569
+ # # ╞═════╡
570
+ # # │ x │
571
+ # # │ y │
572
+ # # └─────┘
573
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  decimal_cols = F.col(Decimal)
  _selector_proxy_(decimal_cols, name: "decimal")
end
577
+
578
+ # Select columns that end with the given substring(s).
579
+ #
580
+ # @param suffix [Object]
581
+ # Substring(s) that matching column names should end with.
582
+ #
583
+ # @return [SelectorProxy]
584
+ #
585
+ # @example
586
+ # df = Polars::DataFrame.new(
587
+ # {
588
+ # "foo" => ["x", "y"],
589
+ # "bar" => [123, 456],
590
+ # "baz" => [2.0, 5.5],
591
+ # "zap" => [false, true]
592
+ # }
593
+ # )
594
+ #
595
+ # @example Select columns that end with the substring 'z':
596
+ # df.select(Polars.cs.ends_with("z"))
597
+ # # =>
598
+ # # shape: (2, 1)
599
+ # # ┌─────┐
600
+ # # │ baz │
601
+ # # │ --- │
602
+ # # │ f64 │
603
+ # # ╞═════╡
604
+ # # │ 2.0 │
605
+ # # │ 5.5 │
606
+ # # └─────┘
607
+ #
608
+ # @example Select columns that end with *either* the letter 'z' or 'r':
609
+ # df.select(Polars.cs.ends_with("z", "r"))
610
+ # # =>
611
+ # # shape: (2, 2)
612
+ # # ┌─────┬─────┐
613
+ # # │ bar ┆ baz │
614
+ # # │ --- ┆ --- │
615
+ # # │ i64 ┆ f64 │
616
+ # # ╞═════╪═════╡
617
+ # # │ 123 ┆ 2.0 │
618
+ # # │ 456 ┆ 5.5 │
619
+ # # └─────┴─────┘
620
+ #
621
+ # @example Select all columns *except* for those that end with the substring 'z':
622
+ # df.select(~Polars.cs.ends_with("z"))
623
+ # # =>
624
+ # # shape: (2, 3)
625
+ # # ┌─────┬─────┬───────┐
626
+ # # │ foo ┆ bar ┆ zap │
627
+ # # │ --- ┆ --- ┆ --- │
628
+ # # │ str ┆ i64 ┆ bool │
629
+ # # ╞═════╪═════╪═══════╡
630
+ # # │ x ┆ 123 ┆ false │
631
+ # # │ y ┆ 456 ┆ true │
632
+ # # └─────┴─────┴───────┘
633
def self.ends_with(*suffix)
  # escaped alternation of every requested suffix, e.g. "(z|r)"
  alternation = _re_string(suffix)
  # anything may precede the suffix; the name must end right after it
  name_pattern = "^.*#{alternation}$"

  _selector_proxy_(
    F.col(name_pattern),
    name: "ends_with",
    parameters: {"*suffix" => alternation}
  )
end
643
+
644
+ # Select the first column in the current scope.
645
+ #
646
+ # @return [SelectorProxy]
647
+ #
648
+ # @example
649
+ # df = Polars::DataFrame.new(
650
+ # {
651
+ # "foo" => ["x", "y"],
652
+ # "bar" => [123, 456],
653
+ # "baz" => [2.0, 5.5],
654
+ # "zap" => [0, 1]
655
+ # }
656
+ # )
657
+ #
658
+ # @example Select the first column:
659
+ # df.select(Polars.cs.first)
660
+ # # =>
661
+ # # shape: (2, 1)
662
+ # # ┌─────┐
663
+ # # │ foo │
664
+ # # │ --- │
665
+ # # │ str │
666
+ # # ╞═════╡
667
+ # # │ x │
668
+ # # │ y │
669
+ # # └─────┘
670
+ #
671
+ # @example Select everything *except* for the first column:
672
+ # df.select(~Polars.cs.first)
673
+ # # =>
674
+ # # shape: (2, 3)
675
+ # # ┌─────┬─────┬─────┐
676
+ # # │ bar ┆ baz ┆ zap │
677
+ # # │ --- ┆ --- ┆ --- │
678
+ # # │ i64 ┆ f64 ┆ i64 │
679
+ # # ╞═════╪═════╪═════╡
680
+ # # │ 123 ┆ 2.0 ┆ 0 │
681
+ # # │ 456 ┆ 5.5 ┆ 1 │
682
+ # # └─────┴─────┴─────┘
683
def self.first
  # proxy over `F.first`, labelled "first"
  first_column = F.first
  _selector_proxy_(first_column, name: "first")
end
686
+
687
+ # Select all float columns.
688
+ #
689
+ # @return [SelectorProxy]
690
+ #
691
+ # @example
692
+ # df = Polars::DataFrame.new(
693
+ # {
694
+ # "foo" => ["x", "y"],
695
+ # "bar" => [123, 456],
696
+ # "baz" => [2.0, 5.5],
697
+ # "zap" => [0.0, 1.0]
698
+ # },
699
+ # schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
700
+ # )
701
+ #
702
+ # @example Select all float columns:
703
+ # df.select(Polars.cs.float)
704
+ # # =>
705
+ # # shape: (2, 2)
706
+ # # ┌─────┬─────┐
707
+ # # │ baz ┆ zap │
708
+ # # │ --- ┆ --- │
709
+ # # │ f32 ┆ f64 │
710
+ # # ╞═════╪═════╡
711
+ # # │ 2.0 ┆ 0.0 │
712
+ # # │ 5.5 ┆ 1.0 │
713
+ # # └─────┴─────┘
714
+ #
715
+ # @example Select all columns *except* for those that are float:
716
+ # df.select(~Polars.cs.float)
717
+ # # =>
718
+ # # shape: (2, 2)
719
+ # # ┌─────┬─────┐
720
+ # # │ foo ┆ bar │
721
+ # # │ --- ┆ --- │
722
+ # # │ str ┆ i64 │
723
+ # # ╞═════╪═════╡
724
+ # # │ x ┆ 123 │
725
+ # # │ y ┆ 456 │
726
+ # # └─────┴─────┘
727
def self.float
  # proxy over every dtype listed in FLOAT_DTYPES, labelled "float"
  float_cols = F.col(FLOAT_DTYPES)
  _selector_proxy_(float_cols, name: "float")
end
730
+
731
+ # Select all integer columns.
732
+ #
733
+ # @return [SelectorProxy]
734
+ #
735
+ # @example
736
+ # df = Polars::DataFrame.new(
737
+ # {
738
+ # "foo" => ["x", "y"],
739
+ # "bar" => [123, 456],
740
+ # "baz" => [2.0, 5.5],
741
+ # "zap" => [0, 1]
742
+ # }
743
+ # )
744
+ #
745
+ # @example Select all integer columns:
746
+ # df.select(Polars.cs.integer)
747
+ # # =>
748
+ # # shape: (2, 2)
749
+ # # ┌─────┬─────┐
750
+ # # │ bar ┆ zap │
751
+ # # │ --- ┆ --- │
752
+ # # │ i64 ┆ i64 │
753
+ # # ╞═════╪═════╡
754
+ # # │ 123 ┆ 0 │
755
+ # # │ 456 ┆ 1 │
756
+ # # └─────┴─────┘
757
+ #
758
+ # @example Select all columns *except* for those that are integer:
759
+ # df.select(~Polars.cs.integer)
760
+ # # =>
761
+ # # shape: (2, 2)
762
+ # # ┌─────┬─────┐
763
+ # # │ foo ┆ baz │
764
+ # # │ --- ┆ --- │
765
+ # # │ str ┆ f64 │
766
+ # # ╞═════╪═════╡
767
+ # # │ x ┆ 2.0 │
768
+ # # │ y ┆ 5.5 │
769
+ # # └─────┴─────┘
770
def self.integer
  # proxy over every dtype listed in INTEGER_DTYPES, labelled "integer"
  integer_cols = F.col(INTEGER_DTYPES)
  _selector_proxy_(integer_cols, name: "integer")
end
773
+
774
+ # Select all signed integer columns.
775
+ #
776
+ # @return [SelectorProxy]
777
+ #
778
+ # @example
779
+ # df = Polars::DataFrame.new(
780
+ # {
781
+ # "foo" => [-123, -456],
782
+ # "bar" => [3456, 6789],
783
+ # "baz" => [7654, 4321],
784
+ # "zap" => ["ab", "cd"]
785
+ # },
786
+ # schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
787
+ # )
788
+ #
789
+ # @example Select all signed integer columns:
790
+ # df.select(Polars.cs.signed_integer)
791
+ # # =>
792
+ # # shape: (2, 1)
793
+ # # ┌──────┐
794
+ # # │ foo │
795
+ # # │ --- │
796
+ # # │ i64 │
797
+ # # ╞══════╡
798
+ # # │ -123 │
799
+ # # │ -456 │
800
+ # # └──────┘
801
+ #
802
+ # @example
803
+ # df.select(~Polars.cs.signed_integer)
804
+ # # =>
805
+ # # shape: (2, 3)
806
+ # # ┌──────┬──────┬─────┐
807
+ # # │ bar ┆ baz ┆ zap │
808
+ # # │ --- ┆ --- ┆ --- │
809
+ # # │ u32 ┆ u64 ┆ str │
810
+ # # ╞══════╪══════╪═════╡
811
+ # # │ 3456 ┆ 7654 ┆ ab │
812
+ # # │ 6789 ┆ 4321 ┆ cd │
813
+ # # └──────┴──────┴─────┘
814
+ #
815
+ # @example Select all integer columns (both signed and unsigned):
816
+ # df.select(Polars.cs.integer)
817
+ # # =>
818
+ # # shape: (2, 3)
819
+ # # ┌──────┬──────┬──────┐
820
+ # # │ foo ┆ bar ┆ baz │
821
+ # # │ --- ┆ --- ┆ --- │
822
+ # # │ i64 ┆ u32 ┆ u64 │
823
+ # # ╞══════╪══════╪══════╡
824
+ # # │ -123 ┆ 3456 ┆ 7654 │
825
+ # # │ -456 ┆ 6789 ┆ 4321 │
826
+ # # └──────┴──────┴──────┘
827
def self.signed_integer
  # proxy over every dtype listed in SIGNED_INTEGER_DTYPES
  signed_cols = F.col(SIGNED_INTEGER_DTYPES)
  _selector_proxy_(signed_cols, name: "signed_integer")
end
830
+
831
+ # Select all unsigned integer columns.
832
+ #
833
+ # @return [SelectorProxy]
834
+ #
835
+ # @example
836
+ # df = Polars::DataFrame.new(
837
+ # {
838
+ # "foo" => [-123, -456],
839
+ # "bar" => [3456, 6789],
840
+ # "baz" => [7654, 4321],
841
+ # "zap" => ["ab", "cd"]
842
+ # },
843
+ # schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
844
+ # )
845
+ #
846
+ # @example Select all unsigned integer columns:
847
+ # df.select(Polars.cs.unsigned_integer)
848
+ # # =>
849
+ # # shape: (2, 2)
850
+ # # ┌──────┬──────┐
851
+ # # │ bar ┆ baz │
852
+ # # │ --- ┆ --- │
853
+ # # │ u32 ┆ u64 │
854
+ # # ╞══════╪══════╡
855
+ # # │ 3456 ┆ 7654 │
856
+ # # │ 6789 ┆ 4321 │
857
+ # # └──────┴──────┘
858
+ #
859
+ # @example Select all columns *except* for those that are unsigned integers:
860
+ # df.select(~Polars.cs.unsigned_integer)
861
+ # # =>
862
+ # # shape: (2, 2)
863
+ # # ┌──────┬─────┐
864
+ # # │ foo ┆ zap │
865
+ # # │ --- ┆ --- │
866
+ # # │ i64 ┆ str │
867
+ # # ╞══════╪═════╡
868
+ # # │ -123 ┆ ab │
869
+ # # │ -456 ┆ cd │
870
+ # # └──────┴─────┘
871
+ #
872
+ # @example Select all integer columns (both signed and unsigned):
873
+ # df.select(Polars.cs.integer)
874
+ # # =>
875
+ # # shape: (2, 3)
876
+ # # ┌──────┬──────┬──────┐
877
+ # # │ foo ┆ bar ┆ baz │
878
+ # # │ --- ┆ --- ┆ --- │
879
+ # # │ i64 ┆ u32 ┆ u64 │
880
+ # # ╞══════╪══════╪══════╡
881
+ # # │ -123 ┆ 3456 ┆ 7654 │
882
+ # # │ -456 ┆ 6789 ┆ 4321 │
883
+ # # └──────┴──────┴──────┘
884
def self.unsigned_integer
  # proxy over every dtype listed in UNSIGNED_INTEGER_DTYPES
  unsigned_cols = F.col(UNSIGNED_INTEGER_DTYPES)
  _selector_proxy_(unsigned_cols, name: "unsigned_integer")
end
887
+
888
+ # Select the last column in the current scope.
889
+ #
890
+ # @return [SelectorProxy]
891
+ #
892
+ # @example
893
+ # df = Polars::DataFrame.new(
894
+ # {
895
+ # "foo" => ["x", "y"],
896
+ # "bar" => [123, 456],
897
+ # "baz" => [2.0, 5.5],
898
+ # "zap" => [0, 1]
899
+ # }
900
+ # )
901
+ #
902
+ # @example Select the last column:
903
+ # df.select(Polars.cs.last)
904
+ # # =>
905
+ # # shape: (2, 1)
906
+ # # ┌─────┐
907
+ # # │ zap │
908
+ # # │ --- │
909
+ # # │ i64 │
910
+ # # ╞═════╡
911
+ # # │ 0 │
912
+ # # │ 1 │
913
+ # # └─────┘
914
+ #
915
+ # @example Select everything *except* for the last column:
916
+ # df.select(~Polars.cs.last)
917
+ # # =>
918
+ # # shape: (2, 3)
919
+ # # ┌─────┬─────┬─────┐
920
+ # # │ foo ┆ bar ┆ baz │
921
+ # # │ --- ┆ --- ┆ --- │
922
+ # # │ str ┆ i64 ┆ f64 │
923
+ # # ╞═════╪═════╪═════╡
924
+ # # │ x ┆ 123 ┆ 2.0 │
925
+ # # │ y ┆ 456 ┆ 5.5 │
926
+ # # └─────┴─────┴─────┘
927
def self.last
  # proxy over `F.last`, labelled "last"
  last_column = F.last
  _selector_proxy_(last_column, name: "last")
end
930
+
931
+ # Select all numeric columns.
932
+ #
933
+ # @return [SelectorProxy]
934
+ #
935
+ # @example
936
+ # df = Polars::DataFrame.new(
937
+ # {
938
+ # "foo" => ["x", "y"],
939
+ # "bar" => [123, 456],
940
+ # "baz" => [2.0, 5.5],
941
+ # "zap" => [0, 0]
942
+ # },
943
+ # schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
944
+ # )
945
+ #
946
+ # @example Match all numeric columns:
947
+ # df.select(Polars.cs.numeric)
948
+ # # =>
949
+ # # shape: (2, 3)
950
+ # # ┌─────┬─────┬─────┐
951
+ # # │ bar ┆ baz ┆ zap │
952
+ # # │ --- ┆ --- ┆ --- │
953
+ # # │ i16 ┆ f32 ┆ u8 │
954
+ # # ╞═════╪═════╪═════╡
955
+ # # │ 123 ┆ 2.0 ┆ 0 │
956
+ # # │ 456 ┆ 5.5 ┆ 0 │
957
+ # # └─────┴─────┴─────┘
958
+ #
959
+ # @example Match all columns *except* for those that are numeric:
960
+ # df.select(~Polars.cs.numeric)
961
+ # # =>
962
+ # # shape: (2, 1)
963
+ # # ┌─────┐
964
+ # # │ foo │
965
+ # # │ --- │
966
+ # # │ str │
967
+ # # ╞═════╡
968
+ # # │ x │
969
+ # # │ y │
970
+ # # └─────┘
971
def self.numeric
  # proxy over every dtype listed in NUMERIC_DTYPES, labelled "numeric"
  numeric_cols = F.col(NUMERIC_DTYPES)
  _selector_proxy_(numeric_cols, name: "numeric")
end
974
+
975
+ # Select columns that start with the given substring(s).
976
+ #
977
+ # @param prefix [Object]
978
+ # Substring(s) that matching column names should start with.
979
+ #
980
+ # @return [SelectorProxy]
981
+ #
982
+ # @example
983
+ # df = Polars::DataFrame.new(
984
+ # {
985
+ # "foo" => [1.0, 2.0],
986
+ # "bar" => [3.0, 4.0],
987
+ # "baz" => [5, 6],
988
+ # "zap" => [7, 8]
989
+ # }
990
+ # )
991
+ #
992
+ # @example Match columns starting with a 'b':
993
+ # df.select(Polars.cs.starts_with("b"))
994
+ # # =>
995
+ # # shape: (2, 2)
996
+ # # ┌─────┬─────┐
997
+ # # │ bar ┆ baz │
998
+ # # │ --- ┆ --- │
999
+ # # │ f64 ┆ i64 │
1000
+ # # ╞═════╪═════╡
1001
+ # # │ 3.0 ┆ 5 │
1002
+ # # │ 4.0 ┆ 6 │
1003
+ # # └─────┴─────┘
1004
+ #
1005
+ # @example Match columns starting with *either* the letter 'b' or 'z':
1006
+ # df.select(Polars.cs.starts_with("b", "z"))
1007
+ # # =>
1008
+ # # shape: (2, 3)
1009
+ # # ┌─────┬─────┬─────┐
1010
+ # # │ bar ┆ baz ┆ zap │
1011
+ # # │ --- ┆ --- ┆ --- │
1012
+ # # │ f64 ┆ i64 ┆ i64 │
1013
+ # # ╞═════╪═════╪═════╡
1014
+ # # │ 3.0 ┆ 5 ┆ 7 │
1015
+ # # │ 4.0 ┆ 6 ┆ 8 │
1016
+ # # └─────┴─────┴─────┘
1017
+ #
1018
+ # @example Match all columns *except* for those starting with 'b':
1019
+ # df.select(~Polars.cs.starts_with("b"))
1020
+ # # =>
1021
+ # # shape: (2, 2)
1022
+ # # ┌─────┬─────┐
1023
+ # # │ foo ┆ zap │
1024
+ # # │ --- ┆ --- │
1025
+ # # │ f64 ┆ i64 │
1026
+ # # ╞═════╪═════╡
1027
+ # # │ 1.0 ┆ 7 │
1028
+ # # │ 2.0 ┆ 8 │
1029
+ # # └─────┴─────┘
1030
def self.starts_with(*prefix)
  # escaped alternation of every requested prefix, e.g. "(b|z)"
  alternation = _re_string(prefix)
  # the name must begin with the prefix; anything may follow
  name_pattern = "^#{alternation}.*$"

  # the recorded parameter is the raw prefix list, not the escaped pattern
  _selector_proxy_(
    F.col(name_pattern),
    name: "starts_with",
    parameters: {"*prefix" => prefix}
  )
end
1040
+
1041
# Select all String (and, optionally, Categorical) string columns.
#
# @param include_categorical [Boolean]
#   Also select Categorical columns when true.
#
# @return [SelectorProxy]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "w" => ["xx", "yy", "xx", "yy", "xx"],
#       "x" => [1, 2, 1, 4, -2],
#       "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
#       "z" => ["a", "b", "a", "b", "b"]
#     },
#   ).with_columns(
#     z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
#   )
#
# @example Group by all string columns, sum the numeric columns, then sort by the string cols:
#   df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
#   # =>
#   # shape: (2, 3)
#   # ┌─────┬─────┬─────┐
#   # │ w   ┆ x   ┆ y   │
#   # │ --- ┆ --- ┆ --- │
#   # │ str ┆ i64 ┆ f64 │
#   # ╞═════╪═════╪═════╡
#   # │ xx  ┆ 0   ┆ 2.0 │
#   # │ yy  ┆ 6   ┆ 7.0 │
#   # └─────┴─────┴─────┘
#
# @example Group by all string *and* categorical columns:
#   df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
#     Polars.cs.string(include_categorical: true)
#   )
#   # =>
#   # shape: (3, 4)
#   # ┌─────┬─────┬─────┬──────┐
#   # │ w   ┆ z   ┆ x   ┆ y    │
#   # │ --- ┆ --- ┆ --- ┆ ---  │
#   # │ str ┆ cat ┆ i64 ┆ f64  │
#   # ╞═════╪═════╪═════╪══════╡
#   # │ xx  ┆ a   ┆ 2   ┆ 4.0  │
#   # │ xx  ┆ b   ┆ -2  ┆ -2.0 │
#   # │ yy  ┆ b   ┆ 6   ┆ 7.0  │
#   # └─────┴─────┴─────┴──────┘
def self.string(include_categorical: false)
  # String is always selected; Categorical only on request
  selected_dtypes = include_categorical ? [String, Categorical] : [String]

  _selector_proxy_(
    F.col(selected_dtypes),
    name: "string",
    parameters: {"include_categorical" => include_categorical}
  )
end
1096
+
1097
+ # Select all time columns.
1098
+ #
1099
+ # @return [SelectorProxy]
1100
+ #
1101
+ # @example
1102
+ # df = Polars::DataFrame.new(
1103
+ # {
1104
+ # "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
1105
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
1106
+ # "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
1107
+ # },
1108
+ # schema_overrides: {"tm" => Polars::Time}
1109
+ # )
1110
+ #
1111
+ # @example Select all time columns:
1112
+ # df.select(Polars.cs.time)
1113
+ # # =>
1114
+ # # shape: (2, 1)
1115
+ # # ┌──────────┐
1116
+ # # │ tm │
1117
+ # # │ --- │
1118
+ # # │ time │
1119
+ # # ╞══════════╡
1120
+ # # │ 00:00:00 │
1121
+ # # │ 23:59:59 │
1122
+ # # └──────────┘
1123
+ #
1124
+ # @example Select all columns *except* for those that are times:
1125
+ # df.select(~Polars.cs.time)
1126
+ # # =>
1127
+ # # shape: (2, 2)
1128
+ # # ┌─────────────────────┬────────────┐
1129
+ # # │ dtm ┆ dt │
1130
+ # # │ --- ┆ --- │
1131
+ # # │ datetime[ns] ┆ date │
1132
+ # # ╞═════════════════════╪════════════╡
1133
+ # # │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
1134
+ # # │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
1135
+ # # └─────────────────────┴────────────┘
1136
def self.time
  # proxy over `F.col(Time)`, labelled "time"
  time_cols = F.col(Time)
  _selector_proxy_(time_cols, name: "time")
end
1139
+ end
1140
+
1141
# Entry point for column selectors: returns the Selectors module so callers
# can write `Polars.cs.numeric`, `Polars.cs.starts_with("b")`, and so on.
#
# @return [Module]
def self.cs
  Selectors
end
1144
+ end