polars-df 0.13.0-aarch64-linux → 0.15.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE-THIRD-PARTY.txt +24801 -13447
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.so +0 -0
  8. data/lib/polars/3.2/polars.so +0 -0
  9. data/lib/polars/3.3/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +285 -62
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +109 -8
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -12
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +470 -40
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,1144 @@
1
+ module Polars
2
+ module Selectors
3
+ # @private
4
# Expression subclass that remembers which selector produced it (its name
# and parameters) so selectors can be combined with set-style operators
# and rendered readably by `inspect`.
class SelectorProxy < Expr
  attr_accessor :_attrs
  attr_accessor :_repr_override

  # @param expr [Expr]
  #   Expression whose underlying `_rbexpr` is adopted by this proxy.
  # @param name [String]
  #   Selector name; also used by `inspect` to pick a rendering.
  # @param parameters [Hash, nil]
  #   Arguments the selector was created with (used for `inspect` only
  #   within this class).
  def initialize(
    expr,
    name:,
    parameters: nil
  )
    self._rbexpr = expr._rbexpr
    self._attrs = {
      name: name,
      params: parameters
    }
  end

  # Render the selector.
  #
  # Falls back to the plain expression repr when no selector metadata is
  # present, returns `_repr_override` verbatim when it is set, renders
  # set operations as `(lhs <op> rhs)`, and renders everything else as
  # `Polars.cs.<name>(<params>)`.
  #
  # @return [String]
  def inspect
    return as_expr.inspect unless _attrs
    return _repr_override if _repr_override

    selector_name = _attrs[:name]
    params = _attrs[:params] || {}
    set_ops = {"and" => "&", "or" => "|", "sub" => "-", "xor" => "^"}
    if set_ops.key?(selector_name)
      op = set_ops[selector_name]
      "(#{params.values.map(&:inspect).join(" #{op} ")})"
    else
      # Keys starting with "*" mark splat arguments: their value is shown
      # without the parameter name and with the first/last character of
      # its `inspect` output (the enclosing brackets of an Array) removed.
      str_params = params.map { |k, v| k.start_with?("*") ? v.inspect[1..-2] : "#{k}=#{v.inspect}" }.join(", ")
      "Polars.cs.#{selector_name}(#{str_params})"
    end
  end

  # Invert the selection (`all - self`) when `Utils.is_selector(self)` is
  # truthy; otherwise defer to the plain expression's `~`.
  #
  # @return [Expr]
  def ~
    return ~as_expr unless Utils.is_selector(self)

    inverted = Selectors.all - self
    # Capture the repr before inversion so the result prints as `~<self>`
    # rather than as `(all - self)`.
    inverted._repr_override = "~#{inspect}"
    inverted
  end

  # Set difference with another selector; plain expression subtraction
  # for any other operand.
  #
  # @return [Expr]
  def -(other)
    return as_expr - other unless Utils.is_selector(other)

    SelectorProxy.new(
      meta._as_selector.meta._selector_sub(other),
      parameters: {"self" => self, "other" => other},
      name: "sub"
    )
  end

  # Set intersection with another selector; plain expression `&` for any
  # other operand.
  #
  # @return [Expr]
  # @raise [Todo] when `Utils.is_column(other)` is truthy (not implemented)
  def &(other)
    raise Todo if Utils.is_column(other)
    return as_expr & other unless Utils.is_selector(other)

    SelectorProxy.new(
      meta._as_selector.meta._selector_and(other),
      parameters: {"self" => self, "other" => other},
      name: "and"
    )
  end

  # Set union with another selector; plain expression `|` for any other
  # operand.
  #
  # @return [Expr]
  # @raise [Todo] when `Utils.is_column(other)` is truthy (not implemented)
  def |(other)
    raise Todo if Utils.is_column(other)
    return as_expr | other unless Utils.is_selector(other)

    SelectorProxy.new(
      # Previously this called `_selector_and`, which made a union behave
      # like an intersection.
      # NOTE(review): assumes the meta namespace exposes `_selector_add`
      # (the union primitive in upstream polars) - confirm.
      meta._as_selector.meta._selector_add(other),
      parameters: {"self" => self, "other" => other},
      name: "or"
    )
  end

  # Symmetric difference with another selector; plain expression `^` for
  # any other operand.
  #
  # @return [Expr]
  # @raise [Todo] when `Utils.is_column(other)` is truthy (not implemented)
  def ^(other)
    raise Todo if Utils.is_column(other)
    return as_expr ^ other unless Utils.is_selector(other)

    SelectorProxy.new(
      # Previously this called `_selector_and`, which made a symmetric
      # difference behave like an intersection.
      # NOTE(review): assumes the meta namespace exposes `_selector_xor`
      # (as in upstream polars) - confirm.
      meta._as_selector.meta._selector_xor(other),
      parameters: {"self" => self, "other" => other},
      name: "xor"
    )
  end

  # Plain expression sharing this proxy's underlying `_rbexpr`, without
  # the selector metadata.
  #
  # @return [Expr]
  def as_expr
    Expr._from_rbexpr(_rbexpr)
  end
end
110
+
111
+ # @private
112
def self._selector_proxy_(...)
  # Thin factory: every argument is forwarded unchanged to the
  # SelectorProxy constructor.
  SelectorProxy.new(...)
end
115
+
116
+ # @private
117
def self._re_string(string, escape: true)
  # Wrap one string, or a collection of strings (nested arrays are
  # flattened one level), into a single parenthesised alternation group.
  # Each piece is passed through Utils.re_escape unless `escape` is false.
  quote = ->(piece) { escape ? Utils.re_escape(piece) : piece }
  return "(#{quote.call(string)})" if string.is_a?(::String)

  flattened = []
  string.each { |st| st.is_a?(Array) ? flattened.concat(st) : flattened.push(st) }
  "(#{flattened.map(&quote).join("|")})"
end
133
+
134
+ # Select all columns.
135
+ #
136
+ # @return [SelectorProxy]
137
+ #
138
+ # @example
139
+ # df = Polars::DataFrame.new(
140
+ # {
141
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
142
+ # "value" => [1_234_500, 5_000_555]
143
+ # },
144
+ # schema_overrides: {"value" => Polars::Int32}
145
+ # )
146
+ #
147
+ # @example Select all columns, casting them to string:
148
+ # df.select(Polars.cs.all.cast(Polars::String))
149
+ # # =>
150
+ # # shape: (2, 2)
151
+ # # ┌────────────┬─────────┐
152
+ # # │ dt ┆ value │
153
+ # # │ --- ┆ --- │
154
+ # # │ str ┆ str │
155
+ # # ╞════════════╪═════════╡
156
+ # # │ 1999-12-31 ┆ 1234500 │
157
+ # # │ 2024-01-01 ┆ 5000555 │
158
+ # # └────────────┴─────────┘
159
+ #
160
+ # @example Select all columns *except* for those matching the given dtypes:
161
+ # df.select(Polars.cs.all - Polars.cs.numeric)
162
+ # # =>
163
+ # # shape: (2, 1)
164
+ # # ┌────────────┐
165
+ # # │ dt │
166
+ # # │ --- │
167
+ # # │ date │
168
+ # # ╞════════════╡
169
+ # # │ 1999-12-31 │
170
+ # # │ 2024-01-01 │
171
+ # # └────────────┘
172
def self.all
  # Wrap the "all columns" expression as a selector named "all".
  every_column = F.all
  _selector_proxy_(every_column, name: "all")
end
175
+
176
+ # Select all columns with alphabetic names (eg: only letters).
177
+ #
178
+ # @param ascii_only [Boolean]
179
+ # Indicate whether to consider only ASCII alphabetic characters, or the full
180
+ # Unicode range of valid letters (accented, ideographic, etc).
181
+ # @param ignore_spaces [Boolean]
182
+ # Indicate whether to ignore the presence of spaces in column names; if so,
183
+ # only the other (non-space) characters are considered.
184
+ #
185
+ # @return [SelectorProxy]
186
+ #
187
+ # @note
188
+ # Matching column names cannot contain *any* non-alphabetic characters. Note
189
+ # that the definition of "alphabetic" consists of all valid Unicode alphabetic
190
+ # characters (`\p{Alphabetic}`) by default; this can be changed by setting
191
+ # `ascii_only: true`.
192
+ #
193
+ # @example
194
+ # df = Polars::DataFrame.new(
195
+ # {
196
+ # "no1" => [100, 200, 300],
197
+ # "café" => ["espresso", "latte", "mocha"],
198
+ # "t or f" => [true, false, nil],
199
+ # "hmm" => ["aaa", "bbb", "ccc"],
200
+ # "都市" => ["東京", "大阪", "京都"]
201
+ # }
202
+ # )
203
+ #
204
+ # @example Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:
205
+ # df.select(Polars.cs.alpha)
206
+ # # =>
207
+ # # shape: (3, 3)
208
+ # # ┌──────────┬─────┬──────┐
209
+ # # │ café ┆ hmm ┆ 都市 │
210
+ # # │ --- ┆ --- ┆ --- │
211
+ # # │ str ┆ str ┆ str │
212
+ # # ╞══════════╪═════╪══════╡
213
+ # # │ espresso ┆ aaa ┆ 東京 │
214
+ # # │ latte ┆ bbb ┆ 大阪 │
215
+ # # │ mocha ┆ ccc ┆ 京都 │
216
+ # # └──────────┴─────┴──────┘
217
+ #
218
+ # @example Constrain the definition of "alphabetic" to ASCII characters only:
219
+ # df.select(Polars.cs.alpha(ascii_only: true))
220
+ # # =>
221
+ # # shape: (3, 1)
222
+ # # ┌─────┐
223
+ # # │ hmm │
224
+ # # │ --- │
225
+ # # │ str │
226
+ # # ╞═════╡
227
+ # # │ aaa │
228
+ # # │ bbb │
229
+ # # │ ccc │
230
+ # # └─────┘
231
+ #
232
+ # @example
233
+ # df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
234
+ # # =>
235
+ # # shape: (3, 2)
236
+ # # ┌────────┬─────┐
237
+ # # │ t or f ┆ hmm │
238
+ # # │ --- ┆ --- │
239
+ # # │ bool ┆ str │
240
+ # # ╞════════╪═════╡
241
+ # # │ true ┆ aaa │
242
+ # # │ false ┆ bbb │
243
+ # # │ null ┆ ccc │
244
+ # # └────────┴─────┘
245
+ #
246
+ # @example Select all columns *except* for those with alphabetic names:
247
+ # df.select(~Polars.cs.alpha)
248
+ # # =>
249
+ # # shape: (3, 2)
250
+ # # ┌─────┬────────┐
251
+ # # │ no1 ┆ t or f │
252
+ # # │ --- ┆ --- │
253
+ # # │ i64 ┆ bool │
254
+ # # ╞═════╪════════╡
255
+ # # │ 100 ┆ true │
256
+ # # │ 200 ┆ false │
257
+ # # │ 300 ┆ null │
258
+ # # └─────┴────────┘
259
+ #
260
+ # @example
261
+ # df.select(~Polars.cs.alpha(ignore_spaces: true))
262
+ # # =>
263
+ # # shape: (3, 1)
264
+ # # ┌─────┐
265
+ # # │ no1 │
266
+ # # │ --- │
267
+ # # │ i64 │
268
+ # # ╞═════╡
269
+ # # │ 100 │
270
+ # # │ 200 │
271
+ # # │ 300 │
272
+ # # └─────┘
273
def self.alpha(ascii_only: false, ignore_spaces: false)
  # The pattern is interpreted by the *rust* regex crate, so it must use
  # syntax that crate understands.
  char_class =
    if ascii_only
      "a-zA-Z"
    else
      "\\p{Alphabetic}"
    end
  # Optionally allow a literal space inside the character class.
  char_class += " " if ignore_spaces

  _selector_proxy_(
    F.col("^[#{char_class}]+$"),
    name: "alpha",
    parameters: {"ascii_only" => ascii_only, "ignore_spaces" => ignore_spaces},
  )
end
283
+
284
+ # TODO
285
+ # def alphanumeric
286
+ # end
287
+
288
+ # Select all binary columns.
289
+ #
290
+ # @return [SelectorProxy]
291
+ #
292
+ # @example
293
+ # df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
294
+ # # =>
295
+ # # shape: (1, 4)
296
+ # # ┌──────────┬───────┬────────┬─────┐
297
+ # # │ a ┆ b ┆ c ┆ d │
298
+ # # │ --- ┆ --- ┆ --- ┆ --- │
299
+ # # │ binary ┆ str ┆ binary ┆ str │
300
+ # # ╞══════════╪═══════╪════════╪═════╡
301
+ # # │ b"hello" ┆ world ┆ b"!" ┆ :) │
302
+ # # └──────────┴───────┴────────┴─────┘
303
+ #
304
+ # @example Select binary columns and export as a hash:
305
+ # df.select(Polars.cs.binary).to_h(as_series: false)
306
+ # # => {"a"=>["hello"], "c"=>["!"]}
307
+ #
308
+ # @example Select all columns *except* for those that are binary:
309
+ # df.select(~Polars.cs.binary).to_h(as_series: false)
310
+ # # => {"b"=>["world"], "d"=>[":)"]}
311
def self.binary
  # Match columns by the Binary dtype.
  binary_cols = F.col(Binary)
  _selector_proxy_(binary_cols, name: "binary")
end
314
+
315
+ # Select all boolean columns.
316
+ #
317
+ # @return [SelectorProxy]
318
+ #
319
+ # @example
320
+ # df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
321
+ # # =>
322
+ # # shape: (4, 2)
323
+ # # ┌─────┬────────┐
324
+ # # │ n ┆ n_even │
325
+ # # │ --- ┆ --- │
326
+ # # │ i64 ┆ bool │
327
+ # # ╞═════╪════════╡
328
+ # # │ 1 ┆ false │
329
+ # # │ 2 ┆ true │
330
+ # # │ 3 ┆ false │
331
+ # # │ 4 ┆ true │
332
+ # # └─────┴────────┘
333
+ #
334
+ # @example Select and invert boolean columns:
335
+ # df.with_columns(is_odd: Polars.cs.boolean.not_)
336
+ # # =>
337
+ # # shape: (4, 3)
338
+ # # ┌─────┬────────┬────────┐
339
+ # # │ n ┆ n_even ┆ is_odd │
340
+ # # │ --- ┆ --- ┆ --- │
341
+ # # │ i64 ┆ bool ┆ bool │
342
+ # # ╞═════╪════════╪════════╡
343
+ # # │ 1 ┆ false ┆ true │
344
+ # # │ 2 ┆ true ┆ false │
345
+ # # │ 3 ┆ false ┆ true │
346
+ # # │ 4 ┆ true ┆ false │
347
+ # # └─────┴────────┴────────┘
348
+ #
349
+ # @example Select all columns *except* for those that are boolean:
350
+ # df.select(~Polars.cs.boolean)
351
+ # # =>
352
+ # # shape: (4, 1)
353
+ # # ┌─────┐
354
+ # # │ n │
355
+ # # │ --- │
356
+ # # │ i64 │
357
+ # # ╞═════╡
358
+ # # │ 1 │
359
+ # # │ 2 │
360
+ # # │ 3 │
361
+ # # │ 4 │
362
+ # # └─────┘
363
def self.boolean
  # Match columns by the Boolean dtype.
  boolean_cols = F.col(Boolean)
  _selector_proxy_(boolean_cols, name: "boolean")
end
366
+
367
+ # TODO
368
+ # def by_dtype
369
+ # end
370
+
371
+ # TODO
372
+ # def by_index
373
+ # end
374
+
375
+ # TODO
376
+ # def by_name
377
+ # end
378
+
379
+ # Select all categorical columns.
380
+ #
381
+ # @return [SelectorProxy]
382
+ #
383
+ # @example
384
+ # df = Polars::DataFrame.new(
385
+ # {
386
+ # "foo" => ["xx", "yy"],
387
+ # "bar" => [123, 456],
388
+ # "baz" => [2.0, 5.5]
389
+ # },
390
+ # schema_overrides: {"foo" => Polars::Categorical}
391
+ # )
392
+ #
393
+ # @example Select all categorical columns:
394
+ # df.select(Polars.cs.categorical)
395
+ # # =>
396
+ # # shape: (2, 1)
397
+ # # ┌─────┐
398
+ # # │ foo │
399
+ # # │ --- │
400
+ # # │ cat │
401
+ # # ╞═════╡
402
+ # # │ xx │
403
+ # # │ yy │
404
+ # # └─────┘
405
+ #
406
+ # @example Select all columns *except* for those that are categorical:
407
+ # df.select(~Polars.cs.categorical)
408
+ # # =>
409
+ # # shape: (2, 2)
410
+ # # ┌─────┬─────┐
411
+ # # │ bar ┆ baz │
412
+ # # │ --- ┆ --- │
413
+ # # │ i64 ┆ f64 │
414
+ # # ╞═════╪═════╡
415
+ # # │ 123 ┆ 2.0 │
416
+ # # │ 456 ┆ 5.5 │
417
+ # # └─────┴─────┘
418
def self.categorical
  # Match columns by the Categorical dtype.
  categorical_cols = F.col(Categorical)
  _selector_proxy_(categorical_cols, name: "categorical")
end
421
+
422
+ # Select columns whose names contain the given literal substring(s).
423
+ #
424
+ # @param substring [Object]
425
+ # Substring(s) that matching column names should contain.
426
+ #
427
+ # @return [SelectorProxy]
428
+ #
429
+ # @example
430
+ # df = Polars::DataFrame.new(
431
+ # {
432
+ # "foo" => ["x", "y"],
433
+ # "bar" => [123, 456],
434
+ # "baz" => [2.0, 5.5],
435
+ # "zap" => [false, true]
436
+ # }
437
+ # )
438
+ #
439
+ # @example Select columns that contain the substring 'ba':
440
+ # df.select(Polars.cs.contains("ba"))
441
+ # # =>
442
+ # # shape: (2, 2)
443
+ # # ┌─────┬─────┐
444
+ # # │ bar ┆ baz │
445
+ # # │ --- ┆ --- │
446
+ # # │ i64 ┆ f64 │
447
+ # # ╞═════╪═════╡
448
+ # # │ 123 ┆ 2.0 │
449
+ # # │ 456 ┆ 5.5 │
450
+ # # └─────┴─────┘
451
+ #
452
+ # @example Select columns that contain the substring 'ba' or the letter 'z':
453
+ # df.select(Polars.cs.contains("ba", "z"))
454
+ # # =>
455
+ # # shape: (2, 3)
456
+ # # ┌─────┬─────┬───────┐
457
+ # # │ bar ┆ baz ┆ zap │
458
+ # # │ --- ┆ --- ┆ --- │
459
+ # # │ i64 ┆ f64 ┆ bool │
460
+ # # ╞═════╪═════╪═══════╡
461
+ # # │ 123 ┆ 2.0 ┆ false │
462
+ # # │ 456 ┆ 5.5 ┆ true │
463
+ # # └─────┴─────┴───────┘
464
+ #
465
+ # @example Select all columns *except* for those that contain the substring 'ba':
466
+ # df.select(~Polars.cs.contains("ba"))
467
+ # # =>
468
+ # # shape: (2, 2)
469
+ # # ┌─────┬───────┐
470
+ # # │ foo ┆ zap │
471
+ # # │ --- ┆ --- │
472
+ # # │ str ┆ bool │
473
+ # # ╞═════╪═══════╡
474
+ # # │ x ┆ false │
475
+ # # │ y ┆ true │
476
+ # # └─────┴───────┘
477
def self.contains(*substring)
  # Combine all substrings into one alternation group, e.g. "(ba|z)", and
  # allow anything before and after it in the column name.
  escaped_substring = _re_string(substring)
  raw_params = "^.*#{escaped_substring}.*$"

  _selector_proxy_(
    F.col(raw_params),
    name: "contains",
    # Record the caller's original arguments (an Array), not the escaped
    # regex string: SelectorProxy#inspect strips the first and last
    # character of a "*"-prefixed value's inspect output, which removes an
    # Array's brackets but would remove a String's quotes and print the raw
    # regex, e.g. `Polars.cs.contains((ba|z))`.
    parameters: {"*substring" => substring}
  )
end
487
+
488
+ # Select all date columns.
489
+ #
490
+ # @return [SelectorProxy]
491
+ #
492
+ # @example
493
+ # df = Polars::DataFrame.new(
494
+ # {
495
+ # "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
496
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
497
+ # }
498
+ # )
499
+ #
500
+ # @example Select all date columns:
501
+ # df.select(Polars.cs.date)
502
+ # # =>
503
+ # # shape: (2, 1)
504
+ # # ┌────────────┐
505
+ # # │ dt │
506
+ # # │ --- │
507
+ # # │ date │
508
+ # # ╞════════════╡
509
+ # # │ 1999-12-31 │
510
+ # # │ 2024-08-09 │
511
+ # # └────────────┘
512
+ #
513
+ # @example Select all columns *except* for those that are dates:
514
+ # df.select(~Polars.cs.date)
515
+ # # =>
516
+ # # shape: (2, 1)
517
+ # # ┌─────────────────────┐
518
+ # # │ dtm │
519
+ # # │ --- │
520
+ # # │ datetime[ns] │
521
+ # # ╞═════════════════════╡
522
+ # # │ 2001-05-07 10:25:00 │
523
+ # # │ 2031-12-31 00:30:00 │
524
+ # # └─────────────────────┘
525
def self.date
  # Match columns by the Date dtype.
  date_cols = F.col(Date)
  _selector_proxy_(date_cols, name: "date")
end
528
+
529
+ # TODO
530
+ # def datetime
531
+ # end
532
+
533
+ # Select all decimal columns.
534
+ #
535
+ # @return [SelectorProxy]
536
+ #
537
+ # @example
538
+ # df = Polars::DataFrame.new(
539
+ # {
540
+ # "foo" => ["x", "y"],
541
+ # "bar" => [BigDecimal("123"), BigDecimal("456")],
542
+ # "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
543
+ # },
544
+ # schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
545
+ # )
546
+ #
547
+ # @example Select all decimal columns:
548
+ # df.select(Polars.cs.decimal)
549
+ # # =>
550
+ # # shape: (2, 2)
551
+ # # ┌──────────────┬───────────────┐
552
+ # # │ bar ┆ baz │
553
+ # # │ --- ┆ --- │
554
+ # # │ decimal[*,0] ┆ decimal[10,5] │
555
+ # # ╞══════════════╪═══════════════╡
556
+ # # │ 123 ┆ 2.00050 │
557
+ # # │ 456 ┆ -50.55550 │
558
+ # # └──────────────┴───────────────┘
559
+ #
560
+ # @example Select all columns *except* the decimal ones:
561
+ #
562
+ # df.select(~Polars.cs.decimal)
563
+ # # =>
564
+ # # shape: (2, 1)
565
+ # # ┌─────┐
566
+ # # │ foo │
567
+ # # │ --- │
568
+ # # │ str │
569
+ # # ╞═════╡
570
+ # # │ x │
571
+ # # │ y │
572
+ # # └─────┘
573
def self.decimal
  # Match columns by the Decimal dtype.
  # TODO: allow explicit selection by scale/precision?
  decimal_cols = F.col(Decimal)
  _selector_proxy_(decimal_cols, name: "decimal")
end
577
+
578
+ # Select columns that end with the given substring(s).
579
+ #
580
+ # @param suffix [Object]
581
+ # Substring(s) that matching column names should end with.
582
+ #
583
+ # @return [SelectorProxy]
584
+ #
585
+ # @example
586
+ # df = Polars::DataFrame.new(
587
+ # {
588
+ # "foo" => ["x", "y"],
589
+ # "bar" => [123, 456],
590
+ # "baz" => [2.0, 5.5],
591
+ # "zap" => [false, true]
592
+ # }
593
+ # )
594
+ #
595
+ # @example Select columns that end with the substring 'z':
596
+ # df.select(Polars.cs.ends_with("z"))
597
+ # # =>
598
+ # # shape: (2, 1)
599
+ # # ┌─────┐
600
+ # # │ baz │
601
+ # # │ --- │
602
+ # # │ f64 │
603
+ # # ╞═════╡
604
+ # # │ 2.0 │
605
+ # # │ 5.5 │
606
+ # # └─────┘
607
+ #
608
+ # @example Select columns that end with *either* the letter 'z' or 'r':
609
+ # df.select(Polars.cs.ends_with("z", "r"))
610
+ # # =>
611
+ # # shape: (2, 2)
612
+ # # ┌─────┬─────┐
613
+ # # │ bar ┆ baz │
614
+ # # │ --- ┆ --- │
615
+ # # │ i64 ┆ f64 │
616
+ # # ╞═════╪═════╡
617
+ # # │ 123 ┆ 2.0 │
618
+ # # │ 456 ┆ 5.5 │
619
+ # # └─────┴─────┘
620
+ #
621
+ # @example Select all columns *except* for those that end with the substring 'z':
622
+ # df.select(~Polars.cs.ends_with("z"))
623
+ # # =>
624
+ # # shape: (2, 3)
625
+ # # ┌─────┬─────┬───────┐
626
+ # # │ foo ┆ bar ┆ zap │
627
+ # # │ --- ┆ --- ┆ --- │
628
+ # # │ str ┆ i64 ┆ bool │
629
+ # # ╞═════╪═════╪═══════╡
630
+ # # │ x ┆ 123 ┆ false │
631
+ # # │ y ┆ 456 ┆ true │
632
+ # # └─────┴─────┴───────┘
633
def self.ends_with(*suffix)
  # Combine all suffixes into one alternation group, e.g. "(z|r)", anchored
  # at the end of the column name.
  escaped_suffix = _re_string(suffix)
  raw_params = "^.*#{escaped_suffix}$"

  _selector_proxy_(
    F.col(raw_params),
    name: "ends_with",
    # Record the caller's original arguments (an Array), not the escaped
    # regex string: SelectorProxy#inspect strips the first and last
    # character of a "*"-prefixed value's inspect output, which removes an
    # Array's brackets but would remove a String's quotes and print the raw
    # regex, e.g. `Polars.cs.ends_with((z|r))`.
    parameters: {"*suffix" => suffix},
  )
end
643
+
644
+ # Select the first column in the current scope.
645
+ #
646
+ # @return [SelectorProxy]
647
+ #
648
+ # @example
649
+ # df = Polars::DataFrame.new(
650
+ # {
651
+ # "foo" => ["x", "y"],
652
+ # "bar" => [123, 456],
653
+ # "baz" => [2.0, 5.5],
654
+ # "zap" => [0, 1]
655
+ # }
656
+ # )
657
+ #
658
+ # @example Select the first column:
659
+ # df.select(Polars.cs.first)
660
+ # # =>
661
+ # # shape: (2, 1)
662
+ # # ┌─────┐
663
+ # # │ foo │
664
+ # # │ --- │
665
+ # # │ str │
666
+ # # ╞═════╡
667
+ # # │ x │
668
+ # # │ y │
669
+ # # └─────┘
670
+ #
671
+ # @example Select everything *except* for the first column:
672
+ # df.select(~Polars.cs.first)
673
+ # # =>
674
+ # # shape: (2, 3)
675
+ # # ┌─────┬─────┬─────┐
676
+ # # │ bar ┆ baz ┆ zap │
677
+ # # │ --- ┆ --- ┆ --- │
678
+ # # │ i64 ┆ f64 ┆ i64 │
679
+ # # ╞═════╪═════╪═════╡
680
+ # # │ 123 ┆ 2.0 ┆ 0 │
681
+ # # │ 456 ┆ 5.5 ┆ 1 │
682
+ # # └─────┴─────┴─────┘
683
def self.first
  # Wrap the "first column" expression as a selector named "first".
  leading_column = F.first
  _selector_proxy_(leading_column, name: "first")
end
686
+
687
+ # Select all float columns.
688
+ #
689
+ # @return [SelectorProxy]
690
+ #
691
+ # @example
692
+ # df = Polars::DataFrame.new(
693
+ # {
694
+ # "foo" => ["x", "y"],
695
+ # "bar" => [123, 456],
696
+ # "baz" => [2.0, 5.5],
697
+ # "zap" => [0.0, 1.0]
698
+ # },
699
+ # schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
700
+ # )
701
+ #
702
+ # @example Select all float columns:
703
+ # df.select(Polars.cs.float)
704
+ # # =>
705
+ # # shape: (2, 2)
706
+ # # ┌─────┬─────┐
707
+ # # │ baz ┆ zap │
708
+ # # │ --- ┆ --- │
709
+ # # │ f32 ┆ f64 │
710
+ # # ╞═════╪═════╡
711
+ # # │ 2.0 ┆ 0.0 │
712
+ # # │ 5.5 ┆ 1.0 │
713
+ # # └─────┴─────┘
714
+ #
715
+ # @example Select all columns *except* for those that are float:
716
+ # df.select(~Polars.cs.float)
717
+ # # =>
718
+ # # shape: (2, 2)
719
+ # # ┌─────┬─────┐
720
+ # # │ foo ┆ bar │
721
+ # # │ --- ┆ --- │
722
+ # # │ str ┆ i64 │
723
+ # # ╞═════╪═════╡
724
+ # # │ x ┆ 123 │
725
+ # # │ y ┆ 456 │
726
+ # # └─────┴─────┘
727
def self.float
  # Match columns whose dtype is listed in FLOAT_DTYPES.
  float_cols = F.col(FLOAT_DTYPES)
  _selector_proxy_(float_cols, name: "float")
end
730
+
731
+ # Select all integer columns.
732
+ #
733
+ # @return [SelectorProxy]
734
+ #
735
+ # @example
736
+ # df = Polars::DataFrame.new(
737
+ # {
738
+ # "foo" => ["x", "y"],
739
+ # "bar" => [123, 456],
740
+ # "baz" => [2.0, 5.5],
741
+ # "zap" => [0, 1]
742
+ # }
743
+ # )
744
+ #
745
+ # @example Select all integer columns:
746
+ # df.select(Polars.cs.integer)
747
+ # # =>
748
+ # # shape: (2, 2)
749
+ # # ┌─────┬─────┐
750
+ # # │ bar ┆ zap │
751
+ # # │ --- ┆ --- │
752
+ # # │ i64 ┆ i64 │
753
+ # # ╞═════╪═════╡
754
+ # # │ 123 ┆ 0 │
755
+ # # │ 456 ┆ 1 │
756
+ # # └─────┴─────┘
757
+ #
758
+ # @example Select all columns *except* for those that are integer:
759
+ # df.select(~Polars.cs.integer)
760
+ # # =>
761
+ # # shape: (2, 2)
762
+ # # ┌─────┬─────┐
763
+ # # │ foo ┆ baz │
764
+ # # │ --- ┆ --- │
765
+ # # │ str ┆ f64 │
766
+ # # ╞═════╪═════╡
767
+ # # │ x ┆ 2.0 │
768
+ # # │ y ┆ 5.5 │
769
+ # # └─────┴─────┘
770
def self.integer
  # Match columns whose dtype is listed in INTEGER_DTYPES.
  integer_cols = F.col(INTEGER_DTYPES)
  _selector_proxy_(integer_cols, name: "integer")
end
773
+
774
+ # Select all signed integer columns.
775
+ #
776
+ # @return [SelectorProxy]
777
+ #
778
+ # @example
779
+ # df = Polars::DataFrame.new(
780
+ # {
781
+ # "foo" => [-123, -456],
782
+ # "bar" => [3456, 6789],
783
+ # "baz" => [7654, 4321],
784
+ # "zap" => ["ab", "cd"]
785
+ # },
786
+ # schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
787
+ # )
788
+ #
789
+ # @example Select all signed integer columns:
790
+ # df.select(Polars.cs.signed_integer)
791
+ # # =>
792
+ # # shape: (2, 1)
793
+ # # ┌──────┐
794
+ # # │ foo │
795
+ # # │ --- │
796
+ # # │ i64 │
797
+ # # ╞══════╡
798
+ # # │ -123 │
799
+ # # │ -456 │
800
+ # # └──────┘
801
+ #
802
+ # @example
803
+ # df.select(~Polars.cs.signed_integer)
804
+ # # =>
805
+ # # shape: (2, 3)
806
+ # # ┌──────┬──────┬─────┐
807
+ # # │ bar ┆ baz ┆ zap │
808
+ # # │ --- ┆ --- ┆ --- │
809
+ # # │ u32 ┆ u64 ┆ str │
810
+ # # ╞══════╪══════╪═════╡
811
+ # # │ 3456 ┆ 7654 ┆ ab │
812
+ # # │ 6789 ┆ 4321 ┆ cd │
813
+ # # └──────┴──────┴─────┘
814
+ #
815
+ # @example Select all integer columns (both signed and unsigned):
816
+ # df.select(Polars.cs.integer)
817
+ # # =>
818
+ # # shape: (2, 3)
819
+ # # ┌──────┬──────┬──────┐
820
+ # # │ foo ┆ bar ┆ baz │
821
+ # # │ --- ┆ --- ┆ --- │
822
+ # # │ i64 ┆ u32 ┆ u64 │
823
+ # # ╞══════╪══════╪══════╡
824
+ # # │ -123 ┆ 3456 ┆ 7654 │
825
+ # # │ -456 ┆ 6789 ┆ 4321 │
826
+ # # └──────┴──────┴──────┘
827
def self.signed_integer
  # Match columns whose dtype is listed in SIGNED_INTEGER_DTYPES.
  signed_cols = F.col(SIGNED_INTEGER_DTYPES)
  _selector_proxy_(signed_cols, name: "signed_integer")
end
830
+
831
+ # Select all unsigned integer columns.
832
+ #
833
+ # @return [SelectorProxy]
834
+ #
835
+ # @example
836
+ # df = Polars::DataFrame.new(
837
+ # {
838
+ # "foo" => [-123, -456],
839
+ # "bar" => [3456, 6789],
840
+ # "baz" => [7654, 4321],
841
+ # "zap" => ["ab", "cd"]
842
+ # },
843
+ # schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
844
+ # )
845
+ #
846
+ # @example Select all unsigned integer columns:
847
+ # df.select(Polars.cs.unsigned_integer)
848
+ # # =>
849
+ # # shape: (2, 2)
850
+ # # ┌──────┬──────┐
851
+ # # │ bar ┆ baz │
852
+ # # │ --- ┆ --- │
853
+ # # │ u32 ┆ u64 │
854
+ # # ╞══════╪══════╡
855
+ # # │ 3456 ┆ 7654 │
856
+ # # │ 6789 ┆ 4321 │
857
+ # # └──────┴──────┘
858
+ #
859
+ # @example Select all columns *except* for those that are unsigned integers:
860
+ # df.select(~Polars.cs.unsigned_integer)
861
+ # # =>
862
+ # # shape: (2, 2)
863
+ # # ┌──────┬─────┐
864
+ # # │ foo ┆ zap │
865
+ # # │ --- ┆ --- │
866
+ # # │ i64 ┆ str │
867
+ # # ╞══════╪═════╡
868
+ # # │ -123 ┆ ab │
869
+ # # │ -456 ┆ cd │
870
+ # # └──────┴─────┘
871
+ #
872
+ # @example Select all integer columns (both signed and unsigned):
873
+ # df.select(Polars.cs.integer)
874
+ # # =>
875
+ # # shape: (2, 3)
876
+ # # ┌──────┬──────┬──────┐
877
+ # # │ foo ┆ bar ┆ baz │
878
+ # # │ --- ┆ --- ┆ --- │
879
+ # # │ i64 ┆ u32 ┆ u64 │
880
+ # # ╞══════╪══════╪══════╡
881
+ # # │ -123 ┆ 3456 ┆ 7654 │
882
+ # # │ -456 ┆ 6789 ┆ 4321 │
883
+ # # └──────┴──────┴──────┘
884
def self.unsigned_integer
  # Match columns whose dtype is listed in UNSIGNED_INTEGER_DTYPES.
  unsigned_cols = F.col(UNSIGNED_INTEGER_DTYPES)
  _selector_proxy_(unsigned_cols, name: "unsigned_integer")
end
887
+
888
+ # Select the last column in the current scope.
889
+ #
890
+ # @return [SelectorProxy]
891
+ #
892
+ # @example
893
+ # df = Polars::DataFrame.new(
894
+ # {
895
+ # "foo" => ["x", "y"],
896
+ # "bar" => [123, 456],
897
+ # "baz" => [2.0, 5.5],
898
+ # "zap" => [0, 1]
899
+ # }
900
+ # )
901
+ #
902
+ # @example Select the last column:
903
+ # df.select(Polars.cs.last)
904
+ # # =>
905
+ # # shape: (2, 1)
906
+ # # ┌─────┐
907
+ # # │ zap │
908
+ # # │ --- │
909
+ # # │ i64 │
910
+ # # ╞═════╡
911
+ # # │ 0 │
912
+ # # │ 1 │
913
+ # # └─────┘
914
+ #
915
+ # @example Select everything *except* for the last column:
916
+ # df.select(~Polars.cs.last)
917
+ # # =>
918
+ # # shape: (2, 3)
919
+ # # ┌─────┬─────┬─────┐
920
+ # # │ foo ┆ bar ┆ baz │
921
+ # # │ --- ┆ --- ┆ --- │
922
+ # # │ str ┆ i64 ┆ f64 │
923
+ # # ╞═════╪═════╪═════╡
924
+ # # │ x ┆ 123 ┆ 2.0 │
925
+ # # │ y ┆ 456 ┆ 5.5 │
926
+ # # └─────┴─────┴─────┘
927
def self.last
  # Wrap the "last column" expression as a selector named "last".
  trailing_column = F.last
  _selector_proxy_(trailing_column, name: "last")
end
930
+
931
+ # Select all numeric columns.
932
+ #
933
+ # @return [SelectorProxy]
934
+ #
935
+ # @example
936
+ # df = Polars::DataFrame.new(
937
+ # {
938
+ # "foo" => ["x", "y"],
939
+ # "bar" => [123, 456],
940
+ # "baz" => [2.0, 5.5],
941
+ # "zap" => [0, 0]
942
+ # },
943
+ # schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
944
+ # )
945
+ #
946
+ # @example Match all numeric columns:
947
+ # df.select(Polars.cs.numeric)
948
+ # # =>
949
+ # # shape: (2, 3)
950
+ # # ┌─────┬─────┬─────┐
951
+ # # │ bar ┆ baz ┆ zap │
952
+ # # │ --- ┆ --- ┆ --- │
953
+ # # │ i16 ┆ f32 ┆ u8 │
954
+ # # ╞═════╪═════╪═════╡
955
+ # # │ 123 ┆ 2.0 ┆ 0 │
956
+ # # │ 456 ┆ 5.5 ┆ 0 │
957
+ # # └─────┴─────┴─────┘
958
+ #
959
+ # @example Match all columns *except* for those that are numeric:
960
+ # df.select(~Polars.cs.numeric)
961
+ # # =>
962
+ # # shape: (2, 1)
963
+ # # ┌─────┐
964
+ # # │ foo │
965
+ # # │ --- │
966
+ # # │ str │
967
+ # # ╞═════╡
968
+ # # │ x │
969
+ # # │ y │
970
+ # # └─────┘
971
def self.numeric
  # Match columns whose dtype is listed in NUMERIC_DTYPES.
  numeric_cols = F.col(NUMERIC_DTYPES)
  _selector_proxy_(numeric_cols, name: "numeric")
end
974
+
975
+ # Select columns that start with the given substring(s).
976
+ #
977
+ # @param prefix [Object]
978
+ # Substring(s) that matching column names should start with.
979
+ #
980
+ # @return [SelectorProxy]
981
+ #
982
+ # @example
983
+ # df = Polars::DataFrame.new(
984
+ # {
985
+ # "foo" => [1.0, 2.0],
986
+ # "bar" => [3.0, 4.0],
987
+ # "baz" => [5, 6],
988
+ # "zap" => [7, 8]
989
+ # }
990
+ # )
991
+ #
992
+ # @example Match columns starting with a 'b':
993
+ # df.select(Polars.cs.starts_with("b"))
994
+ # # =>
995
+ # # shape: (2, 2)
996
+ # # ┌─────┬─────┐
997
+ # # │ bar ┆ baz │
998
+ # # │ --- ┆ --- │
999
+ # # │ f64 ┆ i64 │
1000
+ # # ╞═════╪═════╡
1001
+ # # │ 3.0 ┆ 5 │
1002
+ # # │ 4.0 ┆ 6 │
1003
+ # # └─────┴─────┘
1004
+ #
1005
+ # @example Match columns starting with *either* the letter 'b' or 'z':
1006
+ # df.select(Polars.cs.starts_with("b", "z"))
1007
+ # # =>
1008
+ # # shape: (2, 3)
1009
+ # # ┌─────┬─────┬─────┐
1010
+ # # │ bar ┆ baz ┆ zap │
1011
+ # # │ --- ┆ --- ┆ --- │
1012
+ # # │ f64 ┆ i64 ┆ i64 │
1013
+ # # ╞═════╪═════╪═════╡
1014
+ # # │ 3.0 ┆ 5 ┆ 7 │
1015
+ # # │ 4.0 ┆ 6 ┆ 8 │
1016
+ # # └─────┴─────┴─────┘
1017
+ #
1018
+ # @example Match all columns *except* for those starting with 'b':
1019
+ # df.select(~Polars.cs.starts_with("b"))
1020
+ # # =>
1021
+ # # shape: (2, 2)
1022
+ # # ┌─────┬─────┐
1023
+ # # │ foo ┆ zap │
1024
+ # # │ --- ┆ --- │
1025
+ # # │ f64 ┆ i64 │
1026
+ # # ╞═════╪═════╡
1027
+ # # │ 1.0 ┆ 7 │
1028
+ # # │ 2.0 ┆ 8 │
1029
+ # # └─────┴─────┘
1030
def self.starts_with(*prefix)
  # Combine all prefixes into one alternation group anchored at the start
  # of the column name; anything may follow.
  pattern = "^#{_re_string(prefix)}.*$"
  matching_cols = F.col(pattern)

  # The original (unescaped) arguments are recorded for display purposes.
  _selector_proxy_(
    matching_cols,
    name: "starts_with",
    parameters: {"*prefix" => prefix}
  )
end
1040
+
1041
+ # Select all String (and, optionally, Categorical) string columns.
1042
+ #
1043
+ # @return [SelectorProxy]
1044
+ #
1045
+ # @example
1046
+ # df = Polars::DataFrame.new(
1047
+ # {
1048
+ # "w" => ["xx", "yy", "xx", "yy", "xx"],
1049
+ # "x" => [1, 2, 1, 4, -2],
1050
+ # "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
1051
+ # "z" => ["a", "b", "a", "b", "b"]
1052
+ # },
1053
+ # ).with_columns(
1054
+ # z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
1055
+ # )
1056
+ #
1057
+ # @example Group by all string columns, sum the numeric columns, then sort by the string cols:
1058
+ # df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
1059
+ # # shape: (2, 3)
1060
+ # # ┌─────┬─────┬─────┐
1061
+ # # │ w ┆ x ┆ y │
1062
+ # # │ --- ┆ --- ┆ --- │
1063
+ # # │ str ┆ i64 ┆ f64 │
1064
+ # # ╞═════╪═════╪═════╡
1065
+ # # │ xx ┆ 0 ┆ 2.0 │
1066
+ # # │ yy ┆ 6 ┆ 7.0 │
1067
+ # # └─────┴─────┴─────┘
1068
+ #
1069
+ # @example Group by all string *and* categorical columns:
1070
+ # df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
1071
+ # Polars.cs.string(include_categorical: true)
1072
+ # )
1073
+ # # =>
1074
+ # # shape: (3, 4)
1075
+ # # ┌─────┬─────┬─────┬──────┐
1076
+ # # │ w ┆ z ┆ x ┆ y │
1077
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1078
+ # # │ str ┆ cat ┆ i64 ┆ f64 │
1079
+ # # ╞═════╪═════╪═════╪══════╡
1080
+ # # │ xx ┆ a ┆ 2 ┆ 4.0 │
1081
+ # # │ xx ┆ b ┆ -2 ┆ -2.0 │
1082
+ # # │ yy ┆ b ┆ 6 ┆ 7.0 │
1083
+ # # └─────┴─────┴─────┴──────┘
1084
def self.string(include_categorical: false)
  # Always match String columns; add Categorical only on request.
  string_dtypes = include_categorical ? [String, Categorical] : [String]

  _selector_proxy_(
    F.col(string_dtypes),
    name: "string",
    parameters: {"include_categorical" => include_categorical},
  )
end
1096
+
1097
+ # Select all time columns.
1098
+ #
1099
+ # @return [SelectorProxy]
1100
+ #
1101
+ # @example
1102
+ # df = Polars::DataFrame.new(
1103
+ # {
1104
+ # "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
1105
+ # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
1106
+ # "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
1107
+ # },
1108
+ # schema_overrides: {"tm" => Polars::Time}
1109
+ # )
1110
+ #
1111
+ # @example Select all time columns:
1112
+ # df.select(Polars.cs.time)
1113
+ # # =>
1114
+ # # shape: (2, 1)
1115
+ # # ┌──────────┐
1116
+ # # │ tm │
1117
+ # # │ --- │
1118
+ # # │ time │
1119
+ # # ╞══════════╡
1120
+ # # │ 00:00:00 │
1121
+ # # │ 23:59:59 │
1122
+ # # └──────────┘
1123
+ #
1124
+ # @example Select all columns *except* for those that are times:
1125
+ # df.select(~Polars.cs.time)
1126
+ # # =>
1127
+ # # shape: (2, 2)
1128
+ # # ┌─────────────────────┬────────────┐
1129
+ # # │ dtm ┆ dt │
1130
+ # # │ --- ┆ --- │
1131
+ # # │ datetime[ns] ┆ date │
1132
+ # # ╞═════════════════════╪════════════╡
1133
+ # # │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
1134
+ # # │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
1135
+ # # └─────────────────────┴────────────┘
1136
def self.time
  # Match columns by the Time dtype.
  time_cols = F.col(Time)
  _selector_proxy_(time_cols, name: "time")
end
1139
+ end
1140
+
1141
def self.cs
  # Short alias so selectors can be written as `Polars.cs.<selector>`.
  Polars::Selectors
end
1144
+ end