polars-df 0.21.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +1 -1
  4. data/ext/polars/Cargo.toml +7 -1
  5. data/ext/polars/src/conversion/mod.rs +92 -4
  6. data/ext/polars/src/exceptions.rs +1 -0
  7. data/ext/polars/src/expr/array.rs +73 -4
  8. data/ext/polars/src/expr/binary.rs +26 -1
  9. data/ext/polars/src/expr/bitwise.rs +39 -0
  10. data/ext/polars/src/expr/categorical.rs +20 -0
  11. data/ext/polars/src/expr/datatype.rs +24 -1
  12. data/ext/polars/src/expr/datetime.rs +58 -0
  13. data/ext/polars/src/expr/general.rs +84 -5
  14. data/ext/polars/src/expr/list.rs +24 -0
  15. data/ext/polars/src/expr/meta.rs +11 -0
  16. data/ext/polars/src/expr/mod.rs +1 -0
  17. data/ext/polars/src/expr/name.rs +8 -0
  18. data/ext/polars/src/expr/rolling.rs +20 -0
  19. data/ext/polars/src/expr/string.rs +59 -0
  20. data/ext/polars/src/expr/struct.rs +9 -1
  21. data/ext/polars/src/functions/io.rs +19 -0
  22. data/ext/polars/src/functions/lazy.rs +4 -0
  23. data/ext/polars/src/lazyframe/general.rs +51 -0
  24. data/ext/polars/src/lib.rs +119 -10
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/map/series.rs +1 -1
  27. data/ext/polars/src/series/aggregation.rs +44 -0
  28. data/ext/polars/src/series/general.rs +64 -4
  29. data/lib/polars/array_expr.rb +382 -3
  30. data/lib/polars/array_name_space.rb +281 -0
  31. data/lib/polars/binary_expr.rb +67 -0
  32. data/lib/polars/binary_name_space.rb +43 -0
  33. data/lib/polars/cat_expr.rb +224 -0
  34. data/lib/polars/cat_name_space.rb +138 -0
  35. data/lib/polars/config.rb +2 -2
  36. data/lib/polars/convert.rb +6 -6
  37. data/lib/polars/data_frame.rb +684 -19
  38. data/lib/polars/data_type_expr.rb +52 -0
  39. data/lib/polars/data_types.rb +14 -2
  40. data/lib/polars/date_time_expr.rb +251 -0
  41. data/lib/polars/date_time_name_space.rb +299 -0
  42. data/lib/polars/expr.rb +1213 -180
  43. data/lib/polars/functions/datatype.rb +21 -0
  44. data/lib/polars/functions/lazy.rb +13 -0
  45. data/lib/polars/io/csv.rb +1 -1
  46. data/lib/polars/io/json.rb +4 -4
  47. data/lib/polars/io/ndjson.rb +4 -4
  48. data/lib/polars/io/parquet.rb +27 -5
  49. data/lib/polars/lazy_frame.rb +936 -20
  50. data/lib/polars/list_expr.rb +196 -4
  51. data/lib/polars/list_name_space.rb +201 -4
  52. data/lib/polars/meta_expr.rb +64 -0
  53. data/lib/polars/name_expr.rb +36 -0
  54. data/lib/polars/schema.rb +79 -3
  55. data/lib/polars/selector.rb +72 -0
  56. data/lib/polars/selectors.rb +3 -3
  57. data/lib/polars/series.rb +1051 -54
  58. data/lib/polars/string_expr.rb +411 -6
  59. data/lib/polars/string_name_space.rb +722 -49
  60. data/lib/polars/struct_expr.rb +103 -0
  61. data/lib/polars/struct_name_space.rb +19 -1
  62. data/lib/polars/utils/various.rb +18 -1
  63. data/lib/polars/utils.rb +5 -1
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +2 -0
  66. metadata +4 -1
@@ -74,6 +74,60 @@ module Polars
74
74
  super
75
75
  end
76
76
 
77
+ # Compute the standard deviation of the values of the sub-arrays.
78
+ #
79
+ # @return [Series]
80
+ #
81
+ # @example
82
+ # s = Polars::Series.new("a", [[1, 2], [4, 3]], dtype: Polars::Array.new(Polars::Int64, 2))
83
+ # s.arr.std
84
+ # # =>
85
+ # # shape: (2,)
86
+ # # Series: 'a' [f64]
87
+ # # [
88
+ # # 0.707107
89
+ # # 0.707107
90
+ # # ]
91
+ def std(ddof: 1)
92
+ super
93
+ end
94
+
95
+ # Compute the variance of the values of the sub-arrays.
96
+ #
97
+ # @return [Series]
98
+ #
99
+ # @example
100
+ # s = Polars::Series.new("a", [[1, 2], [4, 3]], dtype: Polars::Array.new(Polars::Int64, 2))
101
+ # s.arr.var
102
+ # # =>
103
+ # # shape: (2,)
104
+ # # Series: 'a' [f64]
105
+ # # [
106
+ # # 0.5
107
+ # # 0.5
108
+ # # ]
109
+ def var(ddof: 1)
110
+ super
111
+ end
112
+
113
+ # Compute the median of the values of the sub-arrays.
114
+ #
115
+ # @return [Series]
116
+ #
117
+ # @example
118
+ # s = Polars::Series.new("a", [[1, 2], [4, 3]], dtype: Polars::Array.new(Polars::Int64, 2))
119
+ # s.arr.median
120
+ # # =>
121
+ # # shape: (2,)
122
+ # # Series: 'a' [f64]
123
+ # # [
124
+ # # 1.5
125
+ # # 3.5
126
+ # # ]
127
+ def median
128
+ super
129
+ end
130
+
77
131
  # Get the unique/distinct values in the array.
78
132
  #
79
133
  # @param maintain_order [Boolean]
@@ -102,6 +156,24 @@ module Polars
102
156
  super
103
157
  end
104
158
 
159
+ # Count the number of unique values in every sub-array.
160
+ #
161
+ # @return [Series]
162
+ #
163
+ # @example
164
+ # s = Polars::Series.new("a", [[1, 2], [4, 4]], dtype: Polars::Array.new(Polars::Int64, 2))
165
+ # s.arr.n_unique
166
+ # # =>
167
+ # # shape: (2,)
168
+ # # Series: 'a' [u32]
169
+ # # [
170
+ # # 2
171
+ # # 1
172
+ # # ]
173
+ def n_unique
174
+ super
175
+ end
176
+
105
177
  # Convert an Array column into a List column with the same inner data type.
106
178
  #
107
179
  # @return [Series]
@@ -144,6 +216,148 @@ module Polars
144
216
  super
145
217
  end
146
218
 
219
+ # Return the number of elements in each array.
220
+ #
221
+ # @return [Series]
222
+ #
223
+ # @example
224
+ # s = Polars::Series.new("a", [[1, 2], [4, 3]], dtype: Polars::Array.new(Polars::Int8, 2))
225
+ # s.arr.len
226
+ # # =>
227
+ # # shape: (2,)
228
+ # # Series: 'a' [u32]
229
+ # # [
230
+ # # 2
231
+ # # 2
232
+ # # ]
233
+ def len
234
+ super
235
+ end
236
+
237
+ # Slice the sub-arrays.
238
+ #
239
+ # @param offset [Integer]
240
+ # The starting index of the slice.
241
+ # @param length [Integer]
242
+ # The length of the slice.
243
+ #
244
+ # @return [Series]
245
+ #
246
+ # @example
247
+ # s = Polars::Series.new(
248
+ # [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
249
+ # dtype: Polars::Array.new(Polars::Int64, 6)
250
+ # )
251
+ # s.arr.slice(1)
252
+ # # =>
253
+ # # shape: (2,)
254
+ # # Series: '' [list[i64]]
255
+ # # [
256
+ # # [2, 3, … 6]
257
+ # # [8, 9, … 12]
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s.arr.slice(1, 3, as_array: true)
262
+ # # =>
263
+ # # shape: (2,)
264
+ # # Series: '' [array[i64, 3]]
265
+ # # [
266
+ # # [2, 3, 4]
267
+ # # [8, 9, 10]
268
+ # # ]
269
+ #
270
+ # @example
271
+ # s.arr.slice(-2)
272
+ # # =>
273
+ # # shape: (2,)
274
+ # # Series: '' [list[i64]]
275
+ # # [
276
+ # # [5, 6]
277
+ # # [11, 12]
278
+ # # ]
279
+ def slice(
280
+ offset,
281
+ length = nil,
282
+ as_array: false
283
+ )
284
+ super
285
+ end
286
+
287
+ # Get the first `n` elements of the sub-arrays.
288
+ #
289
+ # @param n [Integer]
290
+ # Number of values to return for each sub-array.
291
+ # @param as_array [Boolean]
292
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
293
+ # If true `n` must be a constant value.
294
+ #
295
+ # @return [Series]
296
+ #
297
+ # @example
298
+ # s = Polars::Series.new(
299
+ # [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
300
+ # dtype: Polars::Array.new(Polars::Int64, 6)
301
+ # )
302
+ # s.arr.head
303
+ # # =>
304
+ # # shape: (2,)
305
+ # # Series: '' [list[i64]]
306
+ # # [
307
+ # # [1, 2, … 5]
308
+ # # [7, 8, … 11]
309
+ # # ]
310
+ #
311
+ # @example
312
+ # s.arr.head(3, as_array: true)
313
+ # # =>
314
+ # # shape: (2,)
315
+ # # Series: '' [array[i64, 3]]
316
+ # # [
317
+ # # [1, 2, 3]
318
+ # # [7, 8, 9]
319
+ # # ]
320
+ def head(n = 5, as_array: false)
321
+ super
322
+ end
323
+
324
+ # Get the last `n` elements of the sub-arrays.
325
+ #
326
+ # @param n [Integer]
327
+ # Number of values to return for each sub-array.
328
+ # @param as_array [Boolean]
329
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
330
+ # If true `n` must be a constant value.
331
+ #
332
+ # @return [Series]
333
+ #
334
+ # @example
335
+ # s = Polars::Series.new(
336
+ # [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
337
+ # dtype: Polars::Array.new(Polars::Int64, 6)
338
+ # )
339
+ # s.arr.tail
340
+ # # =>
341
+ # # shape: (2,)
342
+ # # Series: '' [list[i64]]
343
+ # # [
344
+ # # [2, 3, … 6]
345
+ # # [8, 9, … 12]
346
+ # # ]
347
+ #
348
+ # @example
349
+ # s.arr.tail(3, as_array: true)
350
+ # # =>
351
+ # # shape: (2,)
352
+ # # Series: '' [array[i64, 3]]
353
+ # # [
354
+ # # [4, 5, 6]
355
+ # # [10, 11, 12]
356
+ # # ]
357
+ def tail(n = 5, as_array: false)
358
+ super
359
+ end
360
+
147
361
  # Evaluate whether all boolean values are true for every subarray.
148
362
  #
149
363
  # @return [Series]
@@ -419,5 +633,72 @@ module Polars
419
633
  def count_matches(element)
420
634
  super
421
635
  end
636
+
637
+ # Convert the series of type `Array` to a series of type `Struct`.
638
+ #
639
+ # @param fields [Object]
640
+ # If the names and number of the desired fields are known in advance
641
+ # a list of field names can be given, which will be assigned by index.
642
+ # Otherwise, to dynamically assign field names, a custom function can be
643
+ # used; if neither are set, fields will be `field_0, field_1 .. field_n`.
644
+ #
645
+ # @return [Series]
646
+ #
647
+ # @example Convert array to struct with default field name assignment:
648
+ # s1 = Polars::Series.new("n", [[0, 1, 2], [3, 4, 5]], dtype: Polars::Array.new(Polars::Int8, 3))
649
+ # s2 = s1.arr.to_struct
650
+ # # =>
651
+ # # shape: (2,)
652
+ # # Series: 'n' [struct[3]]
653
+ # # [
654
+ # # {0,1,2}
655
+ # # {3,4,5}
656
+ # # ]
657
+ #
658
+ # @example
659
+ # s2.struct.fields
660
+ # # => ["field_0", "field_1", "field_2"]
661
+ def to_struct(
662
+ fields: nil
663
+ )
664
+ s = Utils.wrap_s(_s)
665
+ s.to_frame.select(F.col(s.name).arr.to_struct(fields: fields)).to_series
666
+ end
667
+
668
+ # Shift array values by the given number of indices.
669
+ #
670
+ # @param n [Integer]
671
+ # Number of indices to shift forward. If a negative value is passed, values
672
+ # are shifted in the opposite direction instead.
673
+ #
674
+ # @return [Series]
675
+ #
676
+ # @note
677
+ # This method is similar to the `LAG` operation in SQL when the value for `n`
678
+ # is positive. With a negative value for `n`, it is similar to `LEAD`.
679
+ #
680
+ # @example By default, array values are shifted forward by one index.
681
+ # s = Polars::Series.new([[1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int64, 3))
682
+ # s.arr.shift
683
+ # # =>
684
+ # # shape: (2,)
685
+ # # Series: '' [array[i64, 3]]
686
+ # # [
687
+ # # [null, 1, 2]
688
+ # # [null, 4, 5]
689
+ # # ]
690
+ #
691
+ # @example Pass a negative value to shift in the opposite direction instead.
692
+ # s.arr.shift(-2)
693
+ # # =>
694
+ # # shape: (2,)
695
+ # # Series: '' [array[i64, 3]]
696
+ # # [
697
+ # # [3, null, null]
698
+ # # [6, null, null]
699
+ # # ]
700
+ def shift(n = 1)
701
+ super
702
+ end
422
703
  end
423
704
  end
@@ -197,5 +197,72 @@ module Polars
197
197
  raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
198
198
  end
199
199
  end
200
+
201
+ # Get the size of binary values in the given unit.
202
+ #
203
+ # @param unit ['b', 'kb', 'mb', 'gb', 'tb']
204
+ # Scale the returned size to the given unit.
205
+ #
206
+ # @return [Expr]
207
+ #
208
+ # @example
209
+ # df = Polars::DataFrame.new({"data" => [512, 256, 1024].map { |n| "\x00".b * n }})
210
+ # df.with_columns(
211
+ # n_bytes: Polars.col("data").bin.size,
212
+ # n_kilobytes: Polars.col("data").bin.size("kb")
213
+ # )
214
+ # # =>
215
+ # # shape: (3, 3)
216
+ # # ┌─────────────────────────────────┬─────────┬─────────────┐
217
+ # # │ data ┆ n_bytes ┆ n_kilobytes │
218
+ # # │ --- ┆ --- ┆ --- │
219
+ # # │ binary ┆ u32 ┆ f64 │
220
+ # # ╞═════════════════════════════════╪═════════╪═════════════╡
221
+ # # │ b"\x00\x00\x00\x00\x00\x00\x00… ┆ 512 ┆ 0.5 │
222
+ # # │ b"\x00\x00\x00\x00\x00\x00\x00… ┆ 256 ┆ 0.25 │
223
+ # # │ b"\x00\x00\x00\x00\x00\x00\x00… ┆ 1024 ┆ 1.0 │
224
+ # # └─────────────────────────────────┴─────────┴─────────────┘
225
+ def size(unit = "b")
226
+ sz = Utils.wrap_expr(_rbexpr.bin_size_bytes)
227
+ sz = Utils.scale_bytes(sz, to: unit)
228
+ sz
229
+ end
230
+
231
+ # Interpret a buffer as a numerical Polars type.
232
+ #
233
+ # @param dtype [Object]
234
+ # Which type to interpret binary column into.
235
+ # @param endianness ["big", "little"]
236
+ # Which endianness to use when interpreting bytes, by default "little".
237
+ #
238
+ # @return [Expr]
239
+ #
240
+ # @example
241
+ # df = Polars::DataFrame.new({"data" => ["\x05\x00\x00\x00".b, "\x10\x00\x01\x00".b]})
242
+ # df.with_columns(
243
+ # bin2int: Polars.col("data").bin.reinterpret(
244
+ # dtype: Polars::Int32, endianness: "little"
245
+ # )
246
+ # )
247
+ # # =>
248
+ # # shape: (2, 2)
249
+ # # ┌─────────────────────┬─────────┐
250
+ # # │ data ┆ bin2int │
251
+ # # │ --- ┆ --- │
252
+ # # │ binary ┆ i32 │
253
+ # # ╞═════════════════════╪═════════╡
254
+ # # │ b"\x05\x00\x00\x00" ┆ 5 │
255
+ # # │ b"\x10\x00\x01\x00" ┆ 65552 │
256
+ # # └─────────────────────┴─────────┘
257
+ def reinterpret(
258
+ dtype:,
259
+ endianness: "little"
260
+ )
261
+ dtype = Utils.parse_into_datatype_expr(dtype)
262
+
263
+ Utils.wrap_expr(
264
+ _rbexpr.bin_reinterpret(dtype._rbdatatype_expr, endianness)
265
+ )
266
+ end
200
267
  end
201
268
  end
@@ -157,5 +157,48 @@ module Polars
157
157
  def encode(encoding)
158
158
  super
159
159
  end
160
+
161
+ # Get the size of the binary values in a Series in the given unit.
162
+ #
163
+ # @return [Series]
164
+ #
165
+ # @example
166
+ # s = Polars::Series.new("data", [512, 256, 2560, 1024].map { |n| "\x00".b * n })
167
+ # s.bin.size("kb")
168
+ # # =>
169
+ # # shape: (4,)
170
+ # # Series: 'data' [f64]
171
+ # # [
172
+ # # 0.5
173
+ # # 0.25
174
+ # # 2.5
175
+ # # 1.0
176
+ # # ]
177
+ def size(unit = "b")
178
+ super
179
+ end
180
+
181
+ # Interpret a buffer as a numerical Polars type.
182
+ #
183
+ # @param dtype [Object]
184
+ # Which type to interpret binary column into.
185
+ # @param endianness ["big", "little"]
186
+ # Which endianness to use when interpreting bytes, by default "little".
187
+ #
188
+ # @return [Series]
189
+ #
190
+ # @example
191
+ # s = Polars::Series.new("data", ["\x05\x00\x00\x00".b, "\x10\x00\x01\x00".b])
192
+ # s.bin.reinterpret(dtype: Polars::Int32, endianness: "little")
193
+ # # =>
194
+ # # shape: (2,)
195
+ # # Series: 'data' [i32]
196
+ # # [
197
+ # # 5
198
+ # # 65552
199
+ # # ]
200
+ def reinterpret(dtype:, endianness: "little")
201
+ super
202
+ end
160
203
  end
161
204
  end
@@ -32,5 +32,229 @@ module Polars
32
32
  def get_categories
33
33
  Utils.wrap_expr(_rbexpr.cat_get_categories)
34
34
  end
35
+
36
+ # Return the byte-length of the string representation of each value.
37
+ #
38
+ # @return [Expr]
39
+ #
40
+ # @note
41
+ # When working with non-ASCII text, the length in bytes is not the same as the
42
+ # length in characters. You may want to use `len_chars` instead.
43
+ # Note that `len_bytes` is much more performant (_O(1)_) than
44
+ # `len_chars` (_O(n)_).
45
+ #
46
+ # @example
47
+ # df = Polars::DataFrame.new(
48
+ # {"a" => Polars::Series.new(["Café", "345", "東京", nil], dtype: Polars::Categorical)}
49
+ # )
50
+ # df.with_columns(
51
+ # Polars.col("a").cat.len_bytes.alias("n_bytes"),
52
+ # Polars.col("a").cat.len_chars.alias("n_chars")
53
+ # )
54
+ # # =>
55
+ # # shape: (4, 3)
56
+ # # ┌──────┬─────────┬─────────┐
57
+ # # │ a ┆ n_bytes ┆ n_chars │
58
+ # # │ --- ┆ --- ┆ --- │
59
+ # # │ cat ┆ u32 ┆ u32 │
60
+ # # ╞══════╪═════════╪═════════╡
61
+ # # │ Café ┆ 5 ┆ 4 │
62
+ # # │ 345 ┆ 3 ┆ 3 │
63
+ # # │ 東京 ┆ 6 ┆ 2 │
64
+ # # │ null ┆ null ┆ null │
65
+ # # └──────┴─────────┴─────────┘
66
+ def len_bytes
67
+ Utils.wrap_expr(_rbexpr.cat_len_bytes)
68
+ end
69
+
70
+ # Return the number of characters of the string representation of each value.
71
+ #
72
+ # @return [Expr]
73
+ #
74
+ # @note
75
+ # When working with ASCII text, use `len_bytes` instead to achieve
76
+ # equivalent output with much better performance:
77
+ # `len_bytes` runs in _O(1)_, while `len_chars` runs in _O(n)_.
78
+ #
79
+ # A character is defined as a [Unicode scalar value](https://www.unicode.org/glossary/#unicode_scalar_value). A single character is
80
+ # represented by a single byte when working with ASCII text, and a maximum of
81
+ # 4 bytes otherwise.
82
+ #
83
+ # @example
84
+ # df = Polars::DataFrame.new(
85
+ # {"a" => Polars::Series.new(["Café", "345", "東京", nil], dtype: Polars::Categorical)}
86
+ # )
87
+ # df.with_columns(
88
+ # Polars.col("a").cat.len_chars.alias("n_chars"),
89
+ # Polars.col("a").cat.len_bytes.alias("n_bytes")
90
+ # )
91
+ # # =>
92
+ # # shape: (4, 3)
93
+ # # ┌──────┬─────────┬─────────┐
94
+ # # │ a ┆ n_chars ┆ n_bytes │
95
+ # # │ --- ┆ --- ┆ --- │
96
+ # # │ cat ┆ u32 ┆ u32 │
97
+ # # ╞══════╪═════════╪═════════╡
98
+ # # │ Café ┆ 4 ┆ 5 │
99
+ # # │ 345 ┆ 3 ┆ 3 │
100
+ # # │ 東京 ┆ 2 ┆ 6 │
101
+ # # │ null ┆ null ┆ null │
102
+ # # └──────┴─────────┴─────────┘
103
+ def len_chars
104
+ Utils.wrap_expr(_rbexpr.cat_len_chars)
105
+ end
106
+
107
+ # Check if string representations of values start with a substring.
108
+ #
109
+ # @param prefix [String]
110
+ # Prefix substring.
111
+ #
112
+ # @return [Expr]
113
+ #
114
+ # @note
115
+ # Whereas `str.starts_with` allows expression inputs, `cat.starts_with` requires
116
+ # a literal string value.
117
+ #
118
+ # @example
119
+ # df = Polars::DataFrame.new(
120
+ # {"fruits" => Polars::Series.new(["apple", "mango", nil], dtype: Polars::Categorical)}
121
+ # )
122
+ # df.with_columns(
123
+ # Polars.col("fruits").cat.starts_with("app").alias("has_prefix")
124
+ # )
125
+ # # =>
126
+ # # shape: (3, 2)
127
+ # # ┌────────┬────────────┐
128
+ # # │ fruits ┆ has_prefix │
129
+ # # │ --- ┆ --- │
130
+ # # │ cat ┆ bool │
131
+ # # ╞════════╪════════════╡
132
+ # # │ apple ┆ true │
133
+ # # │ mango ┆ false │
134
+ # # │ null ┆ null │
135
+ # # └────────┴────────────┘
136
+ #
137
+ # @example Using `starts_with` as a filter condition:
138
+ # df.filter(Polars.col("fruits").cat.starts_with("app"))
139
+ # # =>
140
+ # # shape: (1, 1)
141
+ # # ┌────────┐
142
+ # # │ fruits │
143
+ # # │ --- │
144
+ # # │ cat │
145
+ # # ╞════════╡
146
+ # # │ apple │
147
+ # # └────────┘
148
+ def starts_with(prefix)
149
+ if !prefix.is_a?(::String)
150
+ msg = "'prefix' must be a string; found #{prefix.inspect}"
151
+ raise TypeError, msg
152
+ end
153
+ Utils.wrap_expr(_rbexpr.cat_starts_with(prefix))
154
+ end
155
+
156
+ # Check if string representations of values end with a substring.
157
+ #
158
+ # @param suffix [String]
159
+ # Suffix substring.
160
+ #
161
+ # @return [Expr]
162
+ #
163
+ # @note
164
+ # Whereas `str.ends_with` allows expression inputs, `cat.ends_with` requires a
165
+ # literal string value.
166
+ #
167
+ # @example
168
+ # df = Polars::DataFrame.new(
169
+ # {"fruits" => Polars::Series.new(["apple", "mango", nil], dtype: Polars::Categorical)}
170
+ # )
171
+ # df.with_columns(Polars.col("fruits").cat.ends_with("go").alias("has_suffix"))
172
+ # # =>
173
+ # # shape: (3, 2)
174
+ # # ┌────────┬────────────┐
175
+ # # │ fruits ┆ has_suffix │
176
+ # # │ --- ┆ --- │
177
+ # # │ cat ┆ bool │
178
+ # # ╞════════╪════════════╡
179
+ # # │ apple ┆ false │
180
+ # # │ mango ┆ true │
181
+ # # │ null ┆ null │
182
+ # # └────────┴────────────┘
183
+ #
184
+ # @example Using `ends_with` as a filter condition:
185
+ # df.filter(Polars.col("fruits").cat.ends_with("go"))
186
+ # # =>
187
+ # # shape: (1, 1)
188
+ # # ┌────────┐
189
+ # # │ fruits │
190
+ # # │ --- │
191
+ # # │ cat │
192
+ # # ╞════════╡
193
+ # # │ mango │
194
+ # # └────────┘
195
+ def ends_with(suffix)
196
+ if !suffix.is_a?(::String)
197
+ msg = "'suffix' must be a string; found #{suffix.inspect}"
198
+ raise TypeError, msg
199
+ end
200
+ Utils.wrap_expr(_rbexpr.cat_ends_with(suffix))
201
+ end
202
+
203
+ # Extract a substring from the string representation of each value.
204
+ #
205
+ # @param offset [Integer]
206
+ # Start index. Negative indexing is supported.
207
+ # @param length [Integer]
208
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
209
+ # end of the string.
210
+ #
211
+ # @return [Expr]
212
+ #
213
+ # @note
214
+ # Both the `offset` and `length` inputs are defined in terms of the number
215
+ # of characters in the (UTF8) string. A character is defined as a
216
+ # [Unicode scalar value](https://www.unicode.org/glossary/#unicode_scalar_value). A single character is represented by a single byte
217
+ # when working with ASCII text, and a maximum of 4 bytes otherwise.
218
+ #
219
+ # @example
220
+ # df = Polars::DataFrame.new(
221
+ # {
222
+ # "s" => Polars::Series.new(
223
+ # ["pear", nil, "papaya", "dragonfruit"],
224
+ # dtype: Polars::Categorical
225
+ # )
226
+ # }
227
+ # )
228
+ # df.with_columns(Polars.col("s").cat.slice(-3).alias("slice"))
229
+ # # =>
230
+ # # shape: (4, 2)
231
+ # # ┌─────────────┬───────┐
232
+ # # │ s ┆ slice │
233
+ # # │ --- ┆ --- │
234
+ # # │ cat ┆ str │
235
+ # # ╞═════════════╪═══════╡
236
+ # # │ pear ┆ ear │
237
+ # # │ null ┆ null │
238
+ # # │ papaya ┆ aya │
239
+ # # │ dragonfruit ┆ uit │
240
+ # # └─────────────┴───────┘
241
+ #
242
+ # @example Using the optional `length` parameter
243
+ # df.with_columns(Polars.col("s").cat.slice(4, 3).alias("slice"))
244
+ # # =>
245
+ # # shape: (4, 2)
246
+ # # ┌─────────────┬───────┐
247
+ # # │ s ┆ slice │
248
+ # # │ --- ┆ --- │
249
+ # # │ cat ┆ str │
250
+ # # ╞═════════════╪═══════╡
251
+ # # │ pear ┆ │
252
+ # # │ null ┆ null │
253
+ # # │ papaya ┆ ya │
254
+ # # │ dragonfruit ┆ onf │
255
+ # # └─────────────┴───────┘
256
+ def slice(offset, length = nil)
257
+ Utils.wrap_expr(_rbexpr.cat_slice(offset, length))
258
+ end
35
259
  end
36
260
  end