polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,811 @@
1
+ module Polars
2
+ # Series.str namespace.
3
+ class StringNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "str"
7
+
8
+ # @private
9
def initialize(series)
  # Keep only the inner `_s` handle exposed by the given series; the `_s=`
  # writer is not defined in this file (presumably added by ExprDispatch -- confirm).
  inner = series._s
  self._s = inner
end
12
+
13
+ # Convert a Utf8 column into a Date column.
14
+ #
15
+ # @param format [String]
16
+ # Format to use for conversion. Refer to the
17
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
18
+ # for the full specification. Example: `"%Y-%m-%d"`.
19
+ # If set to nil (default), the format is inferred from the data.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # Require an exact format match. If false, allow the format to match anywhere
24
+ # in the target string.
25
+ # @param cache [Boolean]
26
+ # Use a cache of unique, converted dates to apply the conversion.
27
+ #
28
+ # @return [Series]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
32
+ # s.str.to_date
33
+ # # =>
34
+ # # shape: (3,)
35
+ # # Series: '' [date]
36
+ # # [
37
+ # # 2020-01-01
38
+ # # 2020-02-01
39
+ # # 2020-03-01
40
+ # # ]
41
def to_date(format = nil, strict: true, exact: true, cache: true)
  # Pass every argument, unchanged, to the next `to_date` in the lookup chain
  # (presumably resolved through the included ExprDispatch -- confirm).
  super(format, strict: strict, exact: exact, cache: cache)
end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, eg:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Series]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
def to_datetime(
  format = nil,
  time_unit: nil,
  time_zone: nil,
  strict: true,
  exact: true,
  cache: true,
  ambiguous: "raise"
)
  # Collect the keyword options first, then forward them together with
  # `format` to the next `to_datetime` in the lookup chain (presumably
  # resolved through the included ExprDispatch -- confirm).
  options = {
    time_unit: time_unit,
    time_zone: time_zone,
    strict: strict,
    exact: exact,
    cache: cache,
    ambiguous: ambiguous
  }
  super(format, **options)
end
90
+
91
+ # Convert a Utf8 column into a Time column.
92
+ #
93
+ # @param format [String]
94
+ # Format to use for conversion. Refer to the
95
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
96
+ # for the full specification. Example: `"%H:%M:%S"`.
97
+ # If set to nil (default), the format is inferred from the data.
98
+ # @param strict [Boolean]
99
+ # Raise an error if any conversion fails.
100
+ # @param cache [Boolean]
101
+ # Use a cache of unique, converted times to apply the conversion.
102
+ #
103
+ # @return [Series]
104
+ #
105
+ # @example
106
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
107
+ # s.str.to_time("%H:%M")
108
+ # # =>
109
+ # # shape: (3,)
110
+ # # Series: '' [time]
111
+ # # [
112
+ # # 01:00:00
113
+ # # 02:00:00
114
+ # # 03:00:00
115
+ # # ]
116
def to_time(format = nil, strict: true, cache: true)
  # Explicitly forward the format and both flags to the next `to_time`
  # in the lookup chain (presumably via ExprDispatch -- confirm).
  super(format, strict: strict, cache: cache)
end
119
+
120
+ # Parse a Series of dtype Utf8 to a Date/Datetime Series.
121
+ #
122
+ # @param datatype [Symbol]
123
+ # `:date`, `:datetime`, or `:time`.
124
+ # @param fmt [String]
125
+ # Format to use, refer to the
126
+ # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
127
+ # for specification. Example: `"%y-%m-%d"`.
128
+ # @param strict [Boolean]
129
+ # Raise an error if any conversion fails.
130
+ # @param exact [Boolean]
131
+ # - If true, require an exact format match.
132
+ # - If false, allow the format to match anywhere in the target string.
133
+ # @param cache [Boolean]
134
+ # Use a cache of unique, converted dates to apply the datetime conversion.
135
+ #
136
+ # @return [Series]
137
+ #
138
+ # @example Dealing with a consistent format:
139
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
140
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
141
+ # # =>
142
+ # # shape: (2,)
143
+ # # Series: '' [datetime[μs, UTC]]
144
+ # # [
145
+ # # 2020-01-01 01:00:00 UTC
146
+ # # 2020-01-01 02:00:00 UTC
147
+ # # ]
148
+ #
149
+ # @example Dealing with different formats.
150
+ # s = Polars::Series.new(
151
+ # "date",
152
+ # [
153
+ # "2021-04-22",
154
+ # "2022-01-04 00:00:00",
155
+ # "01/31/22",
156
+ # "Sun Jul 8 00:34:60 2001"
157
+ # ]
158
+ # )
159
+ # s.to_frame.select(
160
+ # Polars.coalesce(
161
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
162
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
163
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
164
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
165
+ # )
166
+ # ).to_series
167
+ # # =>
168
+ # # shape: (4,)
169
+ # # Series: 'date' [date]
170
+ # # [
171
+ # # 2021-04-22
172
+ # # 2022-01-04
173
+ # # 2022-01-31
174
+ # # 2001-07-08
175
+ # # ]
176
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
  # Positional arguments first, then the three flags, all passed through
  # untouched to the next `strptime` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super(datatype, fmt, strict: strict, exact: exact, cache: cache)
end
179
+
180
+ # Get length of the string values in the Series (as number of bytes).
181
+ #
182
+ # @return [Series]
183
+ #
184
+ # @note
185
+ # The returned lengths are equal to the number of bytes in the UTF8 string. If you
186
+ # need the length in terms of the number of characters, use `n_chars` instead.
187
+ #
188
+ # @example
189
+ # s = Polars::Series.new(["Café", nil, "345", "東京"])
190
+ # s.str.lengths
191
+ # # =>
192
+ # # shape: (4,)
193
+ # # Series: '' [u32]
194
+ # # [
195
+ # # 5
196
+ # # null
197
+ # # 3
198
+ # # 6
199
+ # # ]
200
def lengths
  # No arguments to pass on; defer to the next `lengths` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super()
end
203
+
204
+ # Get length of the string values in the Series (as number of chars).
205
+ #
206
+ # @return [Series]
207
+ #
208
+ # @note
209
+ # If you know that you are working with ASCII text, `lengths` will be
210
+ # equivalent, and faster (returns length in terms of the number of bytes).
211
+ #
212
+ # @example
213
+ # s = Polars::Series.new(["Café", nil, "345", "東京"])
214
+ # s.str.n_chars
215
+ # # =>
216
+ # # shape: (4,)
217
+ # # Series: '' [u32]
218
+ # # [
219
+ # # 4
220
+ # # null
221
+ # # 3
222
+ # # 2
223
+ # # ]
224
def n_chars
  # No arguments to pass on; defer to the next `n_chars` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super()
end
227
+
228
+ # Vertically concat the values in the Series to a single string value.
229
+ #
230
+ # @param delimiter [String]
231
+ # The delimiter to insert between consecutive string values.
232
+ #
233
+ # @return [Series]
234
+ #
235
+ # @example
236
+ # Polars::Series.new([1, nil, 2]).str.join("-")
237
+ # # =>
238
+ # # shape: (1,)
239
+ # # Series: '' [str]
240
+ # # [
241
+ # # "1-2"
242
+ # # ]
243
+ #
244
+ # @example
245
+ # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
246
+ # # =>
247
+ # # shape: (1,)
248
+ # # Series: '' [str]
249
+ # # [
250
+ # # null
251
+ # # ]
252
def join(delimiter = "-", ignore_nulls: true)
  # Forward the delimiter and the null-handling flag unchanged to the next
  # `join` in the lookup chain (presumably via ExprDispatch -- confirm).
  super(delimiter, ignore_nulls: ignore_nulls)
end
255
+ alias_method :concat, :join
256
+
257
+ # Check if strings in Series contain a substring that matches a regex.
258
+ #
259
+ # @param pattern [String]
260
+ # A valid regex pattern.
261
+ # @param literal [Boolean]
262
+ # Treat pattern as a literal string.
263
+ #
264
+ # @return [Series]
265
+ #
266
+ # @example
267
+ # s = Polars::Series.new(["Crab", "cat and dog", "rab$bit", nil])
268
+ # s.str.contains("cat|bit")
269
+ # # =>
270
+ # # shape: (4,)
271
+ # # Series: '' [bool]
272
+ # # [
273
+ # # false
274
+ # # true
275
+ # # true
276
+ # # null
277
+ # # ]
278
+ #
279
+ # @example
280
+ # s.str.contains("rab$", literal: true)
281
+ # # =>
282
+ # # shape: (4,)
283
+ # # Series: '' [bool]
284
+ # # [
285
+ # # false
286
+ # # false
287
+ # # true
288
+ # # null
289
+ # # ]
290
def contains(pattern, literal: false)
  # Forward the pattern and the `literal` flag unchanged to the next
  # `contains` in the lookup chain (presumably via ExprDispatch -- confirm).
  super(pattern, literal: literal)
end
293
+
294
+ # Check if string values end with a substring.
295
+ #
296
+ # @param sub [String]
297
+ # Suffix substring.
298
+ #
299
+ # @return [Series]
300
+ #
301
+ # @example
302
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil])
303
+ # s.str.ends_with("go")
304
+ # # =>
305
+ # # shape: (3,)
306
+ # # Series: 'fruits' [bool]
307
+ # # [
308
+ # # false
309
+ # # true
310
+ # # null
311
+ # # ]
312
def ends_with(sub)
  # Forward the suffix unchanged to the next `ends_with` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super(sub)
end
315
+
316
+ # Check if string values start with a substring.
317
+ #
318
+ # @param sub [String]
319
+ # Prefix substring.
320
+ #
321
+ # @return [Series]
322
+ #
323
+ # @example
324
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil])
325
+ # s.str.starts_with("app")
326
+ # # =>
327
+ # # shape: (3,)
328
+ # # Series: 'fruits' [bool]
329
+ # # [
330
+ # # true
331
+ # # false
332
+ # # null
333
+ # # ]
334
def starts_with(sub)
  # Forward the prefix unchanged to the next `starts_with` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super(sub)
end
337
+
338
+ # Decode a value using the provided encoding.
339
+ #
340
+ # @param encoding ["hex", "base64"]
341
+ # The encoding to use.
342
+ # @param strict [Boolean]
343
+ # How to handle invalid inputs:
344
+ #
345
+ # - `true`: An error will be thrown if unable to decode a value.
346
+ # - `false`: Unhandled values will be replaced with `nil`.
347
+ #
348
+ # @return [Series]
349
+ #
350
+ # @example
351
+ # s = Polars::Series.new(["666f6f", "626172", nil])
352
+ # s.str.decode("hex")
353
+ # # =>
354
+ # # shape: (3,)
355
+ # # Series: '' [binary]
356
+ # # [
357
+ # # b"foo"
358
+ # # b"bar"
359
+ # # null
360
+ # # ]
361
def decode(encoding, strict: false)
  # Forward the encoding name and the `strict` flag unchanged to the next
  # `decode` in the lookup chain (presumably via ExprDispatch -- confirm).
  super(encoding, strict: strict)
end
364
+
365
+ # Encode a value using the provided encoding.
366
+ #
367
+ # @param encoding ["hex", "base64"]
368
+ # The encoding to use.
369
+ #
370
+ # @return [Series]
371
+ #
372
+ # @example
373
+ # s = Polars::Series.new(["foo", "bar", nil])
374
+ # s.str.encode("hex")
375
+ # # =>
376
+ # # shape: (3,)
377
+ # # Series: '' [str]
378
+ # # [
379
+ # # "666f6f"
380
+ # # "626172"
381
+ # # null
382
+ # # ]
383
def encode(encoding)
  # Forward the encoding name unchanged to the next `encode` in the lookup
  # chain (presumably via ExprDispatch -- confirm).
  super(encoding)
end
386
+
387
+ # Extract the first match of json string with provided JSONPath expression.
388
+ #
389
+ # Raises an error if an invalid JSON string is encountered.
390
+ # All return values are cast to Utf8 regardless of the original value.
391
+ #
392
+ # Documentation on JSONPath standard can be found
393
+ # [here](https://goessner.net/articles/JsonPath/).
394
+ #
395
+ # @param json_path [String]
396
+ # A valid JSON path query string.
397
+ #
398
+ # @return [Series]
399
+ #
400
+ # @example
401
+ # df = Polars::DataFrame.new(
402
+ # {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
403
+ # )
404
+ # df.select(Polars.col("json_val").str.json_path_match("$.a"))[0.., 0]
405
+ # # =>
406
+ # # shape: (5,)
407
+ # # Series: 'json_val' [str]
408
+ # # [
409
+ # # "1"
410
+ # # null
411
+ # # "2"
412
+ # # "2.1"
413
+ # # "true"
414
+ # # ]
415
def json_path_match(json_path)
  # Forward the JSONPath query unchanged to the next `json_path_match` in the
  # lookup chain (presumably via ExprDispatch -- confirm).
  super(json_path)
end
418
+
419
+ # Extract the target capture group from provided patterns.
420
+ #
421
+ # @param pattern [String]
422
+ # A valid regex pattern
423
+ # @param group_index [Integer]
424
+ # Index of the targeted capture group.
425
+ # Group 0 means the whole pattern; the first capture group begins at index 1.
426
+ # Defaults to the first capture group.
427
+ #
428
+ # @return [Series]
429
+ #
430
+ # @example
431
+ # df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
432
+ # df.select([Polars.col("foo").str.extract('(\d+)')])
433
+ # # =>
434
+ # # shape: (2, 1)
435
+ # # ┌─────┐
436
+ # # │ foo │
437
+ # # │ --- │
438
+ # # │ str │
439
+ # # ╞═════╡
440
+ # # │ 123 │
441
+ # # │ 678 │
442
+ # # └─────┘
443
def extract(pattern, group_index: 1)
  # Forward the pattern and the capture-group index unchanged to the next
  # `extract` in the lookup chain (presumably via ExprDispatch -- confirm).
  super(pattern, group_index: group_index)
end
446
+
447
+ # Extracts all matches for the given regex pattern.
448
+ #
449
+ # Extract each successive non-overlapping regex match in an individual string as
450
+ # an array
451
+ #
452
+ # @param pattern [String]
453
+ # A valid regex pattern
454
+ #
455
+ # @return [Series]
456
+ #
457
+ # @example
458
+ # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
459
+ # s.str.extract_all('(\d+)')
460
+ # # =>
461
+ # # shape: (2,)
462
+ # # Series: 'foo' [list[str]]
463
+ # # [
464
+ # # ["123", "45"]
465
+ # # ["678", "910"]
466
+ # # ]
467
def extract_all(pattern)
  # Forward the pattern unchanged to the next `extract_all` in the lookup
  # chain (presumably via ExprDispatch -- confirm).
  super(pattern)
end
470
+
471
+ # Count all successive non-overlapping regex matches.
472
+ #
473
+ # @param pattern [String]
474
+ # A valid regex pattern
475
+ #
476
+ # @return [Series]
477
+ #
478
+ # @example
479
+ # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
480
+ # s.str.count_match('\d')
481
+ # # =>
482
+ # # shape: (2,)
483
+ # # Series: 'foo' [u32]
484
+ # # [
485
+ # # 5
486
+ # # 6
487
+ # # ]
488
def count_match(pattern)
  # Forward the pattern unchanged to the next `count_match` in the lookup
  # chain (presumably via ExprDispatch -- confirm).
  super(pattern)
end
491
+
492
+ # Split the string by a substring.
493
+ #
494
+ # @param by [String]
495
+ # Substring to split by.
496
+ # @param inclusive [Boolean]
497
+ # If true, include the split character/string in the results.
498
+ #
499
+ # @return [Series]
500
def split(by, inclusive: false)
  # Forward the separator and the `inclusive` flag unchanged to the next
  # `split` in the lookup chain (presumably via ExprDispatch -- confirm).
  super(by, inclusive: inclusive)
end
503
+
504
+ # Split the string by a substring using `n` splits.
505
+ #
506
+ # Results in a struct of `n+1` fields.
507
+ #
508
+ # If it cannot make `n` splits, the remaining field elements will be null.
509
+ #
510
+ # @param by [String]
511
+ # Substring to split by.
512
+ # @param n [Integer]
513
+ # Number of splits to make.
514
+ # @param inclusive [Boolean]
515
+ # If true, include the split character/string in the results.
516
+ #
517
+ # @return [Series]
518
+ #
519
+ # @example
520
+ # df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
521
+ # df["x"].str.split_exact("_", 1).alias("fields")
522
+ # # =>
523
+ # # shape: (4,)
524
+ # # Series: 'fields' [struct[2]]
525
+ # # [
526
+ # # {"a","1"}
527
+ # # {null,null}
528
+ # # {"c",null}
529
+ # # {"d","4"}
530
+ # # ]
531
+ #
532
+ # @example Split string values in column x in exactly 2 parts and assign each part to a new column.
533
+ # df["x"]
534
+ # .str.split_exact("_", 1)
535
+ # .struct.rename_fields(["first_part", "second_part"])
536
+ # .alias("fields")
537
+ # .to_frame
538
+ # .unnest("fields")
539
+ # # =>
540
+ # # shape: (4, 2)
541
+ # # ┌────────────┬─────────────┐
542
+ # # │ first_part ┆ second_part │
543
+ # # │ --- ┆ --- │
544
+ # # │ str ┆ str │
545
+ # # ╞════════════╪═════════════╡
546
+ # # │ a ┆ 1 │
547
+ # # │ null ┆ null │
548
+ # # │ c ┆ null │
549
+ # # │ d ┆ 4 │
550
+ # # └────────────┴─────────────┘
551
def split_exact(by, n, inclusive: false)
  # Forward the separator, the split count and the `inclusive` flag unchanged
  # to the next `split_exact` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super(by, n, inclusive: inclusive)
end
554
+
555
+ # Split the string by a substring, restricted to returning at most `n` items.
556
+ #
557
+ # If the number of possible splits is less than `n-1`, the remaining field
558
+ # elements will be null. If the number of possible splits is `n-1` or greater,
559
+ # the last (nth) substring will contain the remainder of the string.
560
+ #
561
+ # @param by [String]
562
+ # Substring to split by.
563
+ # @param n [Integer]
564
+ # Max number of items to return.
565
+ #
566
+ # @return [Series]
567
+ #
568
+ # @example
569
+ # df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
570
+ # df["s"].str.splitn(" ", 2).alias("fields")
571
+ # # =>
572
+ # # shape: (4,)
573
+ # # Series: 'fields' [struct[2]]
574
+ # # [
575
+ # # {"foo","bar"}
576
+ # # {null,null}
577
+ # # {"foo-bar",null}
578
+ # # {"foo","bar baz"}
579
+ # # ]
580
+ #
581
+ # @example Split string values in column s in exactly 2 parts and assign each part to a new column.
582
+ # df["s"]
583
+ # .str.splitn(" ", 2)
584
+ # .struct.rename_fields(["first_part", "second_part"])
585
+ # .alias("fields")
586
+ # .to_frame
587
+ # .unnest("fields")
588
+ # # =>
589
+ # # shape: (4, 2)
590
+ # # ┌────────────┬─────────────┐
591
+ # # │ first_part ┆ second_part │
592
+ # # │ --- ┆ --- │
593
+ # # │ str ┆ str │
594
+ # # ╞════════════╪═════════════╡
595
+ # # │ foo ┆ bar │
596
+ # # │ null ┆ null │
597
+ # # │ foo-bar ┆ null │
598
+ # # │ foo ┆ bar baz │
599
+ # # └────────────┴─────────────┘
600
def splitn(by, n)
  # Wrap the raw `_s` handle (Utils.wrap_s is defined elsewhere; its result
  # is used here as something that responds to `name` and `to_frame`).
  wrapped = Utils.wrap_s(_s)
  frame = wrapped.to_frame
  # Build the expression-level `str.splitn` on the column named after the
  # wrapped series, select it from the frame, and convert back with `to_series`.
  split_expr = Polars.col(wrapped.name).str.splitn(by, n)
  frame.select(split_expr).to_series
end
604
+
605
+ # Replace first matching regex/literal substring with a new string value.
606
+ #
607
+ # @param pattern [String]
608
+ # A valid regex pattern.
609
+ # @param value [String]
610
+ # Substring to replace.
611
+ # @param literal [Boolean]
612
+ # Treat pattern as a literal string.
613
+ #
614
+ # @return [Series]
615
+ #
616
+ # @example
617
+ # s = Polars::Series.new(["123abc", "abc456"])
618
+ # s.str.replace('abc\b', "ABC")
619
+ # # =>
620
+ # # shape: (2,)
621
+ # # Series: '' [str]
622
+ # # [
623
+ # # "123ABC"
624
+ # # "abc456"
625
+ # # ]
626
def replace(pattern, value, literal: false)
  # Forward the pattern, the replacement and the `literal` flag unchanged to
  # the next `replace` in the lookup chain (presumably via ExprDispatch -- confirm).
  super(pattern, value, literal: literal)
end
629
+
630
+ # Replace all matching regex/literal substrings with a new string value.
631
+ #
632
+ # @param pattern [String]
633
+ # A valid regex pattern.
634
+ # @param value [String]
635
+ # Substring to replace.
636
+ # @param literal [Boolean]
637
+ # Treat pattern as a literal string.
638
+ #
639
+ # @return [Series]
640
+ #
641
+ # @example
642
+ # df = Polars::Series.new(["abcabc", "123a123"])
643
+ # df.str.replace_all("a", "-")
644
+ # # =>
645
+ # # shape: (2,)
646
+ # # Series: '' [str]
647
+ # # [
648
+ # # "-bc-bc"
649
+ # # "123-123"
650
+ # # ]
651
def replace_all(pattern, value, literal: false)
  # Forward the pattern, the replacement and the `literal` flag unchanged to
  # the next `replace_all` in the lookup chain
  # (presumably via ExprDispatch -- confirm).
  super(pattern, value, literal: literal)
end
654
+
655
+ # Remove leading and trailing whitespace.
656
+ #
657
+ # @param matches [String, nil]
658
+ # An optional single character that should be trimmed
659
+ #
660
+ # @return [Series]
661
def strip(matches = nil)
  # Forward the optional `matches` argument unchanged to the next `strip`
  # in the lookup chain (presumably via ExprDispatch -- confirm).
  super(matches)
end
664
+
665
+ # Remove leading whitespace.
666
+ #
667
+ # @param matches [String, nil]
668
+ # An optional single character that should be trimmed
669
+ #
670
+ # @return [Series]
671
def lstrip(matches = nil)
  # Forward the optional `matches` argument unchanged to the next `lstrip`
  # in the lookup chain (presumably via ExprDispatch -- confirm).
  super(matches)
end
674
+
675
+ # Remove trailing whitespace.
676
+ #
677
+ # @param matches [String, nil]
678
+ # An optional single character that should be trimmed
679
+ #
680
+ # @return [Series]
681
def rstrip(matches = nil)
  # Forward the optional `matches` argument unchanged to the next `rstrip`
  # in the lookup chain (presumably via ExprDispatch -- confirm).
  super(matches)
end
684
+
685
+ # Fills the string with zeroes.
686
+ #
687
+ # Return a copy of the string left filled with ASCII '0' digits to make a string
688
+ # of length `length`.
689
+ #
690
+ # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
691
+ # sign character rather than before. The original string is returned if `length` is
692
+ # less than or equal to the length of the string itself.
693
+ #
694
+ # @param length [Integer]
695
+ # Fill the value up to this length.
696
+ #
697
+ # @return [Series]
698
def zfill(length)
  # Forward the target length unchanged to the next `zfill` in the lookup
  # chain (presumably via ExprDispatch -- confirm).
  super(length)
end
701
+
702
+ # Return the string left justified in a string of length `width`.
703
+ #
704
+ # Padding is done using the specified `fillchar`. The original string is
705
+ # returned if `width` is less than or equal to `s.length`.
706
+ #
707
+ # @param width [Integer]
708
+ # Justify left to this length.
709
+ # @param fillchar [String]
710
+ # Fill with this ASCII character.
711
+ #
712
+ # @return [Series]
713
+ #
714
+ # @example
715
+ # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
716
+ # s.str.ljust(8, "*")
717
+ # # =>
718
+ # # shape: (4,)
719
+ # # Series: 'a' [str]
720
+ # # [
721
+ # # "cow*****"
722
+ # # "monkey**"
723
+ # # null
724
+ # # "hippopotamus"
725
+ # # ]
726
def ljust(width, fillchar = " ")
  # Forward the width and fill character unchanged to the next `ljust` in
  # the lookup chain (presumably via ExprDispatch -- confirm).
  super(width, fillchar)
end
729
+
730
+ # Return the string right justified in a string of length `width`.
731
+ #
732
+ # Padding is done using the specified `fillchar`. The original string is
733
+ # returned if `width` is less than or equal to `s.length`.
734
+ #
735
+ # @param width [Integer]
736
+ # Justify right to this length.
737
+ # @param fillchar [String]
738
+ # Fill with this ASCII character.
739
+ #
740
+ # @return [Series]
741
+ #
742
+ # @example
743
+ # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
744
+ # s.str.rjust(8, "*")
745
+ # # =>
746
+ # # shape: (4,)
747
+ # # Series: 'a' [str]
748
+ # # [
749
+ # # "*****cow"
750
+ # # "**monkey"
751
+ # # null
752
+ # # "hippopotamus"
753
+ # # ]
754
def rjust(width, fillchar = " ")
  # Forward the width and fill character unchanged to the next `rjust` in
  # the lookup chain (presumably via ExprDispatch -- confirm).
  super(width, fillchar)
end
757
+
758
+ # Modify the strings to their lowercase equivalent.
759
+ #
760
+ # @return [Series]
761
def to_lowercase
  # No arguments to pass on; defer to the next `to_lowercase` in the lookup
  # chain (presumably via ExprDispatch -- confirm).
  super()
end
764
+
765
+ # Modify the strings to their uppercase equivalent.
766
+ #
767
+ # @return [Series]
768
def to_uppercase
  # No arguments to pass on; defer to the next `to_uppercase` in the lookup
  # chain (presumably via ExprDispatch -- confirm).
  super()
end
771
+
772
+ # Create subslices of the string values of a Utf8 Series.
773
+ #
774
+ # @param offset [Integer]
775
+ # Start index. Negative indexing is supported.
776
+ # @param length [Integer]
777
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
778
+ # end of the string.
779
+ #
780
+ # @return [Series]
781
+ #
782
+ # @example
783
+ # s = Polars::Series.new("s", ["pear", nil, "papaya", "dragonfruit"])
784
+ # s.str.slice(-3)
785
+ # # =>
786
+ # # shape: (4,)
787
+ # # Series: 's' [str]
788
+ # # [
789
+ # # "ear"
790
+ # # null
791
+ # # "aya"
792
+ # # "uit"
793
+ # # ]
794
+ #
795
+ # @example Using the optional `length` parameter
796
+ # s.str.slice(4, 3)
797
+ # # =>
798
+ # # shape: (4,)
799
+ # # Series: 's' [str]
800
+ # # [
801
+ # # ""
802
+ # # null
803
+ # # "ya"
804
+ # # "onf"
805
+ # # ]
806
def slice(offset, length = nil)
  # Wrap the raw `_s` handle (Utils.wrap_s is defined elsewhere; its result
  # is used here as something that responds to `name` and `to_frame`).
  wrapped = Utils.wrap_s(_s)
  frame = wrapped.to_frame
  # Build the expression-level `str.slice` on the column named after the
  # wrapped series, select it from the frame, and convert back with `to_series`.
  slice_expr = Polars.col(wrapped.name).str.slice(offset, length)
  frame.select(slice_expr).to_series
end
810
+ end
811
+ end