polars-df 0.10.0-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,810 @@
1
+ module Polars
2
+ # Series.str namespace.
3
+ class StringNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "str"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s # copy the given series' `_s` value into this object's own `_s`
11
+ end
12
+
13
+ # Convert a Utf8 column into a Date column.
14
+ #
15
+ # @param format [String]
16
+ # Format to use for conversion. Refer to the
17
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
18
+ # for the full specification. Example: `"%Y-%m-%d"`.
19
+ # If set to nil (default), the format is inferred from the data.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # Require an exact format match. If false, allow the format to match anywhere
24
+ # in the target string.
25
+ # @param cache [Boolean]
26
+ # Use a cache of unique, converted dates to apply the conversion.
27
+ #
28
+ # @return [Series]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
32
+ # s.str.to_date
33
+ # # =>
34
+ # # shape: (3,)
35
+ # # Series: '' [date]
36
+ # # [
37
+ # # 2020-01-01
38
+ # # 2020-02-01
39
+ # # 2020-03-01
40
+ # # ]
41
+ def to_date(format = nil, strict: true, exact: true, cache: true)
42
+ super # bare `super` re-sends the current format, strict:, exact:, cache: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
43
+ end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, eg:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Series]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
+ def to_datetime(
80
+ format = nil,
81
+ time_unit: nil,
82
+ time_zone: nil,
83
+ strict: true,
84
+ exact: true,
85
+ cache: true,
86
+ use_earliest: nil # NOTE(review): no @param entry documents use_earliest; its meaning is not visible here
87
+ )
88
+ super # bare `super` re-sends all seven current parameter values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
89
+ end
90
+
91
+ # Convert a Utf8 column into a Time column.
92
+ #
93
+ # @param format [String]
94
+ # Format to use for conversion. Refer to the
95
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
96
+ # for the full specification. Example: `"%H:%M:%S"`.
97
+ # If set to nil (default), the format is inferred from the data.
98
+ # @param strict [Boolean]
99
+ # Raise an error if any conversion fails.
100
+ # @param cache [Boolean]
101
+ # Use a cache of unique, converted times to apply the conversion.
102
+ #
103
+ # @return [Series]
104
+ #
105
+ # @example
106
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
107
+ # s.str.to_time("%H:%M")
108
+ # # =>
109
+ # # shape: (3,)
110
+ # # Series: '' [time]
111
+ # # [
112
+ # # 01:00:00
113
+ # # 02:00:00
114
+ # # 03:00:00
115
+ # # ]
116
+ def to_time(format = nil, strict: true, cache: true)
117
+ super # bare `super` re-sends the current format, strict:, cache: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
118
+ end
119
+
120
+ # Parse a Series of dtype Utf8 to a Date/Datetime Series.
121
+ #
122
+ # @param datatype [Symbol]
123
+ # `:date`, `:datetime`, or `:time`.
124
+ # @param fmt [String]
125
+ # Format to use, refer to the
126
+ # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
127
+ # for specification. Example: `"%y-%m-%d"`.
128
+ # @param strict [Boolean]
129
+ # Raise an error if any conversion fails.
130
+ # @param exact [Boolean]
131
+ # - If true, require an exact format match.
132
+ # - If false, allow the format to match anywhere in the target string.
133
+ # @param cache [Boolean]
134
+ # Use a cache of unique, converted dates to apply the datetime conversion.
135
+ #
136
+ # @return [Series]
137
+ #
138
+ # @example Dealing with a consistent format:
139
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
140
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
141
+ # # =>
142
+ # # shape: (2,)
143
+ # # Series: '' [datetime[μs, UTC]]
144
+ # # [
145
+ # # 2020-01-01 01:00:00 UTC
146
+ # # 2020-01-01 02:00:00 UTC
147
+ # # ]
148
+ #
149
+ # @example Dealing with different formats.
150
+ # s = Polars::Series.new(
151
+ # "date",
152
+ # [
153
+ # "2021-04-22",
154
+ # "2022-01-04 00:00:00",
155
+ # "01/31/22",
156
+ # "Sun Jul 8 00:34:60 2001"
157
+ # ]
158
+ # )
159
+ # s.to_frame.select(
160
+ # Polars.coalesce(
161
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
162
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
163
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
164
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
165
+ # )
166
+ # ).to_series
167
+ # # =>
168
+ # # shape: (4,)
169
+ # # Series: 'date' [date]
170
+ # # [
171
+ # # 2021-04-22
172
+ # # 2022-01-04
173
+ # # 2022-01-31
174
+ # # 2001-07-08
175
+ # # ]
176
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
177
+ super # bare `super` re-sends the current datatype, fmt, strict:, exact:, cache: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
178
+ end
179
+
180
+ # Get length of the string values in the Series (as number of bytes).
181
+ #
182
+ # @return [Series]
183
+ #
184
+ # @note
185
+ # The returned lengths are equal to the number of bytes in the UTF8 string. If you
186
+ # need the length in terms of the number of characters, use `n_chars` instead.
187
+ #
188
+ # @example
189
+ # s = Polars::Series.new(["Café", nil, "345", "東京"])
190
+ # s.str.lengths
191
+ # # =>
192
+ # # shape: (4,)
193
+ # # Series: '' [u32]
194
+ # # [
195
+ # # 5
196
+ # # null
197
+ # # 3
198
+ # # 6
199
+ # # ]
200
+ def lengths
201
+ super # bare `super`, no arguments to forward. NOTE(review): presumably answered by the included ExprDispatch — confirm
202
+ end
203
+
204
+ # Get length of the string values in the Series (as number of chars).
205
+ #
206
+ # @return [Series]
207
+ #
208
+ # @note
209
+ # If you know that you are working with ASCII text, `lengths` will be
210
+ # equivalent, and faster (returns length in terms of the number of bytes).
211
+ #
212
+ # @example
213
+ # s = Polars::Series.new(["Café", nil, "345", "東京"])
214
+ # s.str.n_chars
215
+ # # =>
216
+ # # shape: (4,)
217
+ # # Series: '' [u32]
218
+ # # [
219
+ # # 4
220
+ # # null
221
+ # # 3
222
+ # # 2
223
+ # # ]
224
+ def n_chars
225
+ super # bare `super`, no arguments to forward. NOTE(review): presumably answered by the included ExprDispatch — confirm
226
+ end
227
+
228
+ # Vertically concat the values in the Series to a single string value.
229
+ #
230
+ # @param delimiter [String]
231
+ # The delimiter to insert between consecutive string values.
232
+ #
233
+ # @return [Series]
234
+ #
235
+ # @example
236
+ # Polars::Series.new([1, nil, 2]).str.concat("-")
237
+ # # =>
238
+ # # shape: (1,)
239
+ # # Series: '' [str]
240
+ # # [
241
+ # # "1-2"
242
+ # # ]
243
+ #
244
+ # @example
245
+ # Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
246
+ # # =>
247
+ # # shape: (1,)
248
+ # # Series: '' [str]
249
+ # # [
250
+ # # null
251
+ # # ]
252
+ def concat(delimiter = "-", ignore_nulls: true) # NOTE(review): ignore_nulls has no @param entry; per the examples above, nulls are skipped when true and the result is null when false
253
+ super # bare `super` re-sends the current delimiter, ignore_nulls: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
254
+ end
255
+
256
+ # Check if strings in Series contain a substring that matches a regex.
257
+ #
258
+ # @param pattern [String]
259
+ # A valid regex pattern.
260
+ # @param literal [Boolean]
261
+ # Treat pattern as a literal string.
262
+ #
263
+ # @return [Series]
264
+ #
265
+ # @example
266
+ # s = Polars::Series.new(["Crab", "cat and dog", "rab$bit", nil])
267
+ # s.str.contains("cat|bit")
268
+ # # =>
269
+ # # shape: (4,)
270
+ # # Series: '' [bool]
271
+ # # [
272
+ # # false
273
+ # # true
274
+ # # true
275
+ # # null
276
+ # # ]
277
+ #
278
+ # @example
279
+ # s.str.contains("rab$", literal: true)
280
+ # # =>
281
+ # # shape: (4,)
282
+ # # Series: '' [bool]
283
+ # # [
284
+ # # false
285
+ # # false
286
+ # # true
287
+ # # null
288
+ # # ]
289
+ def contains(pattern, literal: false)
290
+ super # bare `super` re-sends the current pattern, literal: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
291
+ end
292
+
293
+ # Check if string values end with a substring.
294
+ #
295
+ # @param sub [String]
296
+ # Suffix substring.
297
+ #
298
+ # @return [Series]
299
+ #
300
+ # @example
301
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil])
302
+ # s.str.ends_with("go")
303
+ # # =>
304
+ # # shape: (3,)
305
+ # # Series: 'fruits' [bool]
306
+ # # [
307
+ # # false
308
+ # # true
309
+ # # null
310
+ # # ]
311
+ def ends_with(sub)
312
+ super # bare `super` re-sends sub unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
313
+ end
314
+
315
+ # Check if string values start with a substring.
316
+ #
317
+ # @param sub [String]
318
+ # Prefix substring.
319
+ #
320
+ # @return [Series]
321
+ #
322
+ # @example
323
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil])
324
+ # s.str.starts_with("app")
325
+ # # =>
326
+ # # shape: (3,)
327
+ # # Series: 'fruits' [bool]
328
+ # # [
329
+ # # true
330
+ # # false
331
+ # # null
332
+ # # ]
333
+ def starts_with(sub)
334
+ super # bare `super` re-sends sub unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
335
+ end
336
+
337
+ # Decode a value using the provided encoding.
338
+ #
339
+ # @param encoding ["hex", "base64"]
340
+ # The encoding to use.
341
+ # @param strict [Boolean]
342
+ # How to handle invalid inputs:
343
+ #
344
+ # - `true`: An error will be thrown if unable to decode a value.
345
+ # - `false`: Unhandled values will be replaced with `nil`.
346
+ #
347
+ # @return [Series]
348
+ #
349
+ # @example
350
+ # s = Polars::Series.new(["666f6f", "626172", nil])
351
+ # s.str.decode("hex")
352
+ # # =>
353
+ # # shape: (3,)
354
+ # # Series: '' [binary]
355
+ # # [
356
+ # # b"foo"
357
+ # # b"bar"
358
+ # # null
359
+ # # ]
360
+ def decode(encoding, strict: false)
361
+ super # bare `super` re-sends the current encoding, strict: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
362
+ end
363
+
364
+ # Encode a value using the provided encoding.
365
+ #
366
+ # @param encoding ["hex", "base64"]
367
+ # The encoding to use.
368
+ #
369
+ # @return [Series]
370
+ #
371
+ # @example
372
+ # s = Polars::Series.new(["foo", "bar", nil])
373
+ # s.str.encode("hex")
374
+ # # =>
375
+ # # shape: (3,)
376
+ # # Series: '' [str]
377
+ # # [
378
+ # # "666f6f"
379
+ # # "626172"
380
+ # # null
381
+ # # ]
382
+ def encode(encoding)
383
+ super # bare `super` re-sends encoding unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
384
+ end
385
+
386
+ # Extract the first match of json string with provided JSONPath expression.
387
+ #
388
+ # Raises an error if an invalid JSON string is encountered.
389
+ # All returned values are cast to Utf8 regardless of the original value.
390
+ #
391
+ # Documentation on JSONPath standard can be found
392
+ # [here](https://goessner.net/articles/JsonPath/).
393
+ #
394
+ # @param json_path [String]
395
+ # A valid JSON path query string.
396
+ #
397
+ # @return [Series]
398
+ #
399
+ # @example
400
+ # df = Polars::DataFrame.new(
401
+ # {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
402
+ # )
403
+ # df.select(Polars.col("json_val").str.json_path_match("$.a"))[0.., 0]
404
+ # # =>
405
+ # # shape: (5,)
406
+ # # Series: 'json_val' [str]
407
+ # # [
408
+ # # "1"
409
+ # # null
410
+ # # "2"
411
+ # # "2.1"
412
+ # # "true"
413
+ # # ]
414
+ def json_path_match(json_path)
415
+ super # bare `super` re-sends json_path unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
416
+ end
417
+
418
+ # Extract the target capture group from provided patterns.
419
+ #
420
+ # @param pattern [String]
421
+ # A valid regex pattern
422
+ # @param group_index [Integer]
423
+ # Index of the targeted capture group.
424
+ # Group 0 means the whole pattern; the first group begins at index 1.
425
+ # Defaults to the first capture group.
426
+ #
427
+ # @return [Series]
428
+ #
429
+ # @example
430
+ # df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
431
+ # df.select([Polars.col("foo").str.extract('(\d+)')])
432
+ # # =>
433
+ # # shape: (2, 1)
434
+ # # ┌─────┐
435
+ # # │ foo │
436
+ # # │ --- │
437
+ # # │ str │
438
+ # # ╞═════╡
439
+ # # │ 123 │
440
+ # # │ 678 │
441
+ # # └─────┘
442
+ def extract(pattern, group_index: 1)
443
+ super # bare `super` re-sends the current pattern, group_index: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
444
+ end
445
+
446
+ # Extracts all matches for the given regex pattern.
447
+ #
448
+ # Extract each successive non-overlapping regex match in an individual string as
449
+ # an array
450
+ #
451
+ # @param pattern [String]
452
+ # A valid regex pattern
453
+ #
454
+ # @return [Series]
455
+ #
456
+ # @example
457
+ # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
458
+ # s.str.extract_all('(\d+)')
459
+ # # =>
460
+ # # shape: (2,)
461
+ # # Series: 'foo' [list[str]]
462
+ # # [
463
+ # # ["123", "45"]
464
+ # # ["678", "910"]
465
+ # # ]
466
+ def extract_all(pattern)
467
+ super # bare `super` re-sends pattern unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
468
+ end
469
+
470
+ # Count all successive non-overlapping regex matches.
471
+ #
472
+ # @param pattern [String]
473
+ # A valid regex pattern
474
+ #
475
+ # @return [Series]
476
+ #
477
+ # @example
478
+ # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
479
+ # s.str.count_match('\d')
480
+ # # =>
481
+ # # shape: (2,)
482
+ # # Series: 'foo' [u32]
483
+ # # [
484
+ # # 5
485
+ # # 6
486
+ # # ]
487
+ def count_match(pattern)
488
+ super # bare `super` re-sends pattern unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
489
+ end
490
+
491
+ # Split the string by a substring.
492
+ #
493
+ # @param by [String]
494
+ # Substring to split by.
495
+ # @param inclusive [Boolean]
496
+ # If true, include the split character/string in the results.
497
+ #
498
+ # @return [Series]
499
+ def split(by, inclusive: false)
500
+ super # bare `super` re-sends the current by, inclusive: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
501
+ end
502
+
503
+ # Split the string by a substring using `n` splits.
504
+ #
505
+ # Results in a struct of `n+1` fields.
506
+ #
507
+ # If it cannot make `n` splits, the remaining field elements will be null.
508
+ #
509
+ # @param by [String]
510
+ # Substring to split by.
511
+ # @param n [Integer]
512
+ # Number of splits to make.
513
+ # @param inclusive [Boolean]
514
+ # If true, include the split character/string in the results.
515
+ #
516
+ # @return [Series]
517
+ #
518
+ # @example
519
+ # df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
520
+ # df["x"].str.split_exact("_", 1).alias("fields")
521
+ # # =>
522
+ # # shape: (4,)
523
+ # # Series: 'fields' [struct[2]]
524
+ # # [
525
+ # # {"a","1"}
526
+ # # {null,null}
527
+ # # {"c",null}
528
+ # # {"d","4"}
529
+ # # ]
530
+ #
531
+ # @example Split string values in column x in exactly 2 parts and assign each part to a new column.
532
+ # df["x"]
533
+ # .str.split_exact("_", 1)
534
+ # .struct.rename_fields(["first_part", "second_part"])
535
+ # .alias("fields")
536
+ # .to_frame
537
+ # .unnest("fields")
538
+ # # =>
539
+ # # shape: (4, 2)
540
+ # # ┌────────────┬─────────────┐
541
+ # # │ first_part ┆ second_part │
542
+ # # │ --- ┆ --- │
543
+ # # │ str ┆ str │
544
+ # # ╞════════════╪═════════════╡
545
+ # # │ a ┆ 1 │
546
+ # # │ null ┆ null │
547
+ # # │ c ┆ null │
548
+ # # │ d ┆ 4 │
549
+ # # └────────────┴─────────────┘
550
+ def split_exact(by, n, inclusive: false)
551
+ super # bare `super` re-sends the current by, n, inclusive: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
552
+ end
553
+
554
+ # Split the string by a substring, restricted to returning at most `n` items.
555
+ #
556
+ # If the number of possible splits is less than `n-1`, the remaining field
557
+ # elements will be null. If the number of possible splits is `n-1` or greater,
558
+ # the last (nth) substring will contain the remainder of the string.
559
+ #
560
+ # @param by [String]
561
+ # Substring to split by.
562
+ # @param n [Integer]
563
+ # Max number of items to return.
564
+ #
565
+ # @return [Series]
566
+ #
567
+ # @example
568
+ # df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
569
+ # df["s"].str.splitn(" ", 2).alias("fields")
570
+ # # =>
571
+ # # shape: (4,)
572
+ # # Series: 'fields' [struct[2]]
573
+ # # [
574
+ # # {"foo","bar"}
575
+ # # {null,null}
576
+ # # {"foo-bar",null}
577
+ # # {"foo","bar baz"}
578
+ # # ]
579
+ #
580
+ # @example Split string values in column s in exactly 2 parts and assign each part to a new column.
581
+ # df["s"]
582
+ # .str.splitn(" ", 2)
583
+ # .struct.rename_fields(["first_part", "second_part"])
584
+ # .alias("fields")
585
+ # .to_frame
586
+ # .unnest("fields")
587
+ # # =>
588
+ # # shape: (4, 2)
589
+ # # ┌────────────┬─────────────┐
590
+ # # │ first_part ┆ second_part │
591
+ # # │ --- ┆ --- │
592
+ # # │ str ┆ str │
593
+ # # ╞════════════╪═════════════╡
594
+ # # │ foo ┆ bar │
595
+ # # │ null ┆ null │
596
+ # # │ foo-bar ┆ null │
597
+ # # │ foo ┆ bar baz │
598
+ # # └────────────┴─────────────┘
599
+ def splitn(by, n)
600
+ s = Utils.wrap_s(_s) # turn the stored `_s` into the object queried below (name, to_frame)
601
+ s.to_frame.select(Polars.col(s.name).str.splitn(by, n)).to_series # run the column-expression `str.splitn` against s's own column in a frame built from s, then convert the selection back with to_series
602
+ end
603
+
604
+ # Replace first matching regex/literal substring with a new string value.
605
+ #
606
+ # @param pattern [String]
607
+ # A valid regex pattern.
608
+ # @param value [String]
609
+ # Substring to replace.
610
+ # @param literal [Boolean]
611
+ # Treat pattern as a literal string.
612
+ #
613
+ # @return [Series]
614
+ #
615
+ # @example
616
+ # s = Polars::Series.new(["123abc", "abc456"])
617
+ # s.str.replace('abc\b', "ABC")
618
+ # # =>
619
+ # # shape: (2,)
620
+ # # Series: '' [str]
621
+ # # [
622
+ # # "123ABC"
623
+ # # "abc456"
624
+ # # ]
625
+ def replace(pattern, value, literal: false)
626
+ super # bare `super` re-sends the current pattern, value, literal: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
627
+ end
628
+
629
+ # Replace all matching regex/literal substrings with a new string value.
630
+ #
631
+ # @param pattern [String]
632
+ # A valid regex pattern.
633
+ # @param value [String]
634
+ # Substring to replace.
635
+ # @param literal [Boolean]
636
+ # Treat pattern as a literal string.
637
+ #
638
+ # @return [Series]
639
+ #
640
+ # @example
641
+ # s = Polars::Series.new(["abcabc", "123a123"])
641
+ # s.str.replace_all("a", "-")
643
+ # # =>
644
+ # # shape: (2,)
645
+ # # Series: '' [str]
646
+ # # [
647
+ # # "-bc-bc"
648
+ # # "123-123"
649
+ # # ]
650
+ def replace_all(pattern, value, literal: false)
651
+ super # bare `super` re-sends the current pattern, value, literal: values (defaults applied). NOTE(review): presumably answered by the included ExprDispatch — confirm
652
+ end
653
+
654
+ # Remove leading and trailing whitespace.
655
+ #
656
+ # @param matches [String, nil]
657
+ # An optional single character that should be trimmed
658
+ #
659
+ # @return [Series]
660
+ def strip(matches = nil)
661
+ super # bare `super` re-sends the current matches value (nil when omitted). NOTE(review): presumably answered by the included ExprDispatch — confirm
662
+ end
663
+
664
+ # Remove leading whitespace.
665
+ #
666
+ # @param matches [String, nil]
667
+ # An optional single character that should be trimmed
668
+ #
669
+ # @return [Series]
670
+ def lstrip(matches = nil)
671
+ super # bare `super` re-sends the current matches value (nil when omitted). NOTE(review): presumably answered by the included ExprDispatch — confirm
672
+ end
673
+
674
+ # Remove trailing whitespace.
675
+ #
676
+ # @param matches [String, nil]
677
+ # An optional single character that should be trimmed
678
+ #
679
+ # @return [Series]
680
+ def rstrip(matches = nil)
681
+ super # bare `super` re-sends the current matches value (nil when omitted). NOTE(review): presumably answered by the included ExprDispatch — confirm
682
+ end
683
+
684
+ # Fills the string with zeroes.
685
+ #
686
+ # Return a copy of the string left filled with ASCII '0' digits to make a string
687
+ # of length `length`.
688
+ #
689
+ # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
690
+ # sign character rather than before. The original string is returned if `length` is
691
+ # less than or equal to `s.length`.
692
+ #
693
+ # @param length [Integer]
694
+ # Fill the value up to this length.
695
+ #
696
+ # @return [Series]
697
+ def zfill(length)
698
+ super # bare `super` re-sends length unchanged. NOTE(review): presumably answered by the included ExprDispatch — confirm
699
+ end
700
+
701
+ # Return the string left justified in a string of length `width`.
702
+ #
703
+ # Padding is done using the specified `fillchar`. The original string is
704
+ # returned if `width` is less than or equal to `s.length`.
705
+ #
706
+ # @param width [Integer]
707
+ # Justify left to this length.
708
+ # @param fillchar [String]
709
+ # Fill with this ASCII character.
710
+ #
711
+ # @return [Series]
712
+ #
713
+ # @example
714
+ # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
715
+ # s.str.ljust(8, "*")
716
+ # # =>
717
+ # # shape: (4,)
718
+ # # Series: 'a' [str]
719
+ # # [
720
+ # # "cow*****"
721
+ # # "monkey**"
722
+ # # null
723
+ # # "hippopotamus"
724
+ # # ]
725
+ def ljust(width, fillchar = " ")
726
+ super # bare `super` re-sends the current width, fillchar values (fillchar is a single space when omitted). NOTE(review): presumably answered by the included ExprDispatch — confirm
727
+ end
728
+
729
+ # Return the string right justified in a string of length `width`.
730
+ #
731
+ # Padding is done using the specified `fillchar`. The original string is
732
+ # returned if `width` is less than or equal to `s.length`.
733
+ #
734
+ # @param width [Integer]
735
+ # Justify right to this length.
736
+ # @param fillchar [String]
737
+ # Fill with this ASCII character.
738
+ #
739
+ # @return [Series]
740
+ #
741
+ # @example
742
+ # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
743
+ # s.str.rjust(8, "*")
744
+ # # =>
745
+ # # shape: (4,)
746
+ # # Series: 'a' [str]
747
+ # # [
748
+ # # "*****cow"
749
+ # # "**monkey"
750
+ # # null
751
+ # # "hippopotamus"
752
+ # # ]
753
+ def rjust(width, fillchar = " ")
754
+ super # bare `super` re-sends the current width, fillchar values (fillchar is a single space when omitted). NOTE(review): presumably answered by the included ExprDispatch — confirm
755
+ end
756
+
757
+ # Modify the strings to their lowercase equivalent.
758
+ #
759
+ # @return [Series]
760
+ def to_lowercase
761
+ super # bare `super`, no arguments to forward. NOTE(review): presumably answered by the included ExprDispatch — confirm
762
+ end
763
+
764
+ # Modify the strings to their uppercase equivalent.
765
+ #
766
+ # @return [Series]
767
+ def to_uppercase
768
+ super # bare `super`, no arguments to forward. NOTE(review): presumably answered by the included ExprDispatch — confirm
769
+ end
770
+
771
+ # Create subslices of the string values of a Utf8 Series.
772
+ #
773
+ # @param offset [Integer]
774
+ # Start index. Negative indexing is supported.
775
+ # @param length [Integer]
776
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
777
+ # end of the string.
778
+ #
779
+ # @return [Series]
780
+ #
781
+ # @example
782
+ # s = Polars::Series.new("s", ["pear", nil, "papaya", "dragonfruit"])
783
+ # s.str.slice(-3)
784
+ # # =>
785
+ # # shape: (4,)
786
+ # # Series: 's' [str]
787
+ # # [
788
+ # # "ear"
789
+ # # null
790
+ # # "aya"
791
+ # # "uit"
792
+ # # ]
793
+ #
794
+ # @example Using the optional `length` parameter
795
+ # s.str.slice(4, 3)
796
+ # # =>
797
+ # # shape: (4,)
798
+ # # Series: 's' [str]
799
+ # # [
800
+ # # ""
801
+ # # null
802
+ # # "ya"
803
+ # # "onf"
804
+ # # ]
805
+ def slice(offset, length = nil)
806
+ s = Utils.wrap_s(_s) # turn the stored `_s` into the object queried below (name, to_frame)
807
+ s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series # run the column-expression `str.slice` against s's own column in a frame built from s, then convert the selection back with to_series
808
+ end
809
+ end
810
+ end