polars-df 0.13.0-aarch64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39059 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,811 @@
1
+ module Polars
2
+ # Series.str namespace.
3
+ class StringNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "str"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Convert a Utf8 column into a Date column.
14
+ #
15
+ # @param format [String]
16
+ # Format to use for conversion. Refer to the
17
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
18
+ # for the full specification. Example: `"%Y-%m-%d"`.
19
+ # If set to nil (default), the format is inferred from the data.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # Require an exact format match. If false, allow the format to match anywhere
24
+ # in the target string.
25
+ # @param cache [Boolean]
26
+ # Use a cache of unique, converted dates to apply the conversion.
27
+ #
28
+ # @return [Series]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
32
+ # s.str.to_date
33
+ # # =>
34
+ # # shape: (3,)
35
+ # # Series: '' [date]
36
+ # # [
37
+ # # 2020-01-01
38
+ # # 2020-02-01
39
+ # # 2020-03-01
40
+ # # ]
41
+ def to_date(format = nil, strict: true, exact: true, cache: true)
42
+ super
43
+ end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, eg:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Series]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
+ def to_datetime(
80
+ format = nil,
81
+ time_unit: nil,
82
+ time_zone: nil,
83
+ strict: true,
84
+ exact: true,
85
+ cache: true,
86
+ ambiguous: "raise"
87
+ )
88
+ super
89
+ end
90
+
91
+ # Convert a Utf8 column into a Time column.
92
+ #
93
+ # @param format [String]
94
+ # Format to use for conversion. Refer to the
95
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
96
+ # for the full specification. Example: `"%H:%M:%S"`.
97
+ # If set to nil (default), the format is inferred from the data.
98
+ # @param strict [Boolean]
99
+ # Raise an error if any conversion fails.
100
+ # @param cache [Boolean]
101
+ # Use a cache of unique, converted times to apply the conversion.
102
+ #
103
+ # @return [Series]
104
+ #
105
+ # @example
106
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
107
+ # s.str.to_time("%H:%M")
108
+ # # =>
109
+ # # shape: (3,)
110
+ # # Series: '' [time]
111
+ # # [
112
+ # # 01:00:00
113
+ # # 02:00:00
114
+ # # 03:00:00
115
+ # # ]
116
+ def to_time(format = nil, strict: true, cache: true)
117
+ super
118
+ end
119
+
120
+ # Parse a Series of dtype Utf8 to a Date, Datetime, or Time Series.
121
+ #
122
+ # @param datatype [Symbol]
123
+ # `:date`, `:datetime`, or `:time`.
124
+ # @param fmt [String]
125
+ # Format to use, refer to the
126
+ # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
127
+ # for specification. Example: `"%y-%m-%d"`.
128
+ # @param strict [Boolean]
129
+ # Raise an error if any conversion fails.
130
+ # @param exact [Boolean]
131
+ # - If true, require an exact format match.
132
+ # - If false, allow the format to match anywhere in the target string.
133
+ # @param cache [Boolean]
134
+ # Use a cache of unique, converted dates to apply the datetime conversion.
135
+ #
136
+ # @return [Series]
137
+ #
138
+ # @example Dealing with a consistent format:
139
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
140
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
141
+ # # =>
142
+ # # shape: (2,)
143
+ # # Series: '' [datetime[μs, UTC]]
144
+ # # [
145
+ # # 2020-01-01 01:00:00 UTC
146
+ # # 2020-01-01 02:00:00 UTC
147
+ # # ]
148
+ #
149
+ # @example Dealing with different formats.
150
+ # s = Polars::Series.new(
151
+ # "date",
152
+ # [
153
+ # "2021-04-22",
154
+ # "2022-01-04 00:00:00",
155
+ # "01/31/22",
156
+ # "Sun Jul 8 00:34:60 2001"
157
+ # ]
158
+ # )
159
+ # s.to_frame.select(
160
+ # Polars.coalesce(
161
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
162
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
163
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
164
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
165
+ # )
166
+ # ).to_series
167
+ # # =>
168
+ # # shape: (4,)
169
+ # # Series: 'date' [date]
170
+ # # [
171
+ # # 2021-04-22
172
+ # # 2022-01-04
173
+ # # 2022-01-31
174
+ # # 2001-07-08
175
+ # # ]
176
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
177
+ super
178
+ end
179
+
180
+ # Get length of the string values in the Series (as number of bytes).
181
+ #
182
+ # @return [Series]
183
+ #
184
+ # @note
185
+ # The returned lengths are equal to the number of bytes in the UTF8 string. If you
186
+ # need the length in terms of the number of characters, use `n_chars` instead.
187
+ #
188
+ # @example
189
+ # s = Polars::Series.new(["Café", nil, "345", "東京"])
190
+ # s.str.lengths
191
+ # # =>
192
+ # # shape: (4,)
193
+ # # Series: '' [u32]
194
+ # # [
195
+ # # 5
196
+ # # null
197
+ # # 3
198
+ # # 6
199
+ # # ]
200
+ def lengths
201
+ super
202
+ end
203
+
204
+ # Get length of the string values in the Series (as number of chars).
205
+ #
206
+ # @return [Series]
207
+ #
208
+ # @note
209
+ # If you know that you are working with ASCII text, `lengths` will be
210
+ # equivalent, and faster (returns length in terms of the number of bytes).
211
+ #
212
+ # @example
213
+ # s = Polars::Series.new(["Café", nil, "345", "東京"])
214
+ # s.str.n_chars
215
+ # # =>
216
+ # # shape: (4,)
217
+ # # Series: '' [u32]
218
+ # # [
219
+ # # 4
220
+ # # null
221
+ # # 3
222
+ # # 2
223
+ # # ]
224
+ def n_chars
225
+ super
226
+ end
227
+
228
+ # Vertically concatenate the values in the Series to a single string value.
229
+ #
230
+ # @param delimiter [String]
231
+ # The delimiter to insert between consecutive string values.
232
+ #
233
+ # @return [Series]
234
+ #
235
+ # @example
236
+ # Polars::Series.new([1, nil, 2]).str.join("-")
237
+ # # =>
238
+ # # shape: (1,)
239
+ # # Series: '' [str]
240
+ # # [
241
+ # # "1-2"
242
+ # # ]
243
+ #
244
+ # @example
245
+ # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
246
+ # # =>
247
+ # # shape: (1,)
248
+ # # Series: '' [str]
249
+ # # [
250
+ # # null
251
+ # # ]
252
+ def join(delimiter = "-", ignore_nulls: true)
253
+ super
254
+ end
255
+ alias_method :concat, :join
256
+
257
+ # Check if strings in Series contain a substring that matches a regex.
258
+ #
259
+ # @param pattern [String]
260
+ # A valid regex pattern.
261
+ # @param literal [Boolean]
262
+ # Treat pattern as a literal string.
263
+ #
264
+ # @return [Series]
265
+ #
266
+ # @example
267
+ # s = Polars::Series.new(["Crab", "cat and dog", "rab$bit", nil])
268
+ # s.str.contains("cat|bit")
269
+ # # =>
270
+ # # shape: (4,)
271
+ # # Series: '' [bool]
272
+ # # [
273
+ # # false
274
+ # # true
275
+ # # true
276
+ # # null
277
+ # # ]
278
+ #
279
+ # @example
280
+ # s.str.contains("rab$", literal: true)
281
+ # # =>
282
+ # # shape: (4,)
283
+ # # Series: '' [bool]
284
+ # # [
285
+ # # false
286
+ # # false
287
+ # # true
288
+ # # null
289
+ # # ]
290
+ def contains(pattern, literal: false)
291
+ super
292
+ end
293
+
294
+ # Check if string values end with a substring.
295
+ #
296
+ # @param sub [String]
297
+ # Suffix substring.
298
+ #
299
+ # @return [Series]
300
+ #
301
+ # @example
302
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil])
303
+ # s.str.ends_with("go")
304
+ # # =>
305
+ # # shape: (3,)
306
+ # # Series: 'fruits' [bool]
307
+ # # [
308
+ # # false
309
+ # # true
310
+ # # null
311
+ # # ]
312
+ def ends_with(sub)
313
+ super
314
+ end
315
+
316
+ # Check if string values start with a substring.
317
+ #
318
+ # @param sub [String]
319
+ # Prefix substring.
320
+ #
321
+ # @return [Series]
322
+ #
323
+ # @example
324
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil])
325
+ # s.str.starts_with("app")
326
+ # # =>
327
+ # # shape: (3,)
328
+ # # Series: 'fruits' [bool]
329
+ # # [
330
+ # # true
331
+ # # false
332
+ # # null
333
+ # # ]
334
+ def starts_with(sub)
335
+ super
336
+ end
337
+
338
+ # Decode a value using the provided encoding.
339
+ #
340
+ # @param encoding ["hex", "base64"]
341
+ # The encoding to use.
342
+ # @param strict [Boolean]
343
+ # How to handle invalid inputs:
344
+ #
345
+ # - `true`: An error will be thrown if unable to decode a value.
346
+ # - `false`: Unhandled values will be replaced with `nil`.
347
+ #
348
+ # @return [Series]
349
+ #
350
+ # @example
351
+ # s = Polars::Series.new(["666f6f", "626172", nil])
352
+ # s.str.decode("hex")
353
+ # # =>
354
+ # # shape: (3,)
355
+ # # Series: '' [binary]
356
+ # # [
357
+ # # b"foo"
358
+ # # b"bar"
359
+ # # null
360
+ # # ]
361
+ def decode(encoding, strict: false)
362
+ super
363
+ end
364
+
365
+ # Encode a value using the provided encoding.
366
+ #
367
+ # @param encoding ["hex", "base64"]
368
+ # The encoding to use.
369
+ #
370
+ # @return [Series]
371
+ #
372
+ # @example
373
+ # s = Polars::Series.new(["foo", "bar", nil])
374
+ # s.str.encode("hex")
375
+ # # =>
376
+ # # shape: (3,)
377
+ # # Series: '' [str]
378
+ # # [
379
+ # # "666f6f"
380
+ # # "626172"
381
+ # # null
382
+ # # ]
383
+ def encode(encoding)
384
+ super
385
+ end
386
+
387
+ # Extract the first match of a JSON string with the provided JSONPath expression.
388
+ #
389
+ # Raises an error if an invalid JSON string is encountered.
390
+ # All return values will be cast to Utf8 regardless of the original value.
391
+ #
392
+ # Documentation on JSONPath standard can be found
393
+ # [here](https://goessner.net/articles/JsonPath/).
394
+ #
395
+ # @param json_path [String]
396
+ # A valid JSON path query string.
397
+ #
398
+ # @return [Series]
399
+ #
400
+ # @example
401
+ # df = Polars::DataFrame.new(
402
+ # {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
403
+ # )
404
+ # df.select(Polars.col("json_val").str.json_path_match("$.a"))[0.., 0]
405
+ # # =>
406
+ # # shape: (5,)
407
+ # # Series: 'json_val' [str]
408
+ # # [
409
+ # # "1"
410
+ # # null
411
+ # # "2"
412
+ # # "2.1"
413
+ # # "true"
414
+ # # ]
415
+ def json_path_match(json_path)
416
+ super
417
+ end
418
+
419
+ # Extract the target capture group from the provided pattern.
420
+ #
421
+ # @param pattern [String]
422
+ # A valid regex pattern
423
+ # @param group_index [Integer]
424
+ # Index of the targeted capture group.
425
+ # Group 0 means the whole pattern; the first group begins at index 1.
426
+ # Defaults to the first capture group.
427
+ #
428
+ # @return [Series]
429
+ #
430
+ # @example
431
+ # df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
432
+ # df.select([Polars.col("foo").str.extract('(\d+)')])
433
+ # # =>
434
+ # # shape: (2, 1)
435
+ # # ┌─────┐
436
+ # # │ foo │
437
+ # # │ --- │
438
+ # # │ str │
439
+ # # ╞═════╡
440
+ # # │ 123 │
441
+ # # │ 678 │
442
+ # # └─────┘
443
+ def extract(pattern, group_index: 1)
444
+ super
445
+ end
446
+
447
+ # Extracts all matches for the given regex pattern.
448
+ #
449
+ # Extract each successive non-overlapping regex match in an individual string as
450
+ # an array
451
+ #
452
+ # @param pattern [String]
453
+ # A valid regex pattern
454
+ #
455
+ # @return [Series]
456
+ #
457
+ # @example
458
+ # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
459
+ # s.str.extract_all('(\d+)')
460
+ # # =>
461
+ # # shape: (2,)
462
+ # # Series: 'foo' [list[str]]
463
+ # # [
464
+ # # ["123", "45"]
465
+ # # ["678", "910"]
466
+ # # ]
467
+ def extract_all(pattern)
468
+ super
469
+ end
470
+
471
+ # Count all successive non-overlapping regex matches.
472
+ #
473
+ # @param pattern [String]
474
+ # A valid regex pattern
475
+ #
476
+ # @return [Series]
477
+ #
478
+ # @example
479
+ # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
480
+ # s.str.count_match('\d')
481
+ # # =>
482
+ # # shape: (2,)
483
+ # # Series: 'foo' [u32]
484
+ # # [
485
+ # # 5
486
+ # # 6
487
+ # # ]
488
+ def count_match(pattern)
489
+ super
490
+ end
491
+
492
+ # Split the string by a substring.
493
+ #
494
+ # @param by [String]
495
+ # Substring to split by.
496
+ # @param inclusive [Boolean]
497
+ # If true, include the split character/string in the results.
498
+ #
499
+ # @return [Series]
500
+ def split(by, inclusive: false)
501
+ super
502
+ end
503
+
504
+ # Split the string by a substring using `n` splits.
505
+ #
506
+ # Results in a struct of `n+1` fields.
507
+ #
508
+ # If it cannot make `n` splits, the remaining field elements will be null.
509
+ #
510
+ # @param by [String]
511
+ # Substring to split by.
512
+ # @param n [Integer]
513
+ # Number of splits to make.
514
+ # @param inclusive [Boolean]
515
+ # If true, include the split character/string in the results.
516
+ #
517
+ # @return [Series]
518
+ #
519
+ # @example
520
+ # df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
521
+ # df["x"].str.split_exact("_", 1).alias("fields")
522
+ # # =>
523
+ # # shape: (4,)
524
+ # # Series: 'fields' [struct[2]]
525
+ # # [
526
+ # # {"a","1"}
527
+ # # {null,null}
528
+ # # {"c",null}
529
+ # # {"d","4"}
530
+ # # ]
531
+ #
532
+ # @example Split string values in column x in exactly 2 parts and assign each part to a new column.
533
+ # df["x"]
534
+ # .str.split_exact("_", 1)
535
+ # .struct.rename_fields(["first_part", "second_part"])
536
+ # .alias("fields")
537
+ # .to_frame
538
+ # .unnest("fields")
539
+ # # =>
540
+ # # shape: (4, 2)
541
+ # # ┌────────────┬─────────────┐
542
+ # # │ first_part ┆ second_part │
543
+ # # │ --- ┆ --- │
544
+ # # │ str ┆ str │
545
+ # # ╞════════════╪═════════════╡
546
+ # # │ a ┆ 1 │
547
+ # # │ null ┆ null │
548
+ # # │ c ┆ null │
549
+ # # │ d ┆ 4 │
550
+ # # └────────────┴─────────────┘
551
+ def split_exact(by, n, inclusive: false)
552
+ super
553
+ end
554
+
555
+ # Split the string by a substring, restricted to returning at most `n` items.
556
+ #
557
+ # If the number of possible splits is less than `n-1`, the remaining field
558
+ # elements will be null. If the number of possible splits is `n-1` or greater,
559
+ # the last (nth) substring will contain the remainder of the string.
560
+ #
561
+ # @param by [String]
562
+ # Substring to split by.
563
+ # @param n [Integer]
564
+ # Max number of items to return.
565
+ #
566
+ # @return [Series]
567
+ #
568
+ # @example
569
+ # df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
570
+ # df["s"].str.splitn(" ", 2).alias("fields")
571
+ # # =>
572
+ # # shape: (4,)
573
+ # # Series: 'fields' [struct[2]]
574
+ # # [
575
+ # # {"foo","bar"}
576
+ # # {null,null}
577
+ # # {"foo-bar",null}
578
+ # # {"foo","bar baz"}
579
+ # # ]
580
+ #
581
+ # @example Split string values in column s in exactly 2 parts and assign each part to a new column.
582
+ # df["s"]
583
+ # .str.splitn(" ", 2)
584
+ # .struct.rename_fields(["first_part", "second_part"])
585
+ # .alias("fields")
586
+ # .to_frame
587
+ # .unnest("fields")
588
+ # # =>
589
+ # # shape: (4, 2)
590
+ # # ┌────────────┬─────────────┐
591
+ # # │ first_part ┆ second_part │
592
+ # # │ --- ┆ --- │
593
+ # # │ str ┆ str │
594
+ # # ╞════════════╪═════════════╡
595
+ # # │ foo ┆ bar │
596
+ # # │ null ┆ null │
597
+ # # │ foo-bar ┆ null │
598
+ # # │ foo ┆ bar baz │
599
+ # # └────────────┴─────────────┘
600
+ def splitn(by, n)
601
+ s = Utils.wrap_s(_s)
602
+ s.to_frame.select(Polars.col(s.name).str.splitn(by, n)).to_series
603
+ end
604
+
605
+ # Replace first matching regex/literal substring with a new string value.
606
+ #
607
+ # @param pattern [String]
608
+ # A valid regex pattern.
609
+ # @param value [String]
610
+ # Substring to replace.
611
+ # @param literal [Boolean]
612
+ # Treat pattern as a literal string.
613
+ #
614
+ # @return [Series]
615
+ #
616
+ # @example
617
+ # s = Polars::Series.new(["123abc", "abc456"])
618
+ # s.str.replace('abc\b', "ABC")
619
+ # # =>
620
+ # # shape: (2,)
621
+ # # Series: '' [str]
622
+ # # [
623
+ # # "123ABC"
624
+ # # "abc456"
625
+ # # ]
626
+ def replace(pattern, value, literal: false)
627
+ super
628
+ end
629
+
630
+ # Replace all matching regex/literal substrings with a new string value.
631
+ #
632
+ # @param pattern [String]
633
+ # A valid regex pattern.
634
+ # @param value [String]
635
+ # Substring to replace.
636
+ # @param literal [Boolean]
637
+ # Treat pattern as a literal string.
638
+ #
639
+ # @return [Series]
640
+ #
641
+ # @example
642
+ # s = Polars::Series.new(["abcabc", "123a123"])
642
+ # s.str.replace_all("a", "-")
644
+ # # =>
645
+ # # shape: (2,)
646
+ # # Series: '' [str]
647
+ # # [
648
+ # # "-bc-bc"
649
+ # # "123-123"
650
+ # # ]
651
+ def replace_all(pattern, value, literal: false)
652
+ super
653
+ end
654
+
655
+ # Remove leading and trailing whitespace.
656
+ #
657
+ # @param matches [String, nil]
658
+ # An optional single character that should be trimmed
659
+ #
660
+ # @return [Series]
661
+ def strip(matches = nil)
662
+ super
663
+ end
664
+
665
+ # Remove leading whitespace.
666
+ #
667
+ # @param matches [String, nil]
668
+ # An optional single character that should be trimmed
669
+ #
670
+ # @return [Series]
671
+ def lstrip(matches = nil)
672
+ super
673
+ end
674
+
675
+ # Remove trailing whitespace.
676
+ #
677
+ # @param matches [String, nil]
678
+ # An optional single character that should be trimmed
679
+ #
680
+ # @return [Series]
681
+ def rstrip(matches = nil)
682
+ super
683
+ end
684
+
685
+ # Fills the string with zeroes.
686
+ #
687
+ # Return a copy of the string left filled with ASCII '0' digits to make a string
688
+ # of length width.
689
+ #
690
+ # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
691
+ # sign character rather than before. The original string is returned if width is
692
+ # less than or equal to `s.length`.
693
+ #
694
+ # @param length [Integer]
695
+ # Fill the value up to this length.
696
+ #
697
+ # @return [Series]
698
+ def zfill(length)
699
+ super
700
+ end
701
+
702
+ # Return the string left justified in a string of length `width`.
703
+ #
704
+ # Padding is done using the specified `fillchar`. The original string is
705
+ # returned if `width` is less than or equal to `s.length`.
706
+ #
707
+ # @param width [Integer]
708
+ # Justify left to this length.
709
+ # @param fillchar [String]
710
+ # Fill with this ASCII character.
711
+ #
712
+ # @return [Series]
713
+ #
714
+ # @example
715
+ # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
716
+ # s.str.ljust(8, "*")
717
+ # # =>
718
+ # # shape: (4,)
719
+ # # Series: 'a' [str]
720
+ # # [
721
+ # # "cow*****"
722
+ # # "monkey**"
723
+ # # null
724
+ # # "hippopotamus"
725
+ # # ]
726
+ def ljust(width, fillchar = " ")
727
+ super
728
+ end
729
+
730
+ # Return the string right justified in a string of length `width`.
731
+ #
732
+ # Padding is done using the specified `fillchar`. The original string is
733
+ # returned if `width` is less than or equal to `s.length`.
734
+ #
735
+ # @param width [Integer]
736
+ # Justify right to this length.
737
+ # @param fillchar [String]
738
+ # Fill with this ASCII character.
739
+ #
740
+ # @return [Series]
741
+ #
742
+ # @example
743
+ # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
744
+ # s.str.rjust(8, "*")
745
+ # # =>
746
+ # # shape: (4,)
747
+ # # Series: 'a' [str]
748
+ # # [
749
+ # # "*****cow"
750
+ # # "**monkey"
751
+ # # null
752
+ # # "hippopotamus"
753
+ # # ]
754
+ def rjust(width, fillchar = " ")
755
+ super
756
+ end
757
+
758
+ # Modify the strings to their lowercase equivalent.
759
+ #
760
+ # @return [Series]
761
+ def to_lowercase
762
+ super
763
+ end
764
+
765
+ # Modify the strings to their uppercase equivalent.
766
+ #
767
+ # @return [Series]
768
+ def to_uppercase
769
+ super
770
+ end
771
+
772
+ # Create subslices of the string values of a Utf8 Series.
773
+ #
774
+ # @param offset [Integer]
775
+ # Start index. Negative indexing is supported.
776
+ # @param length [Integer]
777
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
778
+ # end of the string.
779
+ #
780
+ # @return [Series]
781
+ #
782
+ # @example
783
+ # s = Polars::Series.new("s", ["pear", nil, "papaya", "dragonfruit"])
784
+ # s.str.slice(-3)
785
+ # # =>
786
+ # # shape: (4,)
787
+ # # Series: 's' [str]
788
+ # # [
789
+ # # "ear"
790
+ # # null
791
+ # # "aya"
792
+ # # "uit"
793
+ # # ]
794
+ #
795
+ # @example Using the optional `length` parameter
796
+ # s.str.slice(4, 3)
797
+ # # =>
798
+ # # shape: (4,)
799
+ # # Series: 's' [str]
800
+ # # [
801
+ # # ""
802
+ # # null
803
+ # # "ya"
804
+ # # "onf"
805
+ # # ]
806
+ def slice(offset, length = nil)
807
+ s = Utils.wrap_s(_s)
808
+ s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
809
+ end
810
+ end
811
+ end