polars-df 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE-THIRD-PARTY.txt +2153 -2532
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/3.4/polars.so +0 -0
  9. data/lib/polars/array_expr.rb +382 -3
  10. data/lib/polars/array_name_space.rb +281 -0
  11. data/lib/polars/binary_expr.rb +67 -0
  12. data/lib/polars/binary_name_space.rb +43 -0
  13. data/lib/polars/cat_expr.rb +224 -0
  14. data/lib/polars/cat_name_space.rb +130 -32
  15. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  16. data/lib/polars/catalog/unity/column_info.rb +31 -0
  17. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  18. data/lib/polars/catalog/unity/table_info.rb +50 -0
  19. data/lib/polars/catalog.rb +448 -0
  20. data/lib/polars/config.rb +2 -2
  21. data/lib/polars/convert.rb +12 -2
  22. data/lib/polars/data_frame.rb +834 -48
  23. data/lib/polars/data_type_expr.rb +52 -0
  24. data/lib/polars/data_types.rb +61 -5
  25. data/lib/polars/date_time_expr.rb +251 -0
  26. data/lib/polars/date_time_name_space.rb +299 -0
  27. data/lib/polars/exceptions.rb +7 -2
  28. data/lib/polars/expr.rb +1247 -211
  29. data/lib/polars/functions/col.rb +6 -5
  30. data/lib/polars/functions/datatype.rb +21 -0
  31. data/lib/polars/functions/lazy.rb +127 -15
  32. data/lib/polars/functions/repeat.rb +4 -0
  33. data/lib/polars/io/csv.rb +19 -1
  34. data/lib/polars/io/json.rb +16 -0
  35. data/lib/polars/io/ndjson.rb +13 -0
  36. data/lib/polars/io/parquet.rb +70 -66
  37. data/lib/polars/io/scan_options.rb +47 -0
  38. data/lib/polars/lazy_frame.rb +1099 -95
  39. data/lib/polars/list_expr.rb +400 -11
  40. data/lib/polars/list_name_space.rb +321 -5
  41. data/lib/polars/meta_expr.rb +71 -22
  42. data/lib/polars/name_expr.rb +36 -0
  43. data/lib/polars/scan_cast_options.rb +64 -0
  44. data/lib/polars/schema.rb +84 -3
  45. data/lib/polars/selector.rb +210 -0
  46. data/lib/polars/selectors.rb +932 -203
  47. data/lib/polars/series.rb +1083 -63
  48. data/lib/polars/string_expr.rb +435 -9
  49. data/lib/polars/string_name_space.rb +729 -45
  50. data/lib/polars/struct_expr.rb +103 -0
  51. data/lib/polars/struct_name_space.rb +19 -1
  52. data/lib/polars/utils/parse.rb +40 -0
  53. data/lib/polars/utils/various.rb +18 -1
  54. data/lib/polars/utils.rb +9 -1
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars.rb +10 -0
  57. metadata +12 -2
@@ -63,6 +63,13 @@ module Polars
63
63
  # in the target string.
64
64
  # @param cache [Boolean]
65
65
  # Use a cache of unique, converted datetimes to apply the conversion.
66
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
67
+ # Determine how to deal with ambiguous datetimes:
68
+ #
69
+ # - `'raise'` (default): raise
70
+ # - `'earliest'`: use the earliest datetime
71
+ # - `'latest'`: use the latest datetime
72
+ # - `'null'`: set to null
66
73
  #
67
74
  # @return [Series]
68
75
  #
@@ -177,82 +184,77 @@ module Polars
177
184
  super
178
185
  end
179
186
 
180
- # Get length of the string values in the Series (as number of bytes).
187
+ # Convert a String column into a Decimal column.
181
188
  #
182
- # @return [Series]
189
+ # This method infers the needed parameters `precision` and `scale`.
183
190
  #
184
- # @note
185
- # The returned lengths are equal to the number of bytes in the UTF8 string. If you
186
- # need the length in terms of the number of characters, use `n_chars` instead.
191
+ # @param inference_length [Integer]
192
+ # Number of elements to parse to determine the `precision` and `scale`
193
+ #
194
+ # @return [Series]
187
195
  #
188
196
  # @example
189
- # s = Polars::Series.new(["Café", nil, "345", "東京"])
190
- # s.str.lengths
197
+ # s = Polars::Series.new(
198
+ # ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
199
+ # )
200
+ # s.str.to_decimal
191
201
  # # =>
192
- # # shape: (4,)
193
- # # Series: '' [u32]
202
+ # # shape: (7,)
203
+ # # Series: '' [decimal[*,2]]
194
204
  # # [
195
- # # 5
196
- # # null
197
- # # 3
198
- # # 6
205
+ # # 40.12
206
+ # # 3420.13
207
+ # # 120134.19
208
+ # # 3212.98
209
+ # # 12.90
210
+ # # 143.09
211
+ # # 143.90
199
212
  # # ]
200
- def lengths
213
+ def to_decimal(inference_length = 100)
201
214
  super
202
215
  end
203
216
 
204
- # Get length of the string values in the Series (as number of chars).
217
+ # Return the length of each string as the number of bytes.
205
218
  #
206
219
  # @return [Series]
207
220
  #
208
- # @note
209
- # If you know that you are working with ASCII text, `lengths` will be
210
- # equivalent, and faster (returns length in terms of the number of bytes).
211
- #
212
221
  # @example
213
- # s = Polars::Series.new(["Café", nil, "345", "東京"])
214
- # s.str.n_chars
222
+ # s = Polars::Series.new(["Café", "345", "東京", nil])
223
+ # s.str.len_bytes
215
224
  # # =>
216
225
  # # shape: (4,)
217
226
  # # Series: '' [u32]
218
227
  # # [
219
- # # 4
220
- # # null
228
+ # # 5
221
229
  # # 3
222
- # # 2
230
+ # # 6
231
+ # # null
223
232
  # # ]
224
- def n_chars
233
+ def len_bytes
225
234
  super
226
235
  end
236
+ alias_method :lengths, :len_bytes
227
237
 
228
- # Vertically concat the values in the Series to a single string value.
229
- #
230
- # @param delimiter [String]
231
- # The delimiter to insert between consecutive string values.
238
+ # Return the length of each string as the number of characters.
232
239
  #
233
240
  # @return [Series]
234
241
  #
235
242
  # @example
236
- # Polars::Series.new([1, nil, 2]).str.join("-")
237
- # # =>
238
- # # shape: (1,)
239
- # # Series: '' [str]
240
- # # [
241
- # # "1-2"
242
- # # ]
243
- #
244
- # @example
245
- # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
243
+ # s = Polars::Series.new(["Café", "345", "東京", nil])
244
+ # s.str.len_chars
246
245
  # # =>
247
- # # shape: (1,)
248
- # # Series: '' [str]
246
+ # # shape: (4,)
247
+ # # Series: '' [u32]
249
248
  # # [
249
+ # # 4
250
+ # # 3
251
+ # # 2
250
252
  # # null
251
253
  # # ]
252
- def join(delimiter = "-", ignore_nulls: true)
254
+ def len_chars
253
255
  super
254
256
  end
255
- alias_method :concat, :join
257
+ alias_method :n_chars, :len_chars
256
258
 
257
259
  # Check if strings in Series contain a substring that matches a regex.
258
260
  #
@@ -291,6 +293,65 @@ module Polars
291
293
  super
292
294
  end
293
295
 
296
+ # Return the byte offset of the first substring matching a pattern.
297
+ #
298
+ # If the pattern is not found, returns nil.
299
+ #
300
+ # @param pattern
301
+ # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
302
+ # @param literal
303
+ # Treat `pattern` as a literal string, not as a regular expression.
304
+ # @param strict
305
+ # Raise an error if the underlying pattern is not a valid regex,
306
+ # otherwise mask out with a null value.
307
+ #
308
+ # @return [Series]
309
+ #
310
+ # @note
311
+ # To modify regular expression behaviour (such as case-sensitivity) with
312
+ # flags, use the inline `(?iLmsuxU)` syntax.
313
+ #
314
+ # @example Find the index of the first substring matching a regex pattern:
315
+ # s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
316
+ # s.str.find("a|e").rename("idx_rx")
317
+ # # =>
318
+ # # shape: (4,)
319
+ # # Series: 'idx_rx' [u32]
320
+ # # [
321
+ # # 2
322
+ # # 5
323
+ # # null
324
+ # # 5
325
+ # # ]
326
+ #
327
+ # @example Find the index of the first substring matching a literal pattern:
328
+ # s.str.find("e", literal: true).rename("idx_lit")
329
+ # # =>
330
+ # # shape: (4,)
331
+ # # Series: 'idx_lit' [u32]
332
+ # # [
333
+ # # null
334
+ # # 5
335
+ # # null
336
+ # # 7
337
+ # # ]
338
+ #
339
+ # @example Match against a pattern found in another column (or expression):
340
+ # p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
341
+ # s.str.find(p).rename("idx")
342
+ # # =>
343
+ # # shape: (4,)
344
+ # # Series: 'idx' [u32]
345
+ # # [
346
+ # # 2
347
+ # # 2
348
+ # # null
349
+ # # 5
350
+ # # ]
351
+ def find(pattern, literal: false, strict: true)
352
+ super
353
+ end
354
+
294
355
  # Check if string values end with a substring.
295
356
  #
296
357
  # @param sub [String]
@@ -384,6 +445,34 @@ module Polars
384
445
  super
385
446
  end
386
447
 
448
+ # Parse string values as JSON.
449
+ #
450
+ # Throws an error if invalid JSON strings are encountered.
451
+ #
452
+ # @param dtype [Object]
453
+ # The dtype to cast the extracted value to. If `nil`, the dtype will be
454
+ # inferred from the JSON value.
455
+ # @param infer_schema_length [Integer]
456
+ # The maximum number of rows to scan for schema inference.
457
+ # If set to `nil`, the full data may be scanned *(this is slow)*.
458
+ #
459
+ # @return [Series]
460
+ #
461
+ # @example
462
+ # s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
463
+ # s.str.json_decode
464
+ # # =>
465
+ # # shape: (3,)
466
+ # # Series: 'json' [struct[2]]
467
+ # # [
468
+ # # {1,true}
469
+ # # null
470
+ # # {2,false}
471
+ # # ]
472
+ def json_decode(dtype = nil, infer_schema_length: 100)
473
+ super
474
+ end
475
+
387
476
  # Extract the first match of json string with provided JSONPath expression.
388
477
  #
389
478
  # Throw errors if encounter invalid json strings.
@@ -468,6 +557,39 @@ module Polars
468
557
  super
469
558
  end
470
559
 
560
+ # Extract all capture groups for the given regex pattern.
561
+ #
562
+ # @param pattern [String]
563
+ # A valid regular expression pattern containing at least one capture group,
564
+ # compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
565
+ #
566
+ # @return [Series]
567
+ #
568
+ # @note
569
+ # All group names are **strings**.
570
+ #
571
+ # @example
572
+ # s = Polars::Series.new(
573
+ # "url",
574
+ # [
575
+ # "http://vote.com/ballon_dor?candidate=messi&ref=python",
576
+ # "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
577
+ # "http://vote.com/ballon_dor?error=404&ref=rust"
578
+ # ]
579
+ # )
580
+ # s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
581
+ # # =>
582
+ # # shape: (3,)
583
+ # # Series: 'url' [struct[2]]
584
+ # # [
585
+ # # {"messi","python"}
586
+ # # {"weghorst","polars"}
587
+ # # {null,null}
588
+ # # ]
589
+ def extract_groups(pattern)
590
+ super
591
+ end
592
+
471
593
  # Count all successive non-overlapping regex matches.
472
594
  #
473
595
  # @param pattern [String]
@@ -477,7 +599,7 @@ module Polars
477
599
  #
478
600
  # @example
479
601
  # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
480
- # s.str.count_match('\d')
602
+ # s.str.count_matches('\d')
481
603
  # # =>
482
604
  # # shape: (2,)
483
605
  # # Series: 'foo' [u32]
@@ -485,9 +607,10 @@ module Polars
485
607
  # # 5
486
608
  # # 6
487
609
  # # ]
488
- def count_match(pattern)
610
+ def count_matches(pattern)
489
611
  super
490
612
  end
613
+ alias_method :count_match, :count_matches
491
614
 
492
615
  # Split the string by a substring.
493
616
  #
@@ -717,6 +840,108 @@ module Polars
717
840
  end
718
841
  alias_method :rstrip, :strip_chars_end
719
842
 
843
+ # Remove prefix.
844
+ #
845
+ # The prefix will be removed from the string exactly once, if found.
846
+ #
847
+ # @param prefix [String]
848
+ # The prefix to be removed.
849
+ #
850
+ # @return [Series]
851
+ #
852
+ # @example
853
+ # s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
854
+ # s.str.strip_prefix("foo")
855
+ # # =>
856
+ # # shape: (4,)
857
+ # # Series: '' [str]
858
+ # # [
859
+ # # "bar"
860
+ # # "foobar"
861
+ # # ""
862
+ # # "bar"
863
+ # # ]
864
+ def strip_prefix(prefix)
865
+ super
866
+ end
867
+
868
+ # Remove suffix.
869
+ #
870
+ # The suffix will be removed from the string exactly once, if found.
871
+ #
872
+ # @param suffix [String]
873
+ # The suffix to be removed.
874
+ #
875
+ # @return [Series]
876
+ #
877
+ # @example
878
+ # s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
879
+ # s.str.strip_suffix("bar")
880
+ # # =>
881
+ # # shape: (4,)
882
+ # # Series: '' [str]
883
+ # # [
884
+ # # "foo"
885
+ # # "foobar"
886
+ # # "foo"
887
+ # # ""
888
+ # # ]
889
+ def strip_suffix(suffix)
890
+ super
891
+ end
892
+
893
+ # Pad the start of the string until it reaches the given length.
894
+ #
895
+ # @param length [Integer]
896
+ # Pad the string until it reaches this length. Strings with length equal to or
897
+ # greater than this value are returned as-is.
898
+ # @param fill_char [String]
899
+ # The character to pad the string with.
900
+ #
901
+ # @return [Series]
902
+ #
903
+ # @example
904
+ # s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
905
+ # s.str.pad_start(8, "*")
906
+ # # =>
907
+ # # shape: (4,)
908
+ # # Series: 'a' [str]
909
+ # # [
910
+ # # "*****cow"
911
+ # # "**monkey"
912
+ # # "hippopotamus"
913
+ # # null
914
+ # # ]
915
+ def pad_start(length, fill_char = " ")
916
+ super
917
+ end
918
+
919
+ # Pad the end of the string until it reaches the given length.
920
+ #
921
+ # @param length [Integer]
922
+ # Pad the string until it reaches this length. Strings with length equal to or
923
+ # greater than this value are returned as-is.
924
+ # @param fill_char [String]
925
+ # The character to pad the string with.
926
+ #
927
+ # @return [Series]
928
+ #
929
+ # @example
930
+ # s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
931
+ # s.str.pad_end(8, "*")
932
+ # # =>
933
+ # # shape: (4,)
934
+ # # Series: '' [str]
935
+ # # [
936
+ # # "cow*****"
937
+ # # "monkey**"
938
+ # # "hippopotamus"
939
+ # # null
940
+ # # ]
941
+ def pad_end(length, fill_char = " ")
942
+ super
943
+ end
944
+
720
945
  # Fills the string with zeroes.
721
946
  #
722
947
  # Return a copy of the string left filled with ASCII '0' digits to make a string
@@ -839,6 +1064,25 @@ module Polars
839
1064
  super
840
1065
  end
841
1066
 
1067
+ # Returns string values in reversed order.
1068
+ #
1069
+ # @return [Series]
1070
+ #
1071
+ # @example
1072
+ # s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
1073
+ # s.str.reverse
1074
+ # # =>
1075
+ # # shape: (3,)
1076
+ # # Series: 'text' [str]
1077
+ # # [
1078
+ # # "oof"
1079
+ # # "rab"
1080
+ # # "anañam"
1081
+ # # ]
1082
+ def reverse
1083
+ super
1084
+ end
1085
+
842
1086
  # Create subslices of the string values of a Utf8 Series.
843
1087
  #
844
1088
  # @param offset [Integer]
@@ -877,5 +1121,445 @@ module Polars
877
1121
  s = Utils.wrap_s(_s)
878
1122
  s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
879
1123
  end
1124
+
1125
+ # Return the first n characters of each string in a String Series.
1126
+ #
1127
+ # @param n [Object]
1128
+ # Length of the slice (integer or expression). Negative indexing is supported:
1129
+ # a negative `n` returns all characters except the last `|n|`.
1130
+ #
1131
+ # @return [Series]
1132
+ #
1133
+ # @example Return up to the first 5 characters.
1134
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1135
+ # s.str.head(5)
1136
+ # # =>
1137
+ # # shape: (4,)
1138
+ # # Series: '' [str]
1139
+ # # [
1140
+ # # "pear"
1141
+ # # null
1142
+ # # "papay"
1143
+ # # "drago"
1144
+ # # ]
1145
+ #
1146
+ # @example Return up to the 3rd character from the end.
1147
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1148
+ # s.str.head(-3)
1149
+ # # =>
1150
+ # # shape: (4,)
1151
+ # # Series: '' [str]
1152
+ # # [
1153
+ # # "p"
1154
+ # # null
1155
+ # # "pap"
1156
+ # # "dragonfr"
1157
+ # # ]
1158
+ def head(n)
1159
+ super
1160
+ end
1161
+
1162
+ # Return the last n characters of each string in a String Series.
1163
+ #
1164
+ # @param n [Object]
1165
+ # Length of the slice (integer or expression). Negative indexing is supported:
1166
+ # a negative `n` returns all characters except the first `|n|`.
1167
+ #
1168
+ # @return [Series]
1169
+ #
1170
+ # @example Return up to the last 5 characters:
1171
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1172
+ # s.str.tail(5)
1173
+ # # =>
1174
+ # # shape: (4,)
1175
+ # # Series: '' [str]
1176
+ # # [
1177
+ # # "pear"
1178
+ # # null
1179
+ # # "apaya"
1180
+ # # "fruit"
1181
+ # # ]
1182
+ #
1183
+ # @example Return from the 3rd character to the end:
1184
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1185
+ # s.str.tail(-3)
1186
+ # # =>
1187
+ # # shape: (4,)
1188
+ # # Series: '' [str]
1189
+ # # [
1190
+ # # "r"
1191
+ # # null
1192
+ # # "aya"
1193
+ # # "gonfruit"
1194
+ # # ]
1195
+ def tail(n)
1196
+ super
1197
+ end
1198
+
1199
+ # Convert a String column into a column of dtype with base radix.
1200
+ #
1201
+ # @param base [Integer]
1202
+ # Positive integer or expression which is the base of the string
1203
+ # we are parsing.
1204
+ # Default: 10.
1205
+ # @param dtype [Object]
1206
+ # Polars integer type to cast to.
1207
+ # Default: `Int64`.
1208
+ # @param strict [Object]
1209
+ # Boolean. If `true` (default), raise any ParseError or overflow as ComputeError.
1210
+ # If `false`, silently convert such values to null.
1211
+ #
1212
+ # @return [Series]
1213
+ #
1214
+ # @example
1215
+ # s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
1216
+ # s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
1217
+ # # =>
1218
+ # # shape: (4,)
1219
+ # # Series: 'bin' [i32]
1220
+ # # [
1221
+ # # 6
1222
+ # # 5
1223
+ # # 2
1224
+ # # null
1225
+ # # ]
1226
+ #
1227
+ # @example
1228
+ # s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
1229
+ # s.str.to_integer(base: 16)
1230
+ # # =>
1231
+ # # shape: (4,)
1232
+ # # Series: 'hex' [i64]
1233
+ # # [
1234
+ # # 64030
1235
+ # # 65280
1236
+ # # 51966
1237
+ # # null
1238
+ # # ]
1239
+ def to_integer(
1240
+ base: 10,
1241
+ dtype: Int64,
1242
+ strict: true
1243
+ )
1244
+ super
1245
+ end
1246
+
1247
+ # Use the Aho-Corasick algorithm to find matches.
1248
+ #
1249
+ # Determines if any of the patterns are contained in the string.
1250
+ #
1251
+ # @param patterns [Object]
1252
+ # String patterns to search.
1253
+ # @param ascii_case_insensitive [Boolean]
1254
+ # Enable ASCII-aware case-insensitive matching.
1255
+ # When this option is enabled, searching will be performed without respect
1256
+ # to case for ASCII letters (a-z and A-Z) only.
1257
+ #
1258
+ # @return [Series]
1259
+ #
1260
+ # @note
1261
+ # This method supports matching on string literals only, and does not support
1262
+ # regular expression matching.
1263
+ #
1264
+ # @example
1265
+ # s = Polars::Series.new(
1266
+ # "lyrics",
1267
+ # [
1268
+ # "Everybody wants to rule the world",
1269
+ # "Tell me what you want, what you really really want",
1270
+ # "Can you feel the love tonight"
1271
+ # ]
1272
+ # )
1273
+ # s.str.contains_any(["you", "me"])
1274
+ # # =>
1275
+ # # shape: (3,)
1276
+ # # Series: 'lyrics' [bool]
1277
+ # # [
1278
+ # # false
1279
+ # # true
1280
+ # # true
1281
+ # # ]
1282
+ def contains_any(
1283
+ patterns,
1284
+ ascii_case_insensitive: false
1285
+ )
1286
+ super
1287
+ end
1288
+
1289
+ # Use the Aho-Corasick algorithm to replace many matches.
1290
+ #
1291
+ # @param patterns
1292
+ # String patterns to search and replace.
1293
+ # Also accepts a mapping of patterns to their replacement as syntactic sugar
1294
+ # for `replace_many(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
1295
+ # @param replace_with
1296
+ # Strings to replace where a pattern was a match.
1297
+ # Length must match the length of `patterns` or have length 1. This can be
1298
+ # broadcasted, so it supports many:one and many:many.
1299
+ # @param ascii_case_insensitive
1300
+ # Enable ASCII-aware case-insensitive matching.
1301
+ # When this option is enabled, searching will be performed without respect
1302
+ # to case for ASCII letters (a-z and A-Z) only.
1303
+ #
1304
+ # @return [Series]
1305
+ #
1306
+ # @note
1307
+ # This method supports matching on string literals only, and does not support
1308
+ # regular expression matching.
1309
+ #
1310
+ # @example Replace many patterns by passing lists of equal length to the `patterns` and `replace_with` parameters.
1311
+ # s = Polars::Series.new(
1312
+ # "lyrics",
1313
+ # [
1314
+ # "Everybody wants to rule the world",
1315
+ # "Tell me what you want, what you really really want",
1316
+ # "Can you feel the love tonight"
1317
+ # ]
1318
+ # )
1319
+ # s.str.replace_many(["you", "me"], ["me", "you"])
1320
+ # # =>
1321
+ # # shape: (3,)
1322
+ # # Series: 'lyrics' [str]
1323
+ # # [
1324
+ # # "Everybody wants to rule the wo…
1325
+ # # "Tell you what me want, what me…
1326
+ # # "Can me feel the love tonight"
1327
+ # # ]
1328
+ #
1329
+ # @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
1330
+ # s = Polars::Series.new(
1331
+ # "lyrics",
1332
+ # [
1333
+ # "Everybody wants to rule the world",
1334
+ # "Tell me what you want, what you really really want",
1335
+ # "Can you feel the love tonight",
1336
+ # ]
1337
+ # )
1338
+ # s.str.replace_many(["me", "you", "they"], [""])
1339
+ # # =>
1340
+ # # shape: (3,)
1341
+ # # Series: 'lyrics' [str]
1342
+ # # [
1343
+ # # "Everybody wants to rule the wo…
1344
+ # # "Tell what want, what really…
1345
+ # # "Can feel the love tonight"
1346
+ # # ]
1347
+ #
1348
+ # @example Passing a mapping with patterns and replacements is also supported as syntactic sugar.
1349
+ # s = Polars::Series.new(
1350
+ # "lyrics",
1351
+ # [
1352
+ # "Everybody wants to rule the world",
1353
+ # "Tell me what you want, what you really really want",
1354
+ # "Can you feel the love tonight"
1355
+ # ]
1356
+ # )
1357
+ # mapping = {"me" => "you", "you" => "me", "want" => "need"}
1358
+ # s.str.replace_many(mapping)
1359
+ # # =>
1360
+ # # shape: (3,)
1361
+ # # Series: 'lyrics' [str]
1362
+ # # [
1363
+ # # "Everybody needs to rule the wo…
1364
+ # # "Tell you what me need, what me…
1365
+ # # "Can me feel the love tonight"
1366
+ # # ]
1367
+ def replace_many(
1368
+ patterns,
1369
+ replace_with = Expr::NO_DEFAULT,
1370
+ ascii_case_insensitive: false
1371
+ )
1372
+ super
1373
+ end
1374
+
1375
+ # Use the Aho-Corasick algorithm to extract many matches.
1376
+ #
1377
+ # @param patterns [Object]
1378
+ # String patterns to search.
1379
+ # @param ascii_case_insensitive [Boolean]
1380
+ # Enable ASCII-aware case-insensitive matching.
1381
+ # When this option is enabled, searching will be performed without respect
1382
+ # to case for ASCII letters (a-z and A-Z) only.
1383
+ # @param overlapping [Boolean]
1384
+ # Whether matches may overlap.
1385
+ #
1386
+ # @return [Series]
1387
+ #
1388
+ # @note
1389
+ # This method supports matching on string literals only, and does not support
1390
+ # regular expression matching.
1391
+ #
1392
+ # @example
1393
+ # s = Polars::Series.new("values", ["discontent"])
1394
+ # patterns = ["winter", "disco", "onte", "discontent"]
1395
+ # s.str.extract_many(patterns, overlapping: true)
1396
+ # # =>
1397
+ # # shape: (1,)
1398
+ # # Series: 'values' [list[str]]
1399
+ # # [
1400
+ # # ["disco", "onte", "discontent"]
1401
+ # # ]
1402
+ def extract_many(
1403
+ patterns,
1404
+ ascii_case_insensitive: false,
1405
+ overlapping: false
1406
+ )
1407
+ super
1408
+ end
1409
+
1410
+ # Use the Aho-Corasick algorithm to find all matches.
1411
+ #
1412
+ # The function returns the byte offset of the start of each match.
1413
+ # The return type will be `List<UInt32>`
1414
+ #
1415
+ # @param patterns [Object]
1416
+ # String patterns to search.
1417
+ # @param ascii_case_insensitive [Boolean]
1418
+ # Enable ASCII-aware case-insensitive matching.
1419
+ # When this option is enabled, searching will be performed without respect
1420
+ # to case for ASCII letters (a-z and A-Z) only.
1421
+ # @param overlapping [Boolean]
1422
+ # Whether matches may overlap.
1423
+ #
1424
+ # @return [Series]
1425
+ #
1426
+ # @note
1427
+ # This method supports matching on string literals only, and does not support
1428
+ # regular expression matching.
1429
+ #
1430
+ # @example
1431
+ # df = Polars::DataFrame.new({"values" => ["discontent"]})
1432
+ # patterns = ["winter", "disco", "onte", "discontent"]
1433
+ # df.with_columns(
1434
+ # Polars.col("values")
1435
+ # .str.extract_many(patterns, overlapping: false)
1436
+ # .alias("matches"),
1437
+ # Polars.col("values")
1438
+ # .str.extract_many(patterns, overlapping: true)
1439
+ # .alias("matches_overlapping")
1440
+ # )
1441
+ # # =>
1442
+ # # shape: (1, 3)
1443
+ # # ┌────────────┬───────────┬─────────────────────────────────┐
1444
+ # # │ values ┆ matches ┆ matches_overlapping │
1445
+ # # │ --- ┆ --- ┆ --- │
1446
+ # # │ str ┆ list[str] ┆ list[str] │
1447
+ # # ╞════════════╪═══════════╪═════════════════════════════════╡
1448
+ # # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
1449
+ # # └────────────┴───────────┴─────────────────────────────────┘
1450
+ #
1451
+ # @example
1452
+ # df = Polars::DataFrame.new(
1453
+ # {
1454
+ # "values" => ["discontent", "rhapsody"],
1455
+ # "patterns" => [
1456
+ # ["winter", "disco", "onte", "discontent"],
1457
+ # ["rhap", "ody", "coalesce"]
1458
+ # ]
1459
+ # }
1460
+ # )
1461
+ # df.select(Polars.col("values").str.find_many("patterns"))
1462
+ # # =>
1463
+ # # shape: (2, 1)
1464
+ # # ┌───────────┐
1465
+ # # │ values │
1466
+ # # │ --- │
1467
+ # # │ list[u32] │
1468
+ # # ╞═══════════╡
1469
+ # # │ [0] │
1470
+ # # │ [0, 5] │
1471
+ # # └───────────┘
1472
+ def find_many(
1473
+ patterns,
1474
+ ascii_case_insensitive: false,
1475
+ overlapping: false
1476
+ )
1477
+ super
1478
+ end
1479
+
1480
+ # Vertically concat the values in the Series to a single string value.
1481
+ #
1482
+ # @param delimiter [String]
1483
+ # The delimiter to insert between consecutive string values.
1484
+ # @param ignore_nulls [Boolean]
1485
+ # Ignore null values (default).
1486
+ # If set to `false`, null values will be propagated. This means that
1487
+ # if the column contains any null values, the output is null.
1488
+ #
1489
+ # @return [Series]
1490
+ #
1491
+ # @example
1492
+ # Polars::Series.new([1, nil, 2]).str.join("-")
1493
+ # # =>
1494
+ # # shape: (1,)
1495
+ # # Series: '' [str]
1496
+ # # [
1497
+ # # "1-2"
1498
+ # # ]
1499
+ #
1500
+ # @example
1501
+ # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
1502
+ # # =>
1503
+ # # shape: (1,)
1504
+ # # Series: '' [str]
1505
+ # # [
1506
+ # # null
1507
+ # # ]
1508
+ def join(delimiter = "-", ignore_nulls: true)
1509
+ super
1510
+ end
1511
+ alias_method :concat, :join
1512
+
1513
+ # Returns string values with all regular expression meta characters escaped.
1514
+ #
1515
+ # @return [Series]
1516
+ #
1517
+ # @example
1518
+ # Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
1519
+ # # =>
1520
+ # # shape: (4,)
1521
+ # # Series: '' [str]
1522
+ # # [
1523
+ # # "abc"
1524
+ # # "def"
1525
+ # # null
1526
+ # # "abc\(\\w\+\)"
1527
+ # # ]
1528
+ def escape_regex
1529
+ super
1530
+ end
1531
+
1532
+ # Returns the Unicode normal form of the string values.
1533
+ #
1534
+ # This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
1535
+ #
1536
+ # @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
1537
+ # Unicode form to use.
1538
+ #
1539
+ # @return [Series]
1540
+ #
1541
+ # @example
1542
+ # s = Polars::Series.new(["01²", "KADOKAWA"])
1543
+ # s.str.normalize("NFC")
1544
+ # # =>
1545
+ # # shape: (2,)
1546
+ # # Series: '' [str]
1547
+ # # [
1548
+ # # "01²"
1549
+ # # "KADOKAWA"
1550
+ # # ]
1551
+ #
1552
+ # @example
1553
+ # s.str.normalize("NFKC")
1554
+ # # =>
1555
+ # # shape: (2,)
1556
+ # # Series: '' [str]
1557
+ # # [
1558
+ # # "012"
1559
+ # # "KADOKAWA"
1560
+ # # ]
1561
+ def normalize(form = "NFC")
1562
+ super
1563
+ end
880
1564
  end
881
1565
  end